6 use File
::Temp qw
/tempfile/;
11 use Bio
::Index
::Fasta
;
13 use CXGN
::Tools
::List qw
/str_in/;
14 use IPC
::Cmd qw
/can_run/;
17 use_ok
( 'CXGN::Cluster' )
18 or plan skip_all
=> 'could not include the module being tested';
20 unless( can_run
('phrap.longreads') ){
21 plan skip_all
=> 'phrap.longreads not available';
27 #now start testing with them
28 my $set = CXGN
::Cluster
::ClusterSet
->new;
30 my @seqnames = sort qw
/monkey bonobo homo orangutan chimpanzee gorilla marmoset lemur/;
32 $set->add_match(@seqnames[0,1]);
33 is_deeply
( cs_contents
($set),
35 'one cluster with 2 members',
39 $set->add_match(@seqnames[0,2]);
40 is_deeply
( cs_contents
($set),
41 [[ @seqnames[0,1,2] ]],
45 $set->add_match(@seqnames[3,3]);
46 is_deeply
( cs_contents
($set),
47 [[ @seqnames[0,1,2] ],
50 'self-match adds an unrelated cluster',
53 $set->add_match(@seqnames[0,0]);
54 is_deeply
( cs_contents
($set),
55 [[ @seqnames[0,1,2] ],
58 'self-match of already present sequence does not change anything',
61 $set->add_match(@seqnames[1,1]);
62 is_deeply
( cs_contents
($set),
63 [[ @seqnames[0,1,2] ],
66 'self-match of already present sequence does not change anything',
70 $set->add_match(@seqnames[3,7]);
71 $set->add_match(@seqnames[3,6]);
72 $set->add_match(@seqnames[3,5]);
73 is_deeply
( cs_contents
($set),
75 [ @seqnames[3,5,6,7] ],
78 'and add more to the second cluster',
83 #find our file of test sequences
84 my $test_seqs_filename = File
::Spec
->catfile($FindBin::Bin
,'data','test.seq');
85 die "can't open $test_seqs_filename" unless -r
$test_seqs_filename;
87 #make a Bio::Index of them so we can retrieve individual ones
88 my $test_seqs_index = Bio
::Index
::Fasta
->new( -filename
=> do { my (undef,$tf) = tempfile
(UNLINK
=> 1); $tf },
91 $test_seqs_index->make_index($test_seqs_filename);
94 $set = CXGN
::Cluster
::ClusterSet
->new;
96 #now test the cluster calculation using a few things we know should cluster in the test set
99 [sort qw
/C04HBa0114G11.1 C04HBa0050I18.1 C04HBa0036C23.1 C04HBa0008H22.1/],
100 [sort qw
/C04HBa0024G05.1 C04HBa0020F17.1 /],
101 [sort qw
/C10HBa0020A12.1 C10HBa0248A13.3 /],
104 #make a cluster with just one member
105 $set->add_match($known_clusters[0][0],$known_clusters[0][0]);
106 my @c = $set->get_clusters;
107 is_deeply
(cs_contents
($set),
108 [[$known_clusters[0][0]]],
109 'single-member cluster loaded OK',
112 is_deeply
( [ $c[0]->get_contig_coords($test_seqs_index) ],
113 [[['C04HBa0008H22.1',1,81572,1]]],
114 'singleton contig coordinates appear correctly',
117 #put in the first cluster
118 $set->add_match($known_clusters[0][0],$_) foreach @
{$known_clusters[0]};;
119 is_deeply
(cs_contents
($set),
120 [$known_clusters[0]],
121 'first known cluster members loaded ok'
124 #now get its coordinates, it should come out as just one contig
125 @c = $set->get_clusters;
126 #print Dumper $c[0]->get_contig_coords($test_seqs_index);
127 is_deeply
( [ $c[0]->get_contig_coords($test_seqs_index) ],
154 'first test contig assembled OK'
158 my $bs = [ $c[0]->get_consensus_base_segments($test_seqs_index, min_segment_size
=> 100 ) ];
196 'get_consensus_base_segments',
200 #put in the second cluster
201 $set->add_match($known_clusters[1][0],$_) foreach @
{$known_clusters[1]};
202 is_deeply
(cs_contents
($set),
203 [@known_clusters[0,1]],
204 'second known cluster members loaded ok'
207 #assemble the second cluster
208 @c = $set->get_clusters;
209 is_deeply
( [ $c[0]->get_contig_coords($test_seqs_index) ],
224 'second test contig assembled OK'
227 $bs = [ $c[0]->get_consensus_base_segments($test_seqs_index) ];
253 #make an artificial linkage between the first and second cluster, this
254 #should assemble into two real contigs
255 $set->add_match($known_clusters[0][0],$known_clusters[1][1]);
256 is_deeply
(cs_contents
($set),
257 [[sort @
{$known_clusters[0]},@
{$known_clusters[1]}]],
258 'artificial cluster linkage results in joining of clusters'
261 #now check that they assemble into two actual contigs
262 @c = $set->get_clusters;
263 #bprint Dumper $c[0]->get_contig_coords($test_seqs_index);
264 is_deeply
( [ $c[0]->get_contig_coords($test_seqs_index) ],
307 'erroneous precluster assembles into two actual contigs'
312 $set = CXGN
::Cluster
::ClusterSet
->new;
314 # now add fake matches between and among the known clusters to put
315 # them all in one precluster
316 $set->add_match( $known_clusters[0][0], $known_clusters[2][0] );
317 $set->add_match( $known_clusters[2][0], $_ ) for @
{$known_clusters[2]};
319 @c = $set->get_clusters;
320 is
( scalar(@c), 1, 'got one cluster' );
321 # and check the base segments that are made
322 $bs = [ $c[0]->get_consensus_base_segments($test_seqs_index, min_segment_size
=> 100 ) ];
362 'segment merging worked'
366 #make a sorted list of sorted arrayrefs representing the clusters in the set
367 #clusters sorted in descending size order,
368 #members sorted in alphabetical order
371 my @c = $set->get_clusters;
372 return [ sort {@
$b <=> @
$a} map [sort $_->get_members], @c ];