1 # -*-Perl-*- Test Harness script for Bioperl
9 test_begin(-tests => 21);
12 use_ok('Bio::SeqFeature::Tools::Unflattener');
15 my $verbosity = test_debug();
18 my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
19 $unflattener->verbose($verbosity);
23 my @path = ("NC_000007-ribosomal-slippage.gb");
26 my @topsfs = $seq->get_SeqFeatures;
27 if( $verbosity > 0 ) {
28 warn sprintf "TOP:%d\n", scalar(@topsfs);
33 @sfs = $unflattener->unflatten_seq(-seq => $seq,
35 if( $verbosity > 0 ) {
36 warn "\n\nPOST PROCESSING:\n";
38 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
45 my @path = ("ribosome-slippage.gb");
48 my @topsfs = $seq->get_SeqFeatures;
49 if( $verbosity > 0 ) {
50 warn sprintf "TOP:%d\n", scalar(@topsfs);
55 @sfs = $unflattener->unflatten_seq(-seq => $seq,
57 if( $verbosity > 0 ) {
58 warn "\n\nPOST PROCESSING:\n";
60 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
67 my @path = ("AE003644_Adh-genomic.gb");
70 is ($seq->accession_number, 'AE003644');
71 my @topsfs = $seq->get_SeqFeatures;
72 if( $verbosity > 0 ) {
73 warn sprintf "TOP:%d\n", scalar(@topsfs);
78 @sfs = $unflattener->unflatten_seq(-seq => $seq,
79 -group_tag => 'locus_tag');
80 if( $verbosity > 0 ) {
81 warn "\n\nPOST PROCESSING:\n";
83 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
88 # now try again, using a custom subroutine to link together features
89 $seq = getseq("AE003644_Adh-genomic.gb");
90 @sfs = $unflattener->unflatten_seq
92 -group_tag => 'locus_tag',
96 my ($sf, @candidate_container_sfs) = @_;
98 if ($sf->has_tag('note')) {
99 my @notes = $sf->get_tag_values('note');
100 my @trnames = map {/from transcript\s+(.*)/; $1;}
102 @trnames = grep {$_} @trnames;
106 $self->throw("UNRESOLVABLE");
108 elsif (@trnames == 1) {
109 $trname = $trnames[0];
112 $self->throw("AMBIGUOUS: @trnames");
118 $_->has_tag('product') ? $_->get_tag_values('product')
121 } @candidate_container_sfs;
123 if (@container_sfs == 0) {
124 $self->throw("UNRESOLVABLE");
126 elsif (@container_sfs == 1) {
128 return ($container_sfs[0]=>0);
131 $self->throw("AMBIGUOUS");
135 $unflattener->feature_from_splitloc(-seq => $seq);
136 if( $verbosity > 0 ) {
137 warn "\n\nPOST PROCESSING:\n";
139 warn sprintf "PROCESSED2:%d\n", scalar(@sfs);
143 # try again; different sequence
144 # this is an E-Coli seq with no mRNA features;
145 # we just want to link all features directly with gene
147 $seq = getseq("D10483.gbk");
150 @sfs = $unflattener->unflatten_seq(-seq => $seq,
151 -partonomy => {'*'=>'gene'});
152 if( $verbosity > 0 ) {
153 warn "\n\nPOST PROCESSING:\n";
155 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
159 # this sequence has no locus_tag or or gene tags
160 $seq = getseq("AY763288.gb");
163 @sfs = $unflattener->unflatten_seq(-seq => $seq,
165 if( $verbosity > 0 ) {
166 warn "\n\nPOST PROCESSING:\n";
168 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
172 # try again; different sequence - dicistronic gene, mRNA record
174 $seq = getseq("X98338_Adh-mRNA.gb");
177 @sfs = $unflattener->unflatten_seq(-seq => $seq,
178 -partonomy => {'*'=>'gene'});
179 if( $verbosity > 0 ) {
180 warn "\n\nPOST PROCESSING:\n";
182 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
186 # try again; this sequence has no CDSs but rRNA present
188 $seq = getseq("no_cds_example.gb");
191 @sfs = $unflattener->unflatten_seq(-seq => $seq,
193 if( $verbosity > 0 ) {
194 warn "\n\nPOST PROCESSING:\n";
196 warn sprintf "PROCESSED:%d\n", scalar(@sfs);
199 my @all_sfs = $seq->get_all_SeqFeatures;
201 my @exons = grep { $_-> primary_tag eq 'exon' } @all_sfs ;
207 # this is an arabidopsise gbk record. it has no mRNA features.
208 # it has explicit exon/intron records
209 my @path = ("ATF14F8.gbk");
210 $seq = getseq(@path);
212 is ($seq->accession_number, 'AL391144');
213 my @topsfs = $seq->get_SeqFeatures;
214 my @cdss = grep {$_->primary_tag eq 'CDS'} @topsfs;
215 my $n = scalar(@topsfs);
216 if( $verbosity > 0 ) {
217 warn sprintf "TOP:%d\n", scalar(@topsfs);
222 @sfs = $unflattener->unflatten_seq(-seq => $seq,
224 @sfs = $seq->get_SeqFeatures;
225 if( $verbosity > 0 ) {
226 warn "\n\nPOST PROCESSING:\n";
228 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
232 my @allsfs = $seq->get_all_SeqFeatures;
235 my @mrnas = grep {$_->primary_tag eq 'mRNA'} @allsfs;
236 if( $verbosity > 0 ) {
237 warn sprintf "ALL:%d\n", scalar(@allsfs);
238 warn sprintf "mRNAs:%d\n", scalar(@mrnas);
240 # relationship between mRNA and CDS should be one-one
246 # this is a record from FlyBase
247 # it has mRNA features, and explicit exon/intron records
248 my @path = ("AnnIX-v003.gbk");
249 $seq = getseq(@path);
251 my @topsfs = $seq->get_SeqFeatures;
252 if( $verbosity > 0 ) {
253 warn sprintf "TOP:%d\n", scalar(@topsfs);
258 @sfs = $unflattener->unflatten_seq(-seq => $seq,
260 @sfs = $seq->get_SeqFeatures;
261 if( $verbosity > 0 ) {
262 warn "\n\nPOST PROCESSING:\n";
264 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
268 my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
269 is scalar(@exons), 6; # total number of exons per splice
271 my %numberh = map {$_->get_tag_values("number") => 1} @exons;
272 my @numbers = keys %numberh;
273 if( $verbosity > 0 ) {
274 warn sprintf "DISTINCT EXONS: %d [@numbers]\n", scalar(@numbers);
276 is scalar(@numbers), 6; # distinct exons
281 # example of a BAD genbank entry
282 my @path = ("dmel_2Lchunk.gb");
283 $seq = getseq(@path);
285 my @topsfs = $seq->get_SeqFeatures;
286 if( $verbosity > 0 ) {
287 warn sprintf "TOP:%d\n", scalar(@topsfs);
293 # we EXPECT problems with this erroneous record
294 $unflattener->error_threshold(2);
295 @sfs = $unflattener->unflatten_seq(-seq => $seq,
297 @sfs = $seq->get_SeqFeatures;
298 if( $verbosity > 0 ) {
299 warn "\n\nPOST PROCESSING:\n";
301 warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
305 my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
306 is scalar(@exons), 2; # total number of exons per splice
308 my @probs = $unflattener->get_problems;
309 $unflattener->report_problems(\*STDERR) if $verbosity > 0;
310 $unflattener->clear_problems;
311 if( $verbosity > 0 ) {
312 warn sprintf "PROBLEMS ENCOUNTERED: %d (EXPECTED: 6)\n", scalar(@probs);
314 is scalar(@probs), 6;
320 _write_hier(0, @sfs);
326 foreach my $sf (@sfs) {
328 if ($sf->has_tag('gene')) {
329 ($label) = $sf->get_tag_values('gene');
331 elsif ($sf->has_tag('product')) {
332 ($label) = $sf->get_tag_values('product');
334 elsif ($sf->has_tag('number')) {
335 $label = join("; ", $sf->get_tag_values('number'));
337 warn sprintf "%s%s $label\n", ' ' x $indent, $sf->primary_tag;
338 my @sub_sfs = $sf->sub_SeqFeature;
339 _write_hier($indent+1, @sub_sfs);
345 my $seqio = Bio::SeqIO->new('-file' => test_input_file(@path),
346 '-format' => 'GenBank');
347 $seqio->verbose($verbosity);
349 my $seq = $seqio->next_seq();