Added to perldoc instructions about the use of the environment variables.
[cxgn-corelibs.git] / lib / Bio / FeatureIO / geneseqer.pm
blobddcae5e379196fee25bb1170b838377cf33b77dd
1 =pod
3 =head1 NAME
5 Bio::FeatureIO::geneseqer - FeatureIO parser for GeneSeqer reports
7 =head1 SYNOPSIS
9 my $feature_in = Bio::FeatureIO->new(-format => 'geneseqer',
10 -file => 'myfile.geneseqer.out',
13 $feature_in->attach_alignments(1); #attach alignments to their AGSs
15 $feature_in->mode('both_separate'); #parse both alignments and PGLs from the
16 #report
18 while(my $feat = $feature_in->next_alignment) {
19 #do something with alignment feature
22 while(my $feat = $feature_in->next_feature) {
24 #get the alignment objects attached to the PGL feature
25 my @alignments =
26 map {
27 $_->value
28 } $feat->annotation->get_Annotations('supporting_alignment');
30 foreach my $alignment (@alignments) {
31 # $alignment is now a feature representing one of the EST
32 # alignments used to compute this PGL
38 =head1 DESCRIPTION
40 Parses GeneSeqer reports, returning L<Bio::SeqFeature::Annotated>
41 objects. Alignments are represented by a
42 L<Bio::SeqFeature::Annotated> object with a 'Target' annotation,
43 holding a L<Bio::Annotation::Target> object. PGLs are also
44 L<Bio::SeqFeature::Annotated> objects.
46 The mode() accessor controls the parsing mode, which is one of:
48 =over 4
50 =item 'pgls'
52 Only predicted gene location (PGL) features will be parsed and
53 returned. Supporting alignments in the file will be ignored.
55 =item 'alignments'
57 Only supporting EST/protein alignments will be returned, PGLs will be
58 ignored.
60 =item 'both_separate'
62 Both PGLs and alignments will be parsed. next_feature() will return
63 PGLs only, and next_alignment() will return the alignments
65 =item 'both_merged'
67 Both PGLs and alignments will be parsed. next_feature() will return
68 B<both> PGLs and alignments. next_alignment will always return undef
70 =back
72 If the parse mode is set to 'both_separate' or 'both_merged', you can
73 also set the accessor attach_alignments() to have the supporting
74 alignment features attached to the AGS features in which they are
75 used. These are attached under the Annotation key/tag
76 'supporting_alignment'. This is off by default.
78 In the future, this module may be enhanced to have the capability of
79 producing L<Bio::SeqFeature::Generic> and
80 L<Bio::SeqFeature::FeaturePair> objects.
82 =head1 STRUCTURE OF RETURNED FEATURES
84 This module returns a bunch of hierarchical features (that is, the
85 features have subfeatures) that represent the GeneSeqer output. The
86 feature hierarchy for PGLs is arranged as:
88 region - the GeneSeqer predicted gene location (PGL)
89 |- processed_transcript - each alternative gene structure (AGS) in the PGL
90 | |- CDS - each exon in that AGS
91 |- (optional) match - alignment, see below
92 |- match_part
94 and for alignments it is
96 match - spliced alignment of an EST/protein to the ref seq
97 |- match_part - each exon in the spliced alignment
99 Note that the alignments (match and match_part features) will only be
100 attached to the gene features if attach_alignments() is set to true.
102 =head1 REDUCING MEMORY REQUIREMENTS
104 If you find yourself running out of memory while parsing large
105 GeneSeqer output files with this parser, there are a couple of things
106 you can do to decrease the memory requirements:
108 1. do not use the 'both_separate' parsing mode, since it can sometimes
109 need to keep a large buffer of features in memory
110 2. make sure attach_alignments() is set to false (0 or '')
112 If these two measures are not adequate for you, please explain your
113 problem on the mailing list, and we may be able to address your
114 situation.
116 =head1 FEEDBACK
118 =head2 Mailing Lists
120 User feedback is an integral part of the evolution of this and other
121 Bioperl modules. Send your comments and suggestions preferably to
122 the Bioperl mailing list. Your participation is much appreciated.
124 bioperl-l@bioperl.org - General discussion
125 http://bioperl.org/MailList.shtml - About the mailing lists
127 =head2 Reporting Bugs
129 Report bugs to the Bioperl bug tracking system to help us keep track
130 of the bugs and their resolution. Bug reports can be submitted via
131 the web:
133 http://bugzilla.bioperl.org/
135 =head1 AUTHOR
137 Robert Buels, rmb32@cornell.edu
139 =head1 APPENDIX
141 The rest of the documentation details each of the object methods.
142 Internal methods are usually preceded with a _
144 =cut
146 # Let the code begin...
148 package Bio::FeatureIO::geneseqer;
149 use strict;
151 use base qw(Bio::FeatureIO);
153 use Bio::SeqFeature::Annotated;
155 =head2 new()
157 constructor options for this parser are:
159 -mode
160 takes same arguments as mode() accessor below
161 -attach_alignments
162 takes same arguments as attach_alignments() accessor below
164 =cut
166 sub _initialize {
167 my($self,%arg) = @_;
169 $self->SUPER::_initialize(%arg);
171 #init buffers
172 $self->{alignment_buffer} = [];
173 $self->{feature_buffer} = [];
175 #set defaults
176 $arg{-mode} ||= 'pgls';
177 $arg{-attach_alignments} = 0 unless defined $arg{-attach_alignments};
179 #set options
180 $self->mode($arg{-mode});
181 $self->attach_alignments($arg{-attach_alignments});
184 =head2 mode()
186 Usage: $fio->mode('alignments');
187 #parse and return only alignments
188 Desc : get/set the current parsing mode. Defaults to
189 'pgls'
190 Ret : currently set parse mode
191 Args : (optional) new parse mode, which is one of 'pgls',
192 'alignments', 'both_separate', or 'both_merged'
194 =cut
196 sub mode {
197 my ($self,$newmode) = @_;
198 if(defined $newmode) {
199 grep {$_ eq $newmode} qw/alignments pgls both_separate both_merged/
200 or $self->throw("invalid mode selection $newmode");
201 $self->{parse_mode} = $newmode;
203 return $self->{parse_mode};
206 =head2 attach_alignments()
208 Usage: $fio->attach_alignments(1);
209 Desc : get/set a flag telling the parser whether to attach
210 the supporting alignment features to each AGS (gene
211 feature) returned.
212 Off by default.
213 Attaches the alignments under a tag/Annotation
214 named 'supporting_alignment'.
215 Ret : current value of this flag.
216 Args : (optional) new true/false value of this flag
218 =cut
220 sub attach_alignments {
221 my ($self,$newval) = @_;
222 if(defined $newval) {
223 $self->{attach_alignments} = $newval;
225 return $self->{attach_alignments};
228 =head2 next_feature()
230 Usage : my $feature = $featureio->next_feature();
231 Function: reads the next Predicted Gene Location from the given
232 geneseqer file, returning it as an object. The PGLs
233 will be returned in the same order in which they appear
234 in the GeneSeqer output.
235 Returns : a predicted gene location (PGL) feature, of class
236 L<Bio::SeqFeature::Annotated>
237 Args : none
239 =cut
241 # next_feature
242 # return 'pgl/alignment' if in buffer
243 # else buffer 'pgl/alignment' and return it
244 sub next_feature {
245 my ($self,$featuretype) = shift;
246 $featuretype ||= 'pgl';
248 #validate attach_alignments and mode selections
249 if( $self->attach_alignments && $self->mode !~ /^both/ ) {
250 $self->throw("mode must be set to 'both_merged' or 'both_separate' if attach_alignments() is set to true");
253 return $self->next_alignment if $featuretype eq 'alignment';
255 $self->throw("invalid featuretype '$featuretype'") unless $featuretype eq 'pgl';
257 #now down to business
259 #get a pgl into the buffer if necessary
260 unless( @{$self->{feature_buffer}} ) {
261 $self->_buffer_item($self->mode eq 'both_merged' ? 'either' : 'pgl');
264 return shift @{$self->{feature_buffer}};
267 =head2 next_alignment()
269 Usage: my $feature = $fio->next_alignment
270 Desc : get the next EST/protein alignment feature from the
271 GeneSeqer report. The alignments will be returned in
272 the same order in which they appear in the file.
273 Ret : an alignment feature, or undef if there are no more
274 Args : none
276 =cut
278 # next_alignment
279 # alias for next_feature('alignment')
280 sub next_alignment {
281 my ($self) = @_;
283 #validate attach_alignments and mode selections
284 if( $self->attach_alignments && $self->mode !~ /^both_/ ) {
285 $self->throw("mode must be set to 'both_separate' or 'both_merged' if attach_alignments() is set to true");
288 #get an alignment into the buffer if necessary
289 unless( @{$self->{alignment_buffer}} ) {
290 $self->_buffer_item('alignment');
293 return shift @{$self->{alignment_buffer}};
296 =head2 write_feature()
298 Not implemented.
300 =cut
302 sub write_feature {
303 shift->throw_not_implemented;
306 #central repository for match patterns
307 sub _pattern {
308 my ($self,$patname) = @_;
309 my %patterns =
311 pgl => qr/^\s*PGL\s+\d+\s+\(/,
312 ags => qr/^\s*AGS-\d+\s/,
313 sequence => qr/^\s*Sequence\s+\d+\s*:\s+(.+),\s+from/,
314 align => qr/^\s*\*{10}/,
315 finished => qr/^[^A-Za-z]+finished\s/,
317 return $patterns{$patname};
320 # buffer_item (central parsing loop)
321 # read from file. if mode says we're using it, parse the thing and buffer it.
322 # otherwise, **skip** it
323 # return 1 when you finally buffer a thing of the type your arg says you're looking for
324 # return 0 if EOF
325 sub _buffer_item {
326 my ($self,$requested_type) = @_;
327 grep {$requested_type eq $_} qw/ alignment pgl either /
328 or $self->throw("invalid requested type '$requested_type'");
330 ### MAIN PARSING LOOP detects the beginnings of sections, pushes
331 # them back on the buffer, then calls the appropriate parsing
332 # routine for them. returns only if a.) it successfully finds and
333 # parses an item of the requested type or b.) it reaches EOF
334 while( my $line = $self->_readline ) {
335 # chomp $line;
336 # warn "decide line $.: $line\n";
337 #ifs are sorted in decreasing order of how common a case they are
338 if( $self->mode ne 'pgls' && $line =~ $self->_pattern('align') ) {
339 $self->_pushback($line);
340 $self->_buffer_alignment;
341 return 1 if $requested_type eq 'alignment' || $requested_type eq 'either';
344 elsif( $self->mode ne 'alignments' && $line =~ $self->_pattern('pgl') ) {
345 $self->_pushback($line);
346 $self->_buffer_pgl;
347 return 1 if $requested_type eq 'pgl' || $requested_type eq 'either';
350 elsif( $line =~ $self->_pattern('sequence') ) {
351 $self->_pushback($line);
352 $self->_purge_alignment_index;
353 $self->_parse_sequence_section;
358 # buffer_alignment
359 # assumes starting at the beginning of alignment section.
360 # parse the alignment section, and put the alignment feature in the alignment buffer
361 # if we're attaching, also index the alignment by its signature
363 #parse an 'EST sequence' section of the report, up to and
364 #including the '^MATCH ' line, returning a feature with
365 #subfeatures representing the spliced cDNA alignment
366 sub _buffer_alignment {
367 my ($self) = @_;
369 my $buffer_for_alignments = $self->mode eq 'both_merged'
370 ? $self->{feature_buffer}
371 : $self->{alignment_buffer};
373 #these uninitialized variables are filled in by the parsing below
374 my $parent_feature;
375 my @subfeatures;
377 my $cstrand; #strandedness of the EST we're currently looking at, either '+' or '-'
378 my $cid; #identifier of the EST we're looking at
379 my $clength; #length of the EST we're looking at
381 my %patterns = (
382 hqpgs => qr/^\s*hqPGS_(\S+_\S+[+-])/,
383 est_seq => qr/^\s*EST\s+sequence\s+\d+\s+([+-])\s*strand\s+(?:(\d+)\s+n\s+)?\(\s*File:\s*([^\)\s]+)\s*\)/,
384 exon => qr/^\s+Exon\s/,
385 match => qr/^MATCH/,
386 seqline => qr/^\s+(\d+)\s+([ACTG ]+)/,
389 while( my $line = $self->_readline ) {
390 # chomp $line;
391 # warn "cdna parsing '$line'\n";
392 #EST sequence introduction
393 if( $line =~ $patterns{est_seq} ) {
394 # warn "cdna est line\n";
395 #parse out the strand, length, and ID of the EST we're dealing with,
396 #storing them in the lexicals above
397 $cstrand = $1;
398 $cid = $3;
399 $clength = $2;
400 $cstrand eq chop $cid or $self->throw("inconsistent strandedness in line '$line'");
401 $cstrand = $cstrand eq '+' ? 1 :
402 $cstrand eq '-' ? -1 : $self->throw("Unknown strand direction '$cstrand'");
404 #do a little error checking
405 $cid or $self->throw("can't parse sequence identifier from line '$line'");
406 $cstrand == 1 or $cstrand == -1 or $self->throw("can't parse EST alignment strand from line '$line'");
408 #try to figure out the est's length from the sequence lines
409 #if it wasn't given in the 'EST sequence' line
410 elsif( $line =~ $patterns{seqline} ) {
411 my $startlength = $1;
412 my $residues = $2;
413 $residues =~ s/\s//g;
414 my $newlength = $startlength + length($residues) - 1;
415 unless($clength >= $newlength) {
416 $clength = $newlength;
417 # chomp $line;
418 # warn "faked up clength $clength from line '$line'\n";
421 #Exon line
422 elsif( $line =~ $patterns{exon} ) {
423 #parse out the genomic and cDNA start,end,length, and the score of the match
424 my (undef,$gstart,$gend,undef,$cstart,$cend,undef,$exon_score)
425 = _parse_numbers($line);
427 #if we're looking at a reverse-complemented EST, un-reverse-complement its start and end coordinates
428 unless($cstrand == 1) {
429 $cstrand == -1 or $self->throw("invalid strandedness '$cstrand'");
430 $clength > 0 or $self->throw("can't parse EST length from line '$line'");
431 ($cstart,$cend) = _reverse_complement_coords($cstart,$cend,$clength);
434 my $subfeature = $self->_new_feature( -start => $gstart,
435 -end => $gend,
436 -score => $exon_score,
437 -type => 'match_part',
438 -source => 'GeneSeqer',
439 -target => { -target_id => $cid,
440 -start => $cstart,
441 -end => $cend,
442 -strand => $cstrand,
444 ); #note: we'll go through
445 #and set the seq_id
446 #later, when we get it
447 #from the MATCH line
448 push @subfeatures,$subfeature;
450 #MATCH line
451 elsif( $line =~ $patterns{match} ) {
452 my (undef,$genomic_seqname,$cdna_seqname,$alignment_score) = split /\s+/,$line;
453 my $gstrand = _chop_strand(\$genomic_seqname);
454 my $other_cstrand = _chop_strand(\$cdna_seqname);
455 $cstrand eq $other_cstrand or $self->throw("inconsistent cDNA strandedness: $cstrand vs $other_cstrand");
457 #go back over the subfeatures and error check, figure out the
458 #parent feature start and end, and set the appropriate seq_id on
459 #each subfeature
460 my $gstart;
461 my $gend;
462 foreach my $subfeature (@subfeatures) {
463 #check that the strandedness is correct
464 $subfeature->strand eq $gstrand or $self->throw("parser bug, inconsistent strands found ('".$subfeature->strand."' vs '$gstrand')");
465 #set the proper seq_id
466 $subfeature->seq_id($genomic_seqname);
467 #set parent feature limits
468 $gstart = $subfeature->start unless $gstart && $gstart < $subfeature->start;
469 $gend = $subfeature->end unless $gend && $gend > $subfeature->end;
472 #make an overall feature for this EST alignment
473 $parent_feature = $self->_new_feature( -start => $gstart,
474 -end => $gend,
475 -seq_id => $genomic_seqname,
476 -source => 'GeneSeqer',
477 -score => $alignment_score,
478 -strand => $gstrand,
479 -type => 'match',
482 #add all the subfeatures to it, reversing so they'll come out in the right order
483 $parent_feature->add_SeqFeature($_) foreach @subfeatures;
485 elsif( $line =~ $patterns{hqpgs} ) {
487 #do some consistency checks with the contents of the PGS line
488 my (undef,$other_cid) = split /_/, $1;
490 if( $self->attach_alignments ) {
491 #now make the pgs signature string, and return it with the parent feature we made
492 my (undef,@pgs_stuff) = split /\s+/,$line;
494 #$pgs_signature a string GeneSeqer uses to uniquely identify
495 #this alignment. We'll use these later to connect the PGLs to
496 #their supporting alignments
497 my $pgs_signature = join(' ',@pgs_stuff,$other_cid);
498 # $parent_feature->add_Annotation('comment',Bio::Annotation::Comment->new(-text => $pgs_signature));
499 $self->_index_alignment($pgs_signature,$parent_feature);
502 $parent_feature or $self->throw("parse error");
504 push @$buffer_for_alignments, $parent_feature;
505 return;
510 #parse the section of the report introducing the processing of a new
511 #genomic sequence. right now, just par
512 sub _parse_sequence_section {
513 my ($self) = @_;
514 my $line = $self->_readline;
516 #parse the sequence identifier out of the line
517 $line =~ $self->_pattern('sequence')
518 or $self->throw("improper call of _parse_sequence_section, line '$line' does not match expected pattern");
520 # warn "got new seq $1\n";
522 #store the sequence identifier in the current object.
523 #_new_feature() uses this as the seq_id to return
524 $self->{current_genomic_sequence} = $1
525 or $self->throw("improper 'sequence' pattern in geneseqer parser. it does not capture the sequence identifier from the Sequence line in \$1");
528 # buffer_pgl
529 # assumes starting at the beginning of pgl section
530 # parse the pgl section, put the pgl feature in the pgl buffer
531 # if attach_alignments() is set, remember to attach the associated
532 # alignments, getting them from the alignment index
533 sub _buffer_pgl {
534 my $self = shift;
536 while(my $line = $self->_readline) {
537 chomp $line;
538 # warn "pgl parse $.: $line\n";
539 if( $line =~ $self->_pattern('pgl')) {
540 # warn $line;
541 my (undef,$pgl_start,$pgl_end) = _parse_numbers($line);
542 my @ags; # list of AGSs in the current PGL. filled in by the
543 # parsing below
544 my %alignments; #this is a hash by signature of alignments that
545 #are members of this PGL
546 while( $line = $self->_readline) {
547 if( $line =~ $self->_pattern('ags') ) {
548 # warn $line;
549 my ($ags_start) = $line =~ /\(\s*(\d+)/ or $self->throw("parse error");
550 my ($ags_end) = $line =~ /(\d+)\s*\)/ or $self->throw("parse error");
551 my %subfeatures = $self->_parse_ags_subfeatures;
553 push @ags,$self->_new_feature( -start => $ags_start,
554 -end => $ags_end,
555 -type => 'processed_transcript',
556 -subfeatures => $subfeatures{exon_features},
557 supporting_alignments => $subfeatures{alignment_features},
560 elsif( $line =~ $self->_pattern('pgl') || $line =~ $self->_pattern('finished')) {
561 $self->_pushback($line);
562 last;
566 @ags or $self->throw("no ags found for pgl. premature end of file?");
568 push @{$self->{feature_buffer}},
569 $self->_new_feature( -start => $pgl_start,
570 -end => $pgl_end,
571 -type => 'region',
572 -subfeatures => [@ags, values %alignments],
574 return;
577 $self->throw("parse error, cannot find end of PGL section");
580 #parse a single AGS entry, making subfeatures for their exons, etc
581 #return a hash-style list as exon_features => [feat,feat...],
582 # alignment_features => { signature => feat, signature => feat, ...},
583 sub _parse_ags_subfeatures {
584 my ($self) = @_;
586 #these variables are all filled in by the parsing below
587 my @exon_features;
588 my @supporting_alignment_features;
590 while( my $line = $self->_readline ) {
591 if( $line =~ /^\s*Exon\s/ ) {
592 my (undef,$gstart,$gend,undef,$exon_score) = _parse_numbers($line);
593 push @exon_features, $self->_new_feature( -start => $gstart,
594 -end => $gend,
595 -score => $exon_score,
596 -type => 'CDS',
599 elsif( $line =~ /^\s*PGS\s+\(/ ) {
600 if( $self->attach_alignments ) {
601 #parse all the PGS's and then break out of the while loop above when done
602 do {
603 my (undef,@pgs_tokens) = grep {$_} (split /\s+/,$line);
604 # use Data::Dumper;
605 # die "got tokens: ",Dumper(\@pgs_tokens);
606 my $pgs_sig = join ' ',@pgs_tokens;
607 push @supporting_alignment_features,$self->_get_indexed_alignment($pgs_sig)
608 || $self->throw("geneseqer parse error: no supporting alignment found with signature '$pgs_sig', line was '$line'");
609 } while ( $line = $self->_readline and $line =~ /^\s*PGS\s+\(/ );
611 $self->_pushback($line);
613 #after the supporting PGS's, we are done parsing this AGS
615 last;
619 return ( exon_features => \@exon_features,
620 alignment_features => \@supporting_alignment_features,
624 #object method to create a new feature object, with some defaults and
625 #automation of the more repetitive bits (like adding targets and
626 #subfeatures)
627 sub _new_feature(@) {
628 my ($self,%a) = @_;
630 # use Data::Dumper;
631 # warn "got feature: ",Dumper(\%a);
633 $a{-seq_id} ||= $self->{current_genomic_sequence};
635 #replace spaces in source with underscores
636 $a{-source} ||= 'GeneSeqer';
637 $a{-source} =~ s/\s+/_/g;
639 #if no strand is given, make the proper strand and flip start and end if necessary
640 @a{qw/-start -end -strand/} = _start_end_strand(@a{qw/-start -end/}) unless $a{-strand};
642 #do some type mapping
643 my %type_map = ( similarity => 'nucleotide_motif',
644 exon => delete($a{is_alignment}) ? 'match_part' : 'exon',
646 $a{-type} = $type_map{ $a{-type}} || $a{-type};
648 #intercept target if present
649 my $target = do {
650 if(my $t = delete $a{target}) {
651 unless( $t->{-strand} ) {
652 @{$t}{qw/-start -end -strand/} = _start_end_strand(@{$t}{qw/-start -end/});
655 #HACK fix up target ids for geneseqer alignments
656 if( $t->{-target_id} =~ s/([+-])$//) {
657 $t->{-strand} = $1;
660 Bio::Annotation::Target->new(%$t);
663 my $feature = Bio::SeqFeature::Annotated->new(%a);
664 $feature->add_Annotation('Target',$target) if $target;
665 $feature->add_Annotation('ID',Bio::Annotation::SimpleValue->new( -value => $a{id} ) ) if $a{id};
666 if( ref $a{supporting_alignments} eq 'ARRAY' ) {
667 foreach ( @{$a{-subfeatures}} ) {
668 $feature->add_Annotation('supporting_alignment',Bio::Annotation::SimpleValue->new( -value => $_ ));
669 # $feature->add_Annotation('Parent',Bio::Annotation::SimpleValue->new( -value => $_ ));
672 return $feature;
676 #object method that, given two (one-based) coordinates on a sequence
677 #and its length, returns the reverse complement of the coordinates in
678 #a two-element list
679 sub _reverse_complement_coords {
680 my ($s,$e,$l) = @_;
681 return ( $l-$e+1, $l-$s+1 );
684 #make a gff3-compliant feature start, end, and strand
685 #from a gamexml-style start and end that might be backwards
686 sub _start_end_strand(@) {
687 my ($start,$end) = @_;
688 if($start > $end) {
689 return ($end,$start,-1);
690 } else {
691 return ($start,$end,1);
695 #extract all the (possibly fractional) numbers from a line of text,
696 #return them as a list
697 sub _parse_numbers {
698 my ($line) = @_;
699 my @nums;
700 while( $line =~ /(\d+(?:\.\d+)?)/g ) {
701 push @nums, $1;
703 return @nums;
706 # lookup_alignment
707 # get_alignment
708 # purge_alignments
709 sub _index_alignment {
710 my ($self,$key,$feature) = @_;
711 # warn "indexing with '$key'\n";
712 $self->{alignment_features}{$key} = $feature;
714 sub _purge_alignment_index {
715 shift->{alignment_features} = {};
717 sub _get_indexed_alignment {
718 my ($self,$key) = @_;
719 return $self->{alignment_features}{$key};
722 #chop the strand (+ or -) off of a seq name and return it as 1 for +
723 #or -1 for - note that this has the side effect of modifying the value
724 #of the thing that's passed to it
725 sub _chop_strand {
726 my ($ident) = @_;
727 my $strand = chop $$ident;
728 if( $strand eq '+' ) {
729 return 1;
731 elsif( $strand eq '-' ) {
732 return -1;
734 die "Invalid strand '$strand' at end of identifier '$$ident$strand'";
738 1;# do not remove