maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / Tools / GuessSeqFormat.pm
blobca93cffd65903fd6deb80fda00368ead0afbc48a
1 #------------------------------------------------------------------
3 # BioPerl module Bio::Tools::GuessSeqFormat
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Andreas Kähäri, andreas.kahari@ebi.ac.uk
9 # You may distribute this module under the same terms as perl itself
10 #------------------------------------------------------------------
12 =encoding utf-8
14 =head1 NAME
16 Bio::Tools::GuessSeqFormat - Module for determining the sequence
17 format of the contents of a file, a string, or through a
18 filehandle.
20 =head1 SYNOPSIS
22 # To guess the format of a flat file, given a filename:
23 my $guesser = Bio::Tools::GuessSeqFormat->new( -file => $filename );
24 my $format = $guesser->guess;
26 # To guess the format from an already open filehandle:
27 my $guesser = Bio::Tools::GuessSeqFormat->new( -fh => $filehandle );
28 my $format = $guesser->guess;
29 # The filehandle will be returned to its original position. Note that this
30 # filehandle can be STDIN.
32 # To guess the format of one or several lines of text (with
33 # embedded newlines):
34 my $guesser = Bio::Tools::GuessSeqFormat->new( -text => $linesoftext );
35 my $format = $guesser->guess;
37 # To create a Bio::Tools::GuessSeqFormat object and set the
38 # filename, filehandle, or line to parse afterwards:
39 my $guesser = Bio::Tools::GuessSeqFormat->new();
40 $guesser->file($filename);
41 $guesser->fh($filehandle);
42 $guesser->text($linesoftext);
44 # To guess in one go, given e.g. a filename:
45 my $format = Bio::Tools::GuessSeqFormat->new( -file => $filename )->guess;
47 =head1 DESCRIPTION
49 Bio::Tools::GuessSeqFormat tries to guess the format ("swiss",
50 "pir", "fasta" etc.) of the sequence or MSA in a file, in a
51 scalar, or through a filehandle.
53 The guess() method of a Bio::Tools::GuessSeqFormat object will
54 examine the data, line by line, until it finds a line to which
55 only one format can be assigned. If no conclusive guess can be
56 made, undef is returned.
58 If the Bio::Tools::GuessSeqFormat object is given a filehandle,
59 e.g. STDIN, it will be restored to its original position on
60 return from the guess() method.
62 =head2 Formats
64 Tests are currently implemented for the following formats:
66 =over
68 =item *
70 ACeDB ("ace")
72 =item *
74 Blast ("blast")
76 =item *
78 ClustalW ("clustalw")
80 =item *
82 Codata ("codata")
84 =item *
86 EMBL ("embl")
88 =item *
90 FastA sequence ("fasta")
92 =item *
94 FastQ sequence ("fastq")
96 =item *
98 FastXY/FastA alignment ("fastxy")
100 =item *
102 Game XML ("game")
104 =item *
106 GCG ("gcg")
108 =item *
110 GCG Blast ("gcgblast")
112 =item *
114 GCG FastA ("gcgfasta")
116 =item *
118 GDE ("gde")
120 =item *
122 Genbank ("genbank")
124 =item *
126 Genscan ("genscan")
128 =item *
130 GFF ("gff")
132 =item *
134 HMMER ("hmmer")
136 =item *
138 PAUP/NEXUS ("nexus")
140 =item *
142 Phrap assembly file ("phrap")
144 =item *
146 NBRF/PIR ("pir")
148 =item *
150 Mase ("mase")
152 =item *
154 Mega ("mega")
156 =item *
158 GCG/MSF ("msf")
160 =item *
162 Pfam ("pfam")
164 =item *
166 Phylip ("phylip")
168 =item *
170 Prodom ("prodom")
172 =item *
174 Raw ("raw")
176 =item *
178 RSF ("rsf")
180 =item *
182 Selex ("selex")
184 =item *
186 Stockholm ("stockholm")
188 =item *
190 Swissprot ("swiss")
192 =item *
194 Tab ("tab")
196 =item *
198 Variant Call Format ("vcf")
200 =back
202 =head1 FEEDBACK
204 =head2 Mailing Lists
206 User feedback is an integral part of the evolution of this and
207 other Bioperl modules. Send your comments and suggestions
208 preferably to one of the Bioperl mailing lists. Your
209 participation is much appreciated.
211 bioperl-l@bioperl.org - General discussion
212 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
214 =head2 Support
216 Please direct usage questions or support issues to the mailing list:
218 I<bioperl-l@bioperl.org>
220 rather than to the module maintainer directly. Many experienced and
221 responsive experts will be able to look at the problem and quickly
222 address it. Please include a thorough description of the problem
223 with code and data examples if at all possible.
225 =head2 Reporting Bugs
227 Report bugs to the Bioperl bug tracking system to help us
228 keep track of the bugs and their resolution. Bug reports can be
229 submitted via the web:
231 https://github.com/bioperl/bioperl-live/issues
233 =head1 AUTHOR
235 Andreas KE<228>hE<228>ri, andreas.kahari@ebi.ac.uk
237 =head1 CONTRIBUTORS
239 Heikki LehvE<228>slaiho, heikki-at-bioperl-dot-org
240 Mark A. Jensen, maj-at-fortinbras-dot-us
242 =cut
245 package Bio::Tools::GuessSeqFormat;
247 use strict;
248 use warnings;
251 use base qw(Bio::Root::Root);
253 =head1 METHODS
255 Methods available to Bio::Tools::GuessSeqFormat objects
256 are described below. Methods with names beginning with an
257 underscore are considered to be internal.
259 =cut
261 =head2 new
263 Title : new
264 Usage : $guesser = Bio::Tools::GuessSeqFormat->new( ... );
265 Function : Creates a new object.
266 Example : See SYNOPSIS.
267 Returns : A new object.
268 Arguments : -file The filename of the file whose format is to
269 be guessed, or
270 -fh An already opened filehandle from which a text
271 stream may be read, e.g. STDIN, or
272 -text A scalar containing one or several lines of
273 text with embedded newlines.
275 If more than one of the above arguments are given, they
276 are tested in the order -text, -file, -fh, and the first
277 available argument will be used.
279 =cut
sub new {
    # Constructor: builds the object through the parent class, then stores
    # every attribute/value pair on the object under the lower-cased
    # attribute name (e.g. "-FILE" is stored as "-file").
    my ($class, @params) = @_;

    my $self = $class->SUPER::new(@params);

    # Consume the argument list two items at a time.
    while (@params) {
        my $key = lc shift @params;
        my $val = shift @params;
        $self->{$key} = $val;
    }

    return $self;
}
301 =head2 file
303 Title : file
304 Usage : $guesser->file($filename);
305 $filename = $guesser->file;
306 Function : Gets or sets the current filename associated with
307 an object.
308 Returns : The new filename.
309 Arguments : The filename of the file whose format is to be
310 guessed.
312 A call to this method will clear the current filehandle and
313 the current lines of text associated with the object.
315 =cut
sub file {
    # Combined getter/setter for the filename.  When a defined filename is
    # passed it becomes the active source and any stored filehandle and
    # text are reset to undef.  Always returns the current filename.
    my ($self, $file) = @_;

    return $self->{-file} unless defined $file;

    @{$self}{qw(-fh -text)} = (undef, undef);
    $self->{-file} = $file;

    return $self->{-file};
}
333 =head2 fh
335 Title : fh
336 Usage : $guesser->fh($filehandle);
337 $filehandle = $guesser->fh;
338 Function : Gets or sets the current filehandle associated with
339 an object.
340 Returns : The new filehandle.
341 Arguments : An already opened filehandle from which a text
342 stream may be read.
344 A call to this method will clear the current filename and
345 the current lines of text associated with the object.
347 =cut
sub fh {
    # Combined getter/setter for the filehandle.  When a defined handle is
    # passed it becomes the active source and any stored filename and text
    # are reset to undef.  Always returns the current filehandle.
    my ($self, $fh) = @_;

    return $self->{-fh} unless defined $fh;

    @{$self}{qw(-file -text)} = (undef, undef);
    $self->{-fh} = $fh;

    return $self->{-fh};
}
366 =head2 text
368 Title : text
369 Usage : $guesser->text($linesoftext);
370 $linesoftext = $guesser->text;
371 Function : Gets or sets the current text associated with an
372 object.
373 Returns : The new lines of text.
374 Arguments : A scalar containing one or several lines of text,
375 including embedded newlines.
377 A call to this method will clear the current filename and
378 the current filehandle associated with the object.
380 =cut
sub text {
    # Combined getter/setter for the text to examine.  When defined text is
    # passed it becomes the active source and any stored filehandle and
    # filename are reset to undef.  Always returns the current text.
    my ($self, $text) = @_;

    return $self->{-text} unless defined $text;

    @{$self}{qw(-fh -file)} = (undef, undef);
    $self->{-text} = $text;

    return $self->{-text};
}
398 =head2 guess
400 Title : guess
401 Usage : $format = $guesser->guess;
402 @format = $guesser->guess; # if given a line of text
403 Function : Guesses the format of the data associated with the
404 object.
405 Returns : A format string such as "swiss" or "pir". If a
406 format can not be found, undef is returned.
407 Arguments : None.
409 If the object is associated with a filehandle, the position
410 of the filehandle will be returned to its original position
411 before the method returns.
413 =cut
# Dispatch table used by guess(): format name => { test => CODE ref }.
# Each format "xyz" is tested by the helper subroutine _possibly_xyz in
# this package.  Taking a code reference through \&{"name"} is permitted
# under "use strict 'refs'".
our %formats = map { ( $_ => { test => \&{"_possibly_$_"} } ) } qw(
    ace      blast     bowtie  clustalw codata
    embl     fasta     fastq   fastxy   game
    gcg      gcgblast  gcgfasta gde     genbank
    genscan  gff       hmmer   nexus    mase
    mega     msf       pfam    phrap    phylip
    pir      prodom    raw     rsf      selex
    stockholm swiss    tab     vcf
);
sub guess {
    # Guess the format of the data attached to the object.
    #
    # The input is taken from -text, -file or -fh (tested in that order).
    # Non-blank lines are read one at a time and handed, with their
    # 1-based count among non-blank lines, to every test in %formats.
    # Reading stops at the first line that exactly one test accepts.
    #
    # Returns the matching format name, or undef when the input is
    # exhausted without such a line.
    # Throws when no input has been set, when the string or file cannot
    # be opened, when an unseekable filehandle is given and IO::Scalar is
    # not installed, or when the filehandle cannot be repositioned.
    my $self = shift;

    # Record each format's own name next to its test.  "keys" is used
    # rather than "each" so that no iterator state on the shared
    # package-level hash is relied on.
    for my $fmt_key (keys %formats) {
        $formats{$fmt_key}{fmt_string} = $fmt_key;
    }

    my $fh;
    my $start_pos;
    if (defined $self->{-text}) {
        # Read the text through an in-memory filehandle.
        my $text = $self->{-text};
        open $fh, '<', \$text or $self->throw("Could not read from string: $!");
    }
    elsif (defined $self->{-file}) {
        # If given a filename, open the file.
        my $file = $self->{-file};
        open $fh, '<', $file or $self->throw("Could not read file '$file': $!");
    }
    elsif (defined $self->{-fh}) {
        # If given a filehandle, get the current position in the stream.
        $fh = $self->{-fh};
        if (not seek $fh, 0, 1) {    # seek to current position to determine seekability
            # Work around non-seekable filehandles if IO::Scalar is available
            # (adapted from http://www.perlmonks.org/?node_id=33587)
            # IO::Mark may be an option for very large streams?
            $self->throw("Need IO::Scalar to guess from unseekable filehandles")
                if not eval { require IO::Scalar };
            my $data;
            # Slurp the rest of the stream; "$.--" presumably compensates
            # for the slurp being counted as one input line.
            { local $/; $data = <$fh>; $.-- };    # copy raw data from fh
            tie *$fh, 'IO::Scalar', my $s;        # replace fh by scalar-tied fh
            print $fh $data;                      # write raw data to tied fh
            seek $fh, 0, 0;                       # return to start of tied fh
        }
        $start_pos = tell $fh;
    }
    else {
        # Without this guard the undefined handle fails further down with
        # an error message that does not point at the real cause.
        $self->throw("No input to guess from: set one of -text, -file or -fh");
    }

    my $done   = 0;
    my $lineno = 0;
    my $guess;
    while (!$done) {
        my $line;         # The next line of the file.
        my $match = 0;    # Number of possible formats of this line.

        last if (!defined($line = <$fh>));
        next if ($line =~ /^\s*$/);    # Skip white and empty lines.
        chomp $line;
        $line =~ s/\r$//;              # Fix for DOS files on Unix.
        ++$lineno;

        for my $fmt (values %formats) {
            if ($fmt->{test}->($line, $lineno)) {
                ++$match;
                $guess = $fmt->{fmt_string};
            }
        }

        # We're done if there was only one match.
        $done = ($match == 1);
    }

    if (defined $self->{-fh}) {
        # Go back to original position in filehandle
        seek $fh, $start_pos, 0 or $self->throw("Could not reset filehandle $fh: $!");
    }
    else {
        # Close the filehandle we opened
        close $fh;
    }

    return ($done ? $guess : undef);
}
524 =head1 HELPER SUBROUTINES
526 All helper subroutines will, given a line of text and the line
527 number of the same line, return 1 if the line possibly is from a
528 file of the type that they perform a test of.
530 A zero return value does not mean that the line is not part
531 of a certain type of file, just that the test did not find any
532 characteristics of that type of file in the line.
534 =head2 _possibly_ace
536 From bioperl test data, and from
537 "http://www.isrec.isb-sib.ch/DEA/module8/B_Stevenson/Practicals/transcriptome_recon/transcriptome_recon.html".
539 =cut
sub _possibly_ace {
    # True when the line starts with one of the ACeDB class names followed
    # by a space and then a double quote or a colon.  The line number is
    # accepted but not used.
    my ($text, $line_number) = @_;
    my $ace_header = qr/^(?:Sequence|Peptide|DNA|Protein) [":]/;
    return ($text =~ $ace_header);
}
547 =head2 _possibly_blast
549 From various blast results.
551 =cut
sub _possibly_blast {
    # True only for line 1 when it looks like a BLAST banner: a program
    # name containing "BLAST" at the start and a bracketed part at the end.
    my ($text, $line_number) = @_;
    my $banner = qr/^[[:upper:]]*BLAST[[:upper:]]*.*\[.*\]$/;
    return ($line_number == 1 && $text =~ $banner);
}
560 =head2 _possibly_bowtie
562 Contributed by kortsch.
564 =cut
sub _possibly_bowtie {
    # True when the line has the tab-separated column layout tested below
    # and the two captured columns (the alphabetic fifth column and the
    # sixth column) have the same length.  The line number is not used.
    my ($text, $line_number) = @_;
    my $layout_ok = $text =~ /^[[:graph:]]+\t[-+]\t[[:graph:]]+\t\d+\t([[:alpha:]]+)\t([[:graph:]]+)\t\d+\t[[:graph:]]?/;
    return $layout_ok && length($1) == length($2);
}
573 =head2 _possibly_clustalw
575 From "http://www.ebi.ac.uk/help/formats.html".
577 =cut
sub _possibly_clustalw {
    # True only for line 1 when it contains the word "CLUSTAL".
    my ($text, $line_number) = @_;
    my $marker = qr/CLUSTAL/;
    return ($line_number == 1 && $text =~ $marker);
}
585 =head2 _possibly_codata
587 From "http://www.ebi.ac.uk/help/formats.html".
589 =cut
sub _possibly_codata {
    # True when the line starts with "ENTRY", "SEQUENCE" or "///".
    #
    # Earlier revisions also tested "ENTRY" on line 1 and "SEQUENCE" on
    # line 2 separately, but both cases are already covered by this
    # position-independent pattern, so the line number has no influence
    # on the result.  It is still accepted so that all helpers share the
    # same calling convention.
    my ($line, $lineno) = @_;
    return ($line =~ m{^(?:ENTRY|SEQUENCE|///)});
}
599 =head2 _possibly_embl
601 From
602 "http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html#3.3".
604 =cut
sub _possibly_embl {
    # True only for line 1 when it starts with "ID " and ends with "BP.".
    my ($text, $line_number) = @_;
    my $is_id_line = $line_number == 1 && $text =~ /^ID /;
    return ($is_id_line && $text =~ /BP\.$/);
}
612 =head2 _possibly_fasta
614 From "http://www.ebi.ac.uk/help/formats.html".
616 =cut
sub _possibly_fasta {
    # True for a ">" header line (any line number), or for a line after the
    # first that consists solely of the letters accepted by the class below
    # (case-insensitive; "J" and "O" are excluded).
    my ($text, $line_number) = @_;
    my $residues_only = $line_number != 1 && $text =~ /^[A-IK-NP-Z]+$/i;
    return ($residues_only || $text =~ /^>\s*\w/);
}
625 =head2 _possibly_fastq
627 From bioperl test data.
629 =cut
sub _possibly_fastq {
    # True when line 1 starts with "@" or line 3 starts with "+".
    my ($text, $line_number) = @_;
    my $title_line = $line_number == 1 && $text =~ /^@/;
    return ( $title_line || ($line_number == 3 && $text =~ /^\+/) );
}
638 =head2 _possibly_fastxy
640 From bioperl test data.
642 =cut
sub _possibly_fastxy {
    # True when line 1 starts with " FASTXY" or " FASTA", or line 2 starts
    # with " version " followed by a digit.
    my ($text, $line_number) = @_;
    my $program_line = $line_number == 1 && $text =~ /^ FAST(?:XY|A)/;
    return ($program_line || ($line_number == 2 && $text =~ /^ version \d/));
}
651 =head2 _possibly_game
653 From bioperl testdata.
655 =cut
sub _possibly_game {
    # True when the line starts with the Game XML document type
    # declaration.  The line number is accepted but not used.
    my ($text, $line_number) = @_;
    my $doctype = qr/^<!DOCTYPE game/;
    return ($text =~ $doctype);
}
663 =head2 _possibly_gcg
665 From bioperl, Bio::SeqIO::gcg.
667 =cut
sub _possibly_gcg {
    # True when the line contains "Length: ", "Type: " and "Check: " in
    # that order and ends with "..".  The line number is not used.
    my ($text, $line_number) = @_;
    my $gcg_header = qr/Length: .*Type: .*Check: .*\.\.$/;
    return ($text =~ $gcg_header);
}
675 =head2 _possibly_gcgblast
677 From bioperl testdata.
679 =cut
sub _possibly_gcgblast {
    # True when line 1 starts with "!!SEQUENCE_LIST", or line 2 looks like
    # a BLAST banner (program name containing "BLAST" at the start and a
    # bracketed part at the end).
    my ($text, $line_number) = @_;
    my $list_header = $line_number == 1 && $text =~ /^!!SEQUENCE_LIST/;
    return ($list_header ||
            ($line_number == 2 &&
             $text =~ /^[[:upper:]]*BLAST[[:upper:]]*.*\[.*\]$/));
}
689 =head2 _possibly_gcgfasta
691 From bioperl testdata.
693 =cut
sub _possibly_gcgfasta {
    # True when line 1 starts with "!!SEQUENCE_LIST", or line 2 contains
    # the word "FASTA".
    my ($text, $line_number) = @_;
    my $list_header = $line_number == 1 && $text =~ /^!!SEQUENCE_LIST/;
    return ($list_header || ($line_number == 2 && $text =~ /FASTA/));
}
702 =head2 _possibly_gde
704 From "http://www.ebi.ac.uk/help/formats.html".
706 =cut
sub _possibly_gde {
    # True when the line is a lone "{" or "}", or when it starts with one
    # of the field names listed in the pattern below.  The line number is
    # accepted but not used.
    my ($text, $line_number) = @_;
    my $lone_brace = $text =~ /^[{}]$/;
    return ($lone_brace ||
            $text =~ /^(?:name|longname|sequence-ID|
                          creation-date|direction|strandedness|
                          type|offset|group-ID|creator|descrip|
                          comment|sequence)/x);
}
718 =head2 _possibly_genbank
720 From "http://www.ebi.ac.uk/help/formats.html".
721 Format of [apparently optional] file header from
722 "http://www.umdnj.edu/rcompweb/PA/Notes/GenbankFF.htm". (TODO: dead link)
724 =cut
sub _possibly_genbank {
    # True when line 1 contains the "GENETIC SEQUENCE DATA BANK" header or
    # starts with "LOCUS ", when line 2 starts with "DEFINITION ", or when
    # line 3 starts with "ACCESSION ".
    my ($text, $line_number) = @_;
    my $first_line_hit = $line_number == 1
        && ($text =~ /GENETIC SEQUENCE DATA BANK/ || $text =~ /^LOCUS /);
    my $definition_hit = $line_number == 2 && $text =~ /^DEFINITION /;
    return ($first_line_hit ||
            $definition_hit ||
            ($line_number == 3 && $text =~ /^ACCESSION /));
}
735 =head2 _possibly_genscan
737 From bioperl test data.
739 =cut
sub _possibly_genscan {
    # True when line 1 is a GENSCAN banner mentioning "Date" and "Time",
    # or when any line starts with "Sequence <word>", "Parameter matrix"
    # or "Predicted genes".
    my ($text, $line_number) = @_;
    my $banner = $line_number == 1 && $text =~ /^GENSCAN.*Date.*Time/;
    return ($banner ||
            ($text =~ /^(?:Sequence\s+\w+|Parameter matrix|Predicted genes)/));
}
748 =head2 _possibly_gff
750 From bioperl test data.
752 =cut
sub _possibly_gff {
    # True when line 1 starts with "##gff-version" or line 2 starts with
    # "##date".
    my ($text, $line_number) = @_;
    my $version_line = $line_number == 1 && $text =~ /^##gff-version/;
    return ($version_line || ($line_number == 2 && $text =~ /^##date/));
}
761 =head2 _possibly_hmmer
763 From bioperl test data.
765 =cut
sub _possibly_hmmer {
    # True when line 2 starts with "HMMER", or line 3 contains
    # "Washington University School of Medicine".
    my ($text, $line_number) = @_;
    my $program_line = $line_number == 2 && $text =~ /^HMMER/;
    return ($program_line ||
            ($line_number == 3 &&
             $text =~ /Washington University School of Medicine/));
}
775 =head2 _possibly_nexus
777 From "http://paup.csit.fsu.edu/nfiles.html".
779 =cut
sub _possibly_nexus {
    # True only for line 1 when it starts with "#NEXUS".
    my ($text, $line_number) = @_;
    my $magic = qr/^#NEXUS/;
    return ($line_number == 1 && $text =~ $magic);
}
787 =head2 _possibly_mase
789 From bioperl test data.
790 More detail from "http://www.umdnj.edu/rcompweb/PA/Notes/GenbankFF.htm" (TODO: dead link)
792 =cut
sub _possibly_mase {
    # True when line 1 starts with ";;", or when a later line starts with
    # ";".  Note that "[^;]?" in the second pattern is optional, so the
    # character after the first ";" does not restrict the match.
    my ($text, $line_number) = @_;
    my $header = $line_number == 1 && $text =~ /^;;/;
    return ($header || ($line_number > 1 && $text =~ /^;[^;]?/));
}
801 =head2 _possibly_mega
803 From the ensembl browser (AlignView data export).
805 =cut
sub _possibly_mega {
    # True only for line 1 when the whole line is "#mega".
    my ($text, $line_number) = @_;
    my $magic = qr/^#mega$/;
    return ($line_number == 1 && $text =~ $magic);
}
814 =head2 _possibly_msf
816 From "http://www.ebi.ac.uk/help/formats.html".
818 =cut
sub _possibly_msf {
    # True when the line starts with "//", contains an
    # "MSF: ... Type: ... Check:" header, or contains a
    # "Name: ... Len:" entry.  The line number is not used.
    my ($text, $line_number) = @_;
    my $separator = $text =~ m{^//};
    return ($separator ||
            $text =~ /MSF:.*Type:.*Check:|Name:.*Len:/);
}
827 =head2 _possibly_phrap
829 From "http://biodata.ccgb.umn.edu/docs/contigimage.html". (TODO: dead link)
830 From "http://genetics.gene.cwru.edu/gene508/Lec6.htm". (TODO: dead link)
831 From bioperl test data ("*.ace.1" files).
833 =cut
sub _possibly_phrap {
    # True when the line starts with one of the record tags listed in the
    # pattern below.  The line number is accepted but not used.
    my ($text, $line_number) = @_;
    my $record_tag = qr/^(?:AS\ |CO\ Contig|BQ|AF\ |BS\ |RD\ |
                         QA\ |DS\ |RT\{)/x;
    return ($text =~ $record_tag);
}
842 =head2 _possibly_pir
844 From "http://www.ebi.ac.uk/help/formats.html".
845 The ".,()" spotted in bioperl test data.
847 =cut
sub _possibly_pir {    # "NBRF/PIR" (?)
    # True for a ">XX;" header line with one of the two-character codes
    # listed below (any line number), or for a line after the first that
    # consists only of whitespace, the accepted letters and ".,()",
    # optionally ending in "*".
    my ($text, $line_number) = @_;
    my $sequence_line = $line_number != 1
        && $text =~ /^[\sA-IK-NP-Z.,()]+\*?$/i;
    return ($sequence_line ||
            $text =~ /^>(?:P1|F1|DL|DC|RL|RC|N3|N1);/);
}
856 =head2 _possibly_pfam
858 From bioperl test data.
860 =cut
sub _possibly_pfam {
    # True when the line starts with "name/start-end", followed by
    # whitespace and at least one accepted letter or "." (matched
    # case-insensitively).  The line number is not used.
    my ($text, $line_number) = @_;
    my $alignment_row = qr{^\w+/\d+-\d+\s+[A-IK-NP-Z.]+}i;
    return ($text =~ $alignment_row);
}
868 =head2 _possibly_phylip
870 From "http://www.ebi.ac.uk/help/formats.html". Initial space
871 allowed on first line (spotted in ensembl AlignView exported
872 data).
874 =cut
sub _possibly_phylip {
    # True when line 1 holds two whitespace-separated numbers (leading
    # whitespace allowed), when line 2 matches the name-plus-sequence
    # pattern, or when line 3 matches either that pattern or a
    # whitespace-led run of sequence characters.
    my ($text, $line_number) = @_;
    my $counts_line = $line_number == 1 && $text =~ /^\s*\d+\s\d+/;
    my $first_row   = $line_number == 2 && $text =~ /^\w\s+[A-IK-NP-Z\s]+/;
    return ($counts_line ||
            $first_row ||
            ($line_number == 3 && $text =~ /(?:^\w\s+[A-IK-NP-Z\s]+|\s+[A-IK-NP-Z\s]+)/)
           );
}
885 =head2 _possibly_prodom
887 From "http://prodom.prabi.fr/prodom/current/documentation/data.php".
889 =cut
sub _possibly_prodom {
    # True only for line 1 when it starts with "ID " and ends with
    # "<number> seq.".
    my ($text, $line_number) = @_;
    my $is_id_line = $line_number == 1 && $text =~ /^ID /;
    return ($is_id_line && $text =~ /\d+ seq\.$/);
}
897 =head2 _possibly_raw
899 From "http://www.ebi.ac.uk/help/formats.html".
901 =cut
sub _possibly_raw {
    # True when the line consists only of ASCII letters and whitespace.
    # The line number is accepted but not used.
    my ($text, $line_number) = @_;
    my $letters_only = qr/^[A-Za-z\s]+$/;
    return ($text =~ $letters_only);
}
909 =head2 _possibly_rsf
911 From "http://www.ebi.ac.uk/help/formats.html".
913 =cut
sub _possibly_rsf {
    # True when line 1 starts with "!!RICH_SEQUENCE", when the line is a
    # lone "{" or "}", or when it starts with one of the field names
    # listed in the pattern below.
    my ($text, $line_number) = @_;
    my $magic      = $line_number == 1 && $text =~ /^!!RICH_SEQUENCE/;
    my $lone_brace = $text =~ /^[{}]$/;
    return ($magic ||
            $lone_brace ||
            $text =~ /^(?:name|type|longname|
                          checksum|creation-date|strand|sequence)/x);
}
924 =head2 _possibly_selex
926 From "http://www.ebc.ee/WWW/hmmer2-html/node27.html".
928 Assuming presence of Selex file header. Data exported by
929 Bioperl on Pfam and Selex formats are identical, but Pfam file
930 only holds one alignment.
932 =cut
sub _possibly_selex {
    # True when line 1 starts with "#=ID ", when line 2 starts with
    # "#=AC ", or when any line starts with "#=SQ ".
    my ($text, $line_number) = @_;
    my $id_line = $line_number == 1 && $text =~ /^#=ID /;
    my $ac_line = $line_number == 2 && $text =~ /^#=AC /;
    return ($id_line ||
            $ac_line ||
            ($text =~ /^#=SQ /));
}
942 =head2 _possibly_stockholm
944 From bioperl test data.
946 =cut
sub _possibly_stockholm {
    # True when line 1 starts with "# STOCKHOLM", or when any line starts
    # with "#=GF " or "#=GS ".
    my ($text, $line_number) = @_;
    my $magic = $line_number == 1 && $text =~ /^# STOCKHOLM/;
    return ($magic ||
            $text =~ /^#=(?:GF|GS) /);
}
957 =head2 _possibly_swiss
959 From "http://ca.expasy.org/sprot/userman.html#entrystruc".
961 =cut
sub _possibly_swiss {
    # True only for line 1 when it starts with "ID " and ends with "AA.".
    my ($text, $line_number) = @_;
    my $is_id_line = $line_number == 1 && $text =~ /^ID /;
    return ($is_id_line && $text =~ /AA\.$/);
}
969 =head2 _possibly_tab
971 Contributed by Heikki.
973 =cut
sub _possibly_tab {
    # True only for line 1 when it begins with two non-empty fields
    # separated by a single tab.
    my ($text, $line_number) = @_;
    my $two_fields = qr/^[^\t]+\t[^\t]+/;
    return ($line_number == 1 && $text =~ $two_fields) ;
}
981 =head2 _possibly_vcf
983 From "http://www.1000genomes.org/wiki/analysis/vcf4.0".
985 Assumptions made about sanity - format and date lines are line 1 and 2
986 respectively. This is not specified in the format document.
988 =cut
sub _possibly_vcf {
    # True when line 1 starts with "##fileformat=VCFv" or line 2 starts
    # with "##fileDate=".
    #
    # Both patterns are anchored to the start of the line: VCF meta
    # information lines begin with "##", and an unanchored pattern would
    # also accept unrelated lines that merely mention these strings.
    # This matches the anchoring used by the GFF test for its "##" lines.
    my ($line, $lineno) = @_;
    return (($lineno == 1 && $line =~ /^##fileformat=VCFv/) ||
            ($lineno == 2 && $line =~ /^##fileDate=/));
}