5 fastm9_to_table - turn FASTA -m 9 output into NCBI -m 9 tabular output
9 fastm9_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ...
14 -e/--evalue evalue -- filter by evalue
15 -b/--bitscore bitscore -- filter by bitscore
16 --header -- boolean flag to print column header
17 -o/--out -- optional outputfile to write data,
18 otherwise will write to STDOUT
19 -h/--help -- show this documentation
21 Not technically a SearchIO script as this doesn't use any Bioperl
22 components but is a useful and fast. The output is tabular output
23 with the standard NCBI -m9 columns.
31 query start (if on rev-strand start > end)
33 hit start (if on rev-strand start > end)
38 Additionally 3 more columns are provided
47 =head1 AUTHOR - Jason Stajich
49 Jason Stajich jason_at_bioperl-dot-org
59 my ($evalue,$bitscore,$header,$outfile) = ( 10, 0);
61 'b|bitscore|bits:f' => \
$bitscore,
62 'e|evalue:f' => \
$evalue,
64 'o|out|outfile:s' => \
$outfile,
65 'h|help' => sub { exec('perldoc',$0); exit; }
70 open $outfh, '>', $outfile or die "Could not write file '$outfile': $!\n";
80 my @fields = qw(qname hname percid alen mmcount gapcount
81 qstart qend hstart hend evalue score bits fs sw-score
82 percsim qlen hlen qgap hgap);
83 print $outfh "#",uc(join("", map{ sprintf("%-10s",$_) } @fields)), "\n" if $header;
87 if( /^\s*\d+>>>(\S+).+/ ) {
89 if( /\-?\s+(\d+)\s+(aa|nt)\s+$/ ){
92 } elsif( $hitsection && (/^>>>\Q$data{'qname'}/ || /^>>>/) ) {
94 } elsif( /^The best scores are:/ ) {
97 } elsif( $hitsection ) {
98 if( s/^(\S+)\s+(.+)\(\s*(\d+)\)\s+// ) {
99 my ($hit, $desc,$hitlen) = ($1,$2,$3);
100 my ($dir) = ( s/^\[(r|f)\]\s+// );
101 my @line = split(/\s+/,$_);
102 $data{'hname'} = $hit;
103 $data{'hlen'} = $hitlen;
104 $data{'score'} = shift @line;
105 $data{'bits'} = shift @line;
106 $data{'evalue'} = shift @line;
107 $data{'percid'} = shift @line;
109 $data{'percsim'} = shift @line;
110 $data{'sw-score'} = shift @line;
111 $data{'alen'} = shift @line;
112 $data{'qstart'} = shift @line;
113 $data{'qend'} = shift @line;
114 $data{'pn0'} = shift @line; # pn0
115 $data{'px0'} = shift @line; # px0
116 $data{'hstart'} = shift @line; # an1
117 $data{'hend'} = shift @line; # ax1
118 $data{'pn1'} = shift @line; # pn1
119 $data{'px1'} = shift @line; # px1
121 $data{'qgap'} = shift @line;
122 $data{'hgap'} = shift @line;
123 $data{'gapcount'} = $data{'qgap'} + $data{'hgap'};
124 $data{'fs'} = shift @line;
126 $data{'mmcount'} = $data{'alen'} - ( int($data{'percid'} * $data{'alen'}) + $data{'gapcount'});
127 #next if( $data{'evalue'} > $evalue ||
128 # $data{'bits'} < $bitscore );
130 for ( $data{'percid'}, $data{'percsim'} ) {
131 $_ = sprintf("%.2f",$_*100);
133 print $outfh join( "\t",map { $data{$_} } @fields),"\n";
135 # print STDERR "unrecognized line \n$linestr";
138 # warn("skipping a line like this: $_");