maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / examples / searchio / hitwriter.pl
blob089d44c8318421a3b935bc29c4605ca4f5dcbb2e
1 #!/usr/bin/perl
3 # Demonstrates the use of a SearchIO Blast parser and a SearchWriterI object
4 # for producing tab-delimited output of hit data from a Blast report
5 # input stream.
7 # Each row in the output represents data for a single hit.
8 # For hits containing multiple HSPs, the output information represents a
9 # summary across all HSPs.
11 # This parser represents a new and improved version of Bio::Tools::Blast.
13 # Usage:
14 # STDIN: stream containing one or more BLAST or PSI-BLAST reports.
15 # STDOUT: none, but generates an output file "hitwriter.out"
16 # containing tab-delimited data on a per-hit basis.
17 # STDERR: Progress info and any errors.
19 # In this example, we create a SearchIO parser that screens out hits
20 # based on expect (or P) scores and a default HitTableWriter. This writer
21 # provides the same functionality as the original Bio::Tools::Blast::table()
22 # function (i.e., a tab-delimited summary of each hit per row).
23 # HitTableWriter, however, is customizable so you can specify just the columns
24 # you want to have in the output table.
26 # For more documentation about the writer, including
27 # a complete list of columns, execute:
28 # perldoc Bio::SearchIO::Writer::HitTableWriter.
30 # For more documentation about working with Blast result objects,
31 # see docs for these modules:
32 # Bio::Search::Result::BlastResult
33 # Bio::Search::Iteration::IterationI
34 # Bio::Search::Hit::BlastHit
35 # Bio::Search::HSP::BlastHSP
37 # For more documentation about the Blast parser, see docs for
38 # Bio::SearchIO
40 # Author: Steve Chervitz <sac@bioperl.org>
42 use strict;
44 use Bio::SearchIO;
45 use Bio::SearchIO::Writer::HitTableWriter;
47 # These are the columns that will be in the output table of BLAST results.
48 my @columns = qw(
49 query_name
50 query_length
51 hit_name
52 hit_length
53 num_hsps
54 expect
55 frac_aligned_query
56 frac_identical_query
57 length_aln_query
58 gaps_total
59 strand_query
60 strand_hit
63 # The following columns require HSP alignment data:
64 # num_hsps
65 # frac_identical_query
66 # length_aln_query
67 # gaps_total
68 # strand_query
69 # strand_hit
71 print STDERR "\nUsing SearchIO->new()\n";
73 # Note that all parameters for the $in, $out, and $writer objects are optional.
74 # Default in = STDIN; Default out = STDOUT; Default writer = all columns
75 # In this example, we're reading from STDIN and writing to a file
76 # called "hitwriter.out"
77 # TODO: write hitless reports to STDERR and note if filtered.
78 my $in = Bio::SearchIO->new( -format => 'blast',
79 -fh => \*ARGV,
80 -signif => 0.1,
81 # -verbose=> 2
83 my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns
85 my $out = Bio::SearchIO->new( -format => 'blast',
86 -writer => $writer,
87 -file => ">hitwriter.out" );
88 # Need to keep a separate count of reports with hits
89 # to know when to include labels. The first report may be hitless,
90 # so we can't use $in->result_count
91 my $hit_count = 0;
92 while ( my $blast = $in->next_result() ) {
93 printf STDERR "\nReport %d: $blast\n", $in->result_count;
95 printf STDERR "query=%s, length=%d\n", $blast->query_name, $blast->query_length;
97 if( $blast->hits ) {
98 print STDERR "# hits= ", $blast->num_hits, "\n";
99 $hit_count++;
100 my @hits= $blast->hits;
101 print STDERR "frac_aligned_query= ", $hits[0]->frac_aligned_query, "\n";
103 $out->write_result($blast, $hit_count==1 );
105 else {
106 print STDERR "Hitless Blast Report ";
107 print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
110 ## For a simple progress monitor, uncomment this line:
111 #print STDERR "."; print STDERR "\n" if $in->result_count % 50 == 0;
114 printf STDERR "\n%d Blast report(s) processed.\n", $in->result_count;
115 printf STDERR "Output sent to file: %s\n", $out->file if $out->file;