2 # BioPerl module for Bio::SearchIO::Writer::TextResultWriter
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SearchIO::Writer::TextResultWriter - Object to implement writing
17 a Bio::Search::ResultI in Text.
22 use Bio::SearchIO::Writer::TextResultWriter;
24 my $in = Bio::SearchIO->new(-format => 'blast',
25 -file => shift @ARGV);
27 my $writer = Bio::SearchIO::Writer::TextResultWriter->new();
28 my $out = Bio::SearchIO->new(-writer => $writer);
29 $out->write_result($in->next_result);
33 This object implements the SearchWriterI interface which will produce
34 a set of Text for a specific Bio::Search::Report::ReportI interface.
36 You can also provide the argument -filters =E<gt> \%hash to filter the at
37 the hsp, hit, or result level. %hash is an associative array which
38 contains any or all of the keys (HSP, HIT, RESULT). The values
39 pointed to by these keys would be references to a subroutine which
40 expects to be passed an object - one of Bio::Search::HSP::HSPI,
41 Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively.
42 Each function needs to return a boolean value as to whether or not the
43 passed element should be included in the output report - true if it is
44 to be included, false if it to be omitted.
46 For example to filter on sequences in the database which are too short
47 for your criteria you would do the following.
49 Define a hit filter method
53 return $hit->length E<gt> 100; # test if length of the hit sequence
56 my $writer = Bio::SearchIO::Writer::TextResultWriter->new(
57 -filters => { 'HIT' =E<gt> \&hit_filter }
60 Another example would be to filter HSPs on percent identity, let's
61 only include HSPs which are 75% identical or better.
65 return $hsp->percent_identity E<gt> 75;
67 my $writer = Bio::SearchIO::Writer::TextResultWriter->new(
68 -filters => { 'HSP' =E<gt> \&hsp_filter }
71 See L<Bio::SearchIO::SearchWriterI> for more info on the filter method.
74 This module will use the module Text::Wrap if it is installed to wrap
75 the Query description line. If you do not have Text::Wrap installed
76 this module will work fine but you won't have the Query line wrapped.
77 You will see a warning about this when you first instantiate a
78 TextResultWriter - to avoid these warnings from showing up, simply set
79 the verbosity upon initialization to -1 like this: my $writer = new
80 Bio::SearchIO::Writer::TextResultWriter(-verbose =E<gt> -1);
86 User feedback is an integral part of the evolution of this and other
87 Bioperl modules. Send your comments and suggestions preferably to
88 the Bioperl mailing list. Your participation is much appreciated.
90 bioperl-l@bioperl.org - General discussion
91 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
95 Please direct usage questions or support issues to the mailing list:
97 I<bioperl-l@bioperl.org>
99 rather than to the module maintainer directly. Many experienced and
100 reponsive experts will be able look at the problem and quickly
101 address it. Please include a thorough description of the problem
102 with code and data examples if at all possible.
104 =head2 Reporting Bugs
106 Report bugs to the Bioperl bug tracking system to help us keep track
107 of the bugs and their resolution. Bug reports can be submitted via the
110 https://github.com/bioperl/bioperl-live/issues
112 =head1 AUTHOR - Jason Stajich
114 Email jason@bioperl.org
118 The rest of the documentation details each of the object methods.
119 Internal methods are usually preceded with a _
124 # Let the code begin...
127 package Bio
::SearchIO
::Writer
::TextResultWriter
;
128 use vars
qw($MaxNameLen $MaxDescLen $AlignmentLineWidth $DescLineLen $TextWrapLoaded);
131 # Object preamble - inherits from Bio::Root::RootI
135 $AlignmentLineWidth = 60;
136 eval { require Text::Wrap; $TextWrapLoaded = 1;};
144 use base qw(Bio::Root::Root Bio::SearchIO::SearchWriterI);
149 Usage : my $obj = Bio::SearchIO::Writer::TextResultWriter->new();
150 Function: Builds a new Bio::SearchIO::Writer::TextResultWriter object
151 Returns : Bio::SearchIO::Writer::TextResultWriter
152 Args : -filters => hashref with any or all of the keys (HSP HIT RESULT)
153 which have values pointing to a subroutine reference
154 which will expect to get a Hit,HSP, Result object respectively
155 -no_wublastlinks => boolean. Do not display WU-BLAST lines even if
162 my($class,@args) = @_;
164 my $self = $class->SUPER::new
(@args);
165 my ($filters,$nowublastlinks) = $self->_rearrange([qw(FILTERS
168 if( defined $filters ) {
169 if( !ref($filters) =~ /HASH/i ) {
170 $self->warn("Did not provide a hashref for the FILTERS option, ignoring.");
172 while( my ($type,$code) = each %{$filters} ) {
173 $self->filter($type,$code);
177 $self->no_wublastlinks(! $nowublastlinks);
178 unless( $TextWrapLoaded ) {
179 $self->warn("Could not load Text::Wrap - the Query Description will not be line wrapped\n");
181 $Text::Wrap
::columns
= $MaxDescLen;
189 Purpose : Produces data for each Search::Result::ResultI in a string.
190 : This is an abstract method. For some useful implementations,
191 : see ResultTableWriter.pm, HitTableWriter.pm,
192 : and HSPTableWriter.pm.
193 Usage : print $writer->to_string( $result_obj, @args );
194 Argument : $result_obj = A Bio::Search::Result::ResultI object
195 : @args = any additional arguments used by your implementation.
196 Returns : String containing data for each search Result or any of its
197 : sub-objects (Hits and HSPs).
203 my ($self,$result,$num) = @_;
205 return unless defined $result;
206 my $links = $self->no_wublastlinks;
207 my ($resultfilter,$hitfilter, $hspfilter) = ( $self->filter('RESULT'),
208 $self->filter('HIT'),
209 $self->filter('HSP') );
210 return '' if( defined $resultfilter && ! &{$resultfilter}($result) );
212 my ($qtype,$dbtype,$dbseqtype,$type);
213 my $alg = $result->algorithm;
215 my $wublast = ($result->algorithm_version =~ /WashU/) ?
1 : 0;
217 # This is actually wrong for the FASTAs I think
218 if( $alg =~ /T(FAST|BLAST)([XY])/i ) {
219 $qtype = $dbtype = 'translated';
220 $dbseqtype = $type = 'PROTEIN';
221 } elsif( $alg =~ /T(FAST|BLAST)N/i ) {
223 $dbtype = 'translated';
225 $dbseqtype = 'NUCLEOTIDE';
226 } elsif( $alg =~ /(FAST|BLAST)N/i ||
227 $alg =~ /(WABA|EXONERATE)/i ) {
228 $qtype = $dbtype = '';
229 $type = $dbseqtype = 'NUCLEOTIDE';
230 } elsif( $alg =~ /(FAST|BLAST)P/ ||
231 $alg =~ /SSEARCH|(HMM|SEARCH|PFAM)/i ) {
232 $qtype = $dbtype = '';
233 $type = $dbseqtype = 'PROTEIN';
234 } elsif( $alg =~ /(FAST|BLAST)[XY]/i ) {
235 $qtype = 'translated';
237 $dbseqtype = $type = 'PROTEIN';
239 print STDERR
"algorithm was ", $result->algorithm, " couldn't match\n";
243 my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ?
3 : 1,
244 'Query:' => ( $qtype eq 'translated' ) ?
3 : 1);
247 if( ! defined $num || $num <= 1 ) {
248 $str = &{$self->start_report}($result);
251 $str .= &{$self->title}($result);
252 $str .= $result->algorithm . " " . $result->algorithm_version . "\n\n\n";
253 $str .= $result->algorithm_reference || $self->algorithm_reference($result);
254 $str .= &{$self->introduction}($result);
259 Sequences producing significant alignments
: (bits
) value
262 if( $result->can('rewind')) {
263 $result->rewind(); # support stream based parsing routines
265 while( my $hit = $result->next_hit ) {
266 next if( defined $hitfilter && ! &{$hitfilter}($hit) );
267 my $nm = $hit->name();
268 $self->debug( "no $nm for name (".$hit->description(). "\n")
270 my ($gi,$acc) = &{$self->id_parser}($nm);
271 my $p = "%-$MaxDescLen". "s";
273 my $desc = sprintf("%s %s",$nm,$hit->description);
274 if( length($desc) - 3 > $MaxDescLen) {
275 $descsub = sprintf($p,
276 substr($desc,0,$MaxDescLen-3) .
279 $descsub = sprintf($p,$desc);
281 $str .= $wublast ?
sprintf("%s %-4s %s\n",
283 defined $hit->raw_score ?
$hit->raw_score : ' ',
284 defined $hit->significance ?
$hit->significance : '?') :
285 sprintf("%s %-4s %s\n",
287 defined $hit->bits ?
$hit->bits: ' ',
288 defined $hit->significance ?
$hit->significance : '?');
289 my @hsps = $hit->hsps;
291 $hspstr .= sprintf(">%s %s\n%9sLength = %d\n\n",
293 defined $hit->description ?
$hit->description : '',
294 '', # empty is for the %9s in the str formatting
297 foreach my $hsp ( @hsps ) {
298 next if( defined $hspfilter && ! &{$hspfilter}($hsp) );
299 $hspstr .= sprintf(" Score = %4s bits (%s), Expect = %s",
300 $hsp->bits, $hsp->score, $hsp->evalue);
302 $hspstr .= ", P = ".$hsp->pvalue;
305 $hspstr .= sprintf(" Identities = %d/%d (%d%%)",
306 ( $hsp->frac_identical('total') *
307 $hsp->length('total')),
308 $hsp->length('total'),
309 POSIX
::floor
($hsp->frac_identical('total')
312 if( $type eq 'PROTEIN' ) {
313 $hspstr .= sprintf(", Positives = %d/%d (%d%%)",
314 ( $hsp->frac_conserved('total') *
315 $hsp->length('total')),
316 $hsp->length('total'),
317 POSIX
::floor
($hsp->frac_conserved('total') * 100));
321 $hspstr .= sprintf(", Gaps = %d/%d (%d%%)",
323 $hsp->length('total'),
324 POSIX
::floor
(100 * $hsp->gaps('total') /
325 $hsp->length('total')));
328 my ($hframe,$qframe) = ( $hsp->hit->frame,
330 my ($hstrand,$qstrand) = ($hsp->hit->strand,$hsp->query->strand);
331 # so TBLASTX will have Query/Hit frames
332 # BLASTX will have Query frame
333 # TBLASTN will have Hit frame
334 if( $hstrand || $qstrand ) {
335 $hspstr .= " Frame = ";
339 # if strand is null or 0 then it is protein
342 $signh = $hstrand < 0 ?
'-' : '+';
346 # if strand is null or 0 then it is protein
348 $signq =$qstrand < 0 ?
'-' : '+';
350 # remember bioperl stores frames as 0,1,2 (GFF way)
351 # BLAST reports reports as 1,2,3 so
352 # we have to add 1 to the frame values
353 if( defined $hframe && ! defined $qframe) {
354 $hspstr .= "$signh".($hframe+1);
355 } elsif( defined $qframe && ! defined $hframe) {
356 $hspstr .= "$signq".($qframe+1);
358 $hspstr .= sprintf(" %s%d / %s%d",
365 $hsp->can('links') && defined(my $lnks = $hsp->links) ) {
366 $hspstr .= sprintf(" Links = %s\n",$lnks);
370 my @hspvals = ( {'name' => 'Query:',
371 'seq' => $hsp->query_string,
372 'start' => ( $qstrand >= 0 ?
375 'end' => ($qstrand >= 0 ?
379 'direction' => $qstrand || 1
381 { 'name' => ' 'x6
, # this might need to adjust for long coordinates??
382 'seq' => $hsp->homology_string,
388 { 'name' => 'Sbjct:',
389 'seq' => $hsp->hit_string,
390 'start' => ($hstrand >= 0 ?
391 $hsp->hit->start : $hsp->hit->end),
392 'end' => ($hstrand >= 0 ?
393 $hsp->hit->end : $hsp->hit->start),
395 'direction' => $hstrand || 1
400 # let's set the expected length (in chars) of the starting number
401 # in an alignment block so we can have things line up
402 # Just going to try and set to the largest
404 my ($numwidth) = sort { $b <=> $a }(length($hspvals[0]->{'start'}),
405 length($hspvals[0]->{'end'}),
406 length($hspvals[2]->{'start'}),
407 length($hspvals[2]->{'end'}));
409 while ( $count <= $hsp->length('total') ) {
410 foreach my $v ( @hspvals ) {
411 my $piece = substr($v->{'seq'}, $v->{'index'} +$count,
412 $AlignmentLineWidth);
414 my $plen = scalar ( $cp =~ tr/\-//);
415 my ($start,$end) = ('','');
416 if( defined $v->{'start'} ) {
417 $start = $v->{'start'};
418 # since strand can be + or - use the direction
419 # to signify which whether to add or subtract from end
420 my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen )*
421 $baselens{$v->{'name'}};
422 if( length($piece) < $AlignmentLineWidth ) {
423 $d = (length($piece) - $plen) * $v->{'direction'} *
424 $baselens{$v->{'name'}};
426 $end = $v->{'start'} + $d - $v->{'direction'};
429 $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n",
436 $count += $AlignmentLineWidth;
443 $str .= "\n\n".$hspstr;
445 $str .= sprintf(qq{ Database
: %s
447 Number of letters
in database
: %s
448 Number of sequences
in database
: %s
452 $result->database_name(),
453 $result->get_statistic('posted_date') ||
454 POSIX
::strftime
("%b %d, %Y %I:%M %p",localtime),
455 &_numwithcommas
($result->database_letters()),
456 &_numwithcommas
($result->database_entries()),
457 $result->get_parameter('matrix') || '');
459 if( defined (my $open = $result->get_parameter('gapopen')) ) {
460 $str .= sprintf("Gap Penalties Existence: %d, Extension: %d\n",
461 $open || 0, $result->get_parameter('gapext') || 0);
464 # skip those params we've already output
465 foreach my $param ( grep { ! /matrix|gapopen|gapext/i }
466 $result->available_parameters ) {
467 $str .= "$param: ". $result->get_parameter($param) ."\n";
470 $str .= "Search Statistics\n";
471 # skip posted date, we already output it
472 foreach my $stat ( sort grep { ! /posted_date/ }
473 $result->available_statistics ) {
474 my $expect = $result->get_parameter('expect');
475 my $v = $result->get_statistic($stat);
476 if( $v =~ /^\d+$/ ) {
477 $v = &_numwithcommas
($v);
479 if( defined $expect &&
480 $stat eq 'seqs_better_than_cutoff' ) {
481 $str .= "seqs_better_than_$expect: $v\n";
484 $str .= "$stat: $v\n";
495 Usage : $index->start_report( CODE )
496 Function: Stores or returns the code to
497 write the start of the <HTML> block, the <TITLE> block
498 and the start of the <BODY> block of HTML. Useful
499 for (for instance) specifying alternative
500 HTML if you are embedding the output in
501 an HTML page which you have already started.
502 (For example a routine returning a null string).
503 Returns \&default_start_report (see below) if not
505 Example : $index->start_report( \&my_start_report )
506 Returns : ref to CODE if called without arguments
512 my( $self, $code ) = @_;
514 $self->{'_start_report'} = $code;
516 return $self->{'_start_report'} || \
&default_start_report
;
519 =head2 default_start_report
521 Title : default_start_report
522 Usage : $self->default_start_report($result)
523 Function: The default method to call when starting a report.
525 Args : First argument is a Bio::Search::Result::ResultI
529 sub default_start_report
{
537 Usage : $self->title($CODE)
539 Function: Stores or returns the code to provide HTML for the given
540 BLAST report that will appear at the top of the BLAST report
541 HTML output. Useful for (for instance) specifying
542 alternative routines to write your own titles.
543 Returns \&default_title (see below) if not
545 Example : $index->title( \&my_title )
546 Returns : ref to CODE if called without arguments
552 my( $self, $code ) = @_;
554 $self->{'_title'} = $code;
556 return $self->{'_title'} || \
&default_title
;
561 Title : default_title
562 Usage : $self->default_title($result)
563 Function: Provides HTML for the given BLAST report that will appear
564 at the top of the BLAST report output.
565 Returns : empty for text implementation
566 Args : First argument is a Bio::Search::Result::ResultI
573 # The HTML implementation
575 # qq{<CENTER><H1><a href="http://bioperl.org">Bioperl</a> Reformatted HTML of %s Search Report<br> for %s</H1></CENTER>},
576 # $result->algorithm,
577 # $result->query_name());
584 Usage : $self->introduction($CODE)
586 Function: Stores or returns the code to provide HTML for the given
587 BLAST report detailing the query and the
588 database information.
589 Useful for (for instance) specifying
590 routines returning alternative introductions.
591 Returns \&default_introduction (see below) if not
593 Example : $index->introduction( \&my_introduction )
594 Returns : ref to CODE if called without arguments
600 my( $self, $code ) = @_;
602 $self->{'_introduction'} = $code;
604 return $self->{'_introduction'} || \
&default_introduction
;
607 =head2 default_introduction
609 Title : default_introduction
610 Usage : $self->default_introduction($result)
611 Function: Outputs HTML to provide the query
612 and the database information
613 Returns : string containing HTML
614 Args : First argument is a Bio::Search::Result::ResultI
615 Second argument is string holding literature citation
619 sub default_introduction
{
628 %s sequences
; %s total letters
630 &_linewrap
($result->query_name . " " .
631 $result->query_description),
632 &_numwithcommas
($result->query_length),
633 $result->database_name(),
634 &_numwithcommas
($result->database_entries()),
635 &_numwithcommas
($result->database_letters()),
642 Usage : $self->end_report()
643 Function: The method to call when ending a report, this is
644 mostly for cleanup for formats which require you to
645 have something at the end of the document (</BODY></HTML>)
657 # copied from Bio::Index::Fasta
658 # useful here as well
663 Usage : $index->id_parser( CODE )
664 Function: Stores or returns the code used by record_id to
665 parse the ID for record from a string. Useful
666 for (for instance) specifying a different
667 parser for different flavours of FASTA file.
668 Returns \&default_id_parser (see below) if not
669 set. If you supply your own id_parser
670 subroutine, then it should expect a fasta
671 description line. An entry will be added to
672 the index for each string in the list returned.
673 Example : $index->id_parser( \&my_id_parser )
674 Returns : ref to CODE if called without arguments
680 my( $self, $code ) = @_;
683 $self->{'_id_parser'} = $code;
685 return $self->{'_id_parser'} || \
&default_id_parser
;
690 =head2 default_id_parser
692 Title : default_id_parser
693 Usage : $id = default_id_parser( $header )
694 Function: The default Fasta ID parser for Fasta.pm
695 Returns $1 from applying the regexp /^>\s*(\S+)/
698 Args : a fasta header line string
702 sub default_id_parser
{
705 if( $string =~ s/gi\|(\d+)\|?// )
706 { $gi = $1; $acc = $1;}
708 if( $string =~ /(\w+)\|([A-Z\d\.\_]+)(\|[A-Z\d\_]+)?/ ) {
709 $acc = defined $2 ?
$2 : $1;
712 $acc =~ s/^\s+(\S+)/$1/;
713 $acc =~ s/(\S+)\s+$/$1/;
718 sub MIN
{ $a <=> $b ?
$a : $b; }
719 sub MAX
{ $a <=> $b ?
$b : $a; }
722 =head2 algorithm_reference
724 Title : algorithm_reference
725 Usage : my $reference = $writer->algorithm_reference($result);
726 Function: Returns the appropriate Bibliographic reference for the
727 algorithm format being produced
729 Args : L<Bio::Search::Result::ResultI> to reference
734 sub algorithm_reference
{
735 my ($self,$result) = @_;
736 return '' if( ! defined $result || !ref($result) ||
737 ! $result->isa('Bio::Search::Result::ResultI')) ;
738 if( $result->algorithm =~ /BLAST/i ) {
739 my $res = $result->algorithm . ' '. $result->algorithm_version. "\n";
740 if( $result->algorithm_version =~ /WashU/i ) {
742 Copyright
(C
) 1996-2000 Washington University
, Saint Louis
, Missouri USA
.
745 Reference
: Gish
, W
. (1996-2000) http
://blast
.wustl
.edu
749 Reference
: Altschul
, Stephen F
., Thomas L
. Madden
, Alejandro A
. Schaffer
,
750 Jinghui Zhang
, Zheng Zhang
, Webb Miller
, and David J
. Lipman
(1997),
751 "Gapped BLAST and PSI-BLAST: a new generation of protein database search
752 programs", Nucleic Acids Res
. 25:3389-3402.
755 } elsif( $result->algorithm =~ /FAST/i ) {
756 return $result->algorithm. " ". $result->algorithm_version . "\n".
757 "\nReference: Pearson et al, Genomics (1997) 46:24-36\n";
763 # from Perl Cookbook 2.17
765 my $num = reverse( $_[0] );
766 $num =~ s/(\d{3})(?=\d)(?!\d*\.)/$1,/g;
767 return scalar reverse $num;
772 if($TextWrapLoaded) {
773 return Text
::Wrap
::wrap
("","",$str); # use Text::Wrap
774 } else { return $str; } # cannot wrap
776 =head2 Methods Bio::SearchIO::SearchWriterI
778 L<Bio::SearchIO::SearchWriterI> inherited methods.
783 Usage : $writer->filter('hsp', \&hsp_filter);
784 Function: Filter out either at HSP,Hit,or Result level
786 Args : string => data type,
792 =head2 no_wublastlinks
794 Title : no_wublastlinks
795 Usage : $obj->no_wublastlinks($newval)
796 Function: Get/Set boolean value regarding whether or not to display
798 type output in the report output (WU-BLAST only)
800 Args : on set, new boolean value (a scalar or undef, optional)
808 return $self->{'no_wublastlinks'} = shift if @_;
809 return $self->{'no_wublastlinks'};