t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / Tools / Blat.pm
blobf565f17e55f30aa869f3b392c58eeef4a49a8f8d
2 # BioPerl module for Bio::Tools::Blat
4 # Written by Balamurugan Kumarasamy
6 # You may distribute this module under the same terms as perl itself
9 =head1 NAME
11 Bio::Tools::Blat - parser for Blat program
13 =head1 SYNOPSIS
15 use Bio::Tools::Blat;
16 my $blat_parser = Bio::Tools::Blat->new(-fh =>$filehandle );
17 while( my $blat_feat = $blat_parser->next_result ) {
18 push @blat_feat, $blat_feat;
21 =head1 DESCRIPTION
23 Parser for Blat program
25 =head1 FEEDBACK
27 =head2 Mailing Lists
29 User feedback is an integral part of the evolution of this and other
30 Bioperl modules. Send your comments and suggestions preferably to
31 the Bioperl mailing list. Your participation is much appreciated.
33 bioperl-l@bioperl.org - General discussion
34 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
36 =head2 Support
38 Please direct usage questions or support issues to the mailing list:
40 I<bioperl-l@bioperl.org>
42 rather than to the module maintainer directly. Many experienced and
43 reponsive experts will be able look at the problem and quickly
44 address it. Please include a thorough description of the problem
45 with code and data examples if at all possible.
47 =head2 Reporting Bugs
49 Report bugs to the Bioperl bug tracking system to help us keep track
50 of the bugs and their resolution. Bug reports can be submitted the
51 web:
53 https://github.com/bioperl/bioperl-live/issues
55 =head1 AUTHOR - Balamurugan Kumarasamy
57 Email: bala@tll.org.sg
59 =head1 APPENDIX
61 The rest of the documentation details each of the object methods.
62 Internal methods are usually preceded with a _
64 =cut
66 package Bio::Tools::Blat;
67 use strict;
68 use Bio::SeqFeature::Generic;
69 use Bio::SeqFeature::FeaturePair;
70 use Bio::SeqFeature::Generic;
71 use base qw(Bio::Root::Root Bio::Root::IO);
73 =head2 new
75 Title : new
76 Usage : my $obj = Bio::Tools::Blat->new(-fh=>$filehandle);
77 Function: Builds a new Bio::Tools::Blat object
78 Returns : Bio::Tools::Blat
79 Args : -filename
80 -fh (filehandle)
82 =cut
84 sub new {
85 my($class,@args) = @_;
87 my $self = $class->SUPER::new(@args);
88 $self->_initialize_io(@args);
90 return $self;
93 =head2 next_result
95 Title : next_result
96 Usage : my $feat = $blat_parser->next_result
97 Function: Get the next result set from parser data
98 Returns : L<Bio::SeqFeature::Generic>
99 Args : none
101 =cut
103 sub next_result {
104 my ($self) = @_;
105 my $filehandle;
106 my $line;
107 my $id;
109 while ($_=$self->_readline()){
110 # first split on spaces:
111 $line = $_;
112 chomp $line;
114 my ($matches, $mismatches, $rep_matches, $n_count, $q_num_insert,
115 $q_base_insert, $t_num_insert, $t_base_insert, $strand, $q_name,
116 $q_length, $q_start, $q_end, $t_name, $t_length,
117 $t_start, $t_end, $block_count, $block_sizes, $q_starts,
118 $t_starts
119 ) = split;
121 my $superfeature = Bio::SeqFeature::Generic->new();
123 # ignore any preceeding text
124 next unless ( $matches =~/^\d+$/ );
126 # create as many features as blocks there are in each output line
127 my (%feat1, %feat2);
128 $feat1{name} = $t_name;
129 $feat2{name} = $q_name;
131 $strand = $1 if ($strand =~/([+-])[+-]/);
133 $feat2{strand} = 1;
134 $feat1{strand} = $strand;
136 my $percent_id = sprintf "%.2f",
137 (100 * ($matches + $rep_matches)/( $matches + $mismatches + $rep_matches));
139 unless ( $q_length ){
140 $self->warn("length of query is zero, something is wrong!");
141 next;
144 my $score = sprintf "%.2f",
145 (100 * ( $matches + $mismatches + $rep_matches ) / $q_length);
147 # size of each block of alignment (inclusive)
148 my @block_sizes = split ",",$block_sizes;
150 # start position of each block (you must add 1 as psl output
151 # is off by one in the start coordinate)
152 my @q_start_positions = split ",",$q_starts;
153 my @t_start_positions = split ",",$t_starts;
155 $superfeature->seq_id($q_name);
156 $superfeature->score( $score );
157 $superfeature->add_tag_value('percent_id',$percent_id);
159 # each line of output represents one possible entire aligment
160 # of the query (feat1) and the target(feat2)
162 for (my $i=0; $i<$block_count; $i++ ){
164 my ($query_start,$query_end);
166 if ( $strand eq '+' ){
167 $query_start = $q_start_positions[$i] + 1;
168 $query_end = $query_start + $block_sizes[$i] - 1;
169 }else{
170 $query_end = $q_length - $q_start_positions[$i];
171 $query_start = $query_end - $block_sizes[$i] + 1;
174 #$feat2 {start} = $q_start_positions[$i] + 1;
175 #$feat2 {end} = $feat2{start} + $block_sizes[$i] - 1;
176 $feat2 {start} = $query_start;
177 $feat2 {end} = $query_end;
178 if ( $query_end < $query_start ){
179 $self->warn("dodgy feature coordinates: end = $query_end, start = $query_start. Reversing...");
180 $feat2 {end} = $query_start;
181 $feat2 {start} = $query_end;
184 $feat1 {start} = $t_start_positions[$i] + 1;
185 $feat1 {end} = $feat1{start} + $block_sizes[$i] - 1;
187 # we put all the features with the same score and percent_id
188 $feat2 {score} = $score;
189 $feat1 {score} = $feat2 {score};
190 $feat2 {percent} = $percent_id;
191 $feat1 {percent} = $feat2 {percent};
193 # other stuff:
194 $feat1 {db} = undef;
195 $feat1 {db_version} = undef;
196 $feat1 {program} = 'blat';
197 $feat1 {p_version} = '1';
198 $feat1 {source} = 'blat';
199 $feat1 {primary} = 'similarity';
200 $feat2 {source} = 'blat';
201 $feat2 {primary} = 'similarity';
203 my $feature_pair = $self->create_feature(\%feat1, \%feat2);
204 $superfeature->add_sub_SeqFeature( $feature_pair,'EXPAND');
206 return $superfeature;
210 =head2 create_feature
212 Title : create_feature
213 Usage : my $feat=$blat_parser->create_feature($feature,$seqname)
214 Function: creates a SeqFeature Generic object
215 Returns : L<Bio::SeqFeature::Generic>
216 Args :
219 =cut
221 sub create_feature {
222 my ($self, $feat1,$feat2) = @_;
223 my $feature1= Bio::SeqFeature::Generic->new(
224 -seq_id =>$feat1->{name},
225 -start =>$feat1->{start},
226 -end =>$feat1->{end},
227 -strand =>$feat1->{strand},
228 -score =>$feat1->{score},
229 -source =>$feat1->{source},
230 -primary =>$feat1->{primary} );
232 my $feature2= Bio::SeqFeature::Generic->new(
233 -seq_id =>$feat2->{name},
234 -start =>$feat2->{start},
235 -end =>$feat2->{end},
236 -strand =>$feat2->{strand},
237 -score =>$feat2->{score},
238 -source =>$feat2->{source},
239 -primary =>$feat2->{primary} );
241 my $featurepair = Bio::SeqFeature::FeaturePair->new;
242 $featurepair->feature1 ($feature1);
243 $featurepair->feature2 ($feature2);
245 $featurepair->add_tag_value('evalue',$feat2->{p});
246 $featurepair->add_tag_value('percent_id',$feat2->{percent});
247 $featurepair->add_tag_value("hid",$feat2->{primary});
248 return $featurepair;