2 # BioPerl module for Bio::Tools::Blat
4 # Written by Balamurugan Kumarasamy
6 # You may distribute this module under the same terms as perl itself
11 Bio::Tools::Blat - parser for Blat program
16 my $blat_parser = Bio::Tools::Blat->new(-fh =>$filehandle );
17 while( my $blat_feat = $blat_parser->next_result ) {
18 push @blat_feat, $blat_feat;
23 Parser for Blat program
29 User feedback is an integral part of the evolution of this and other
30 Bioperl modules. Send your comments and suggestions preferably to
31 the Bioperl mailing list. Your participation is much appreciated.
33 bioperl-l@bioperl.org - General discussion
34 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
38 Please direct usage questions or support issues to the mailing list:
40 I<bioperl-l@bioperl.org>
42 rather than to the module maintainer directly. Many experienced and
43 reponsive experts will be able look at the problem and quickly
44 address it. Please include a thorough description of the problem
45 with code and data examples if at all possible.
49 Report bugs to the Bioperl bug tracking system to help us keep track
50 of the bugs and their resolution. Bug reports can be submitted the
53 https://github.com/bioperl/bioperl-live/issues
55 =head1 AUTHOR - Balamurugan Kumarasamy
57 Email: bala@tll.org.sg
61 The rest of the documentation details each of the object methods.
62 Internal methods are usually preceded with a _
66 package Bio
::Tools
::Blat
;
68 use Bio
::SeqFeature
::Generic
;
69 use Bio
::SeqFeature
::FeaturePair
;
70 use Bio
::SeqFeature
::Generic
;
71 use base
qw(Bio::Root::Root Bio::Root::IO);
76 Usage : my $obj = Bio::Tools::Blat->new(-fh=>$filehandle);
77 Function: Builds a new Bio::Tools::Blat object
78 Returns : Bio::Tools::Blat
85 my($class,@args) = @_;
87 my $self = $class->SUPER::new
(@args);
88 $self->_initialize_io(@args);
96 Usage : my $feat = $blat_parser->next_result
97 Function: Get the next result set from parser data
98 Returns : L<Bio::SeqFeature::Generic>
109 while ($_=$self->_readline()){
110 # first split on spaces:
114 my ($matches, $mismatches, $rep_matches, $n_count, $q_num_insert,
115 $q_base_insert, $t_num_insert, $t_base_insert, $strand, $q_name,
116 $q_length, $q_start, $q_end, $t_name, $t_length,
117 $t_start, $t_end, $block_count, $block_sizes, $q_starts,
121 my $superfeature = Bio
::SeqFeature
::Generic
->new();
123 # ignore any preceeding text
124 next unless ( $matches =~/^\d+$/ );
126 # create as many features as blocks there are in each output line
128 $feat1{name
} = $t_name;
129 $feat2{name
} = $q_name;
131 $strand = $1 if ($strand =~/([+-])[+-]/);
134 $feat1{strand
} = $strand;
136 my $percent_id = sprintf "%.2f",
137 (100 * ($matches + $rep_matches)/( $matches + $mismatches + $rep_matches));
139 unless ( $q_length ){
140 $self->warn("length of query is zero, something is wrong!");
144 my $score = sprintf "%.2f",
145 (100 * ( $matches + $mismatches + $rep_matches ) / $q_length);
147 # size of each block of alignment (inclusive)
148 my @block_sizes = split ",",$block_sizes;
150 # start position of each block (you must add 1 as psl output
151 # is off by one in the start coordinate)
152 my @q_start_positions = split ",",$q_starts;
153 my @t_start_positions = split ",",$t_starts;
155 $superfeature->seq_id($q_name);
156 $superfeature->score( $score );
157 $superfeature->add_tag_value('percent_id',$percent_id);
159 # each line of output represents one possible entire aligment
160 # of the query (feat1) and the target(feat2)
162 for (my $i=0; $i<$block_count; $i++ ){
164 my ($query_start,$query_end);
166 if ( $strand eq '+' ){
167 $query_start = $q_start_positions[$i] + 1;
168 $query_end = $query_start + $block_sizes[$i] - 1;
170 $query_end = $q_length - $q_start_positions[$i];
171 $query_start = $query_end - $block_sizes[$i] + 1;
174 #$feat2 {start} = $q_start_positions[$i] + 1;
175 #$feat2 {end} = $feat2{start} + $block_sizes[$i] - 1;
176 $feat2 {start
} = $query_start;
177 $feat2 {end
} = $query_end;
178 if ( $query_end < $query_start ){
179 $self->warn("dodgy feature coordinates: end = $query_end, start = $query_start. Reversing...");
180 $feat2 {end
} = $query_start;
181 $feat2 {start
} = $query_end;
184 $feat1 {start
} = $t_start_positions[$i] + 1;
185 $feat1 {end
} = $feat1{start
} + $block_sizes[$i] - 1;
187 # we put all the features with the same score and percent_id
188 $feat2 {score
} = $score;
189 $feat1 {score
} = $feat2 {score
};
190 $feat2 {percent
} = $percent_id;
191 $feat1 {percent
} = $feat2 {percent
};
195 $feat1 {db_version
} = undef;
196 $feat1 {program
} = 'blat';
197 $feat1 {p_version
} = '1';
198 $feat1 {source
} = 'blat';
199 $feat1 {primary
} = 'similarity';
200 $feat2 {source
} = 'blat';
201 $feat2 {primary
} = 'similarity';
203 my $feature_pair = $self->create_feature(\
%feat1, \
%feat2);
204 $superfeature->add_sub_SeqFeature( $feature_pair,'EXPAND');
206 return $superfeature;
210 =head2 create_feature
212 Title : create_feature
213 Usage : my $feat=$blat_parser->create_feature($feature,$seqname)
214 Function: creates a SeqFeature Generic object
215 Returns : L<Bio::SeqFeature::Generic>
222 my ($self, $feat1,$feat2) = @_;
223 my $feature1= Bio
::SeqFeature
::Generic
->new(
224 -seq_id
=>$feat1->{name
},
225 -start
=>$feat1->{start
},
226 -end
=>$feat1->{end
},
227 -strand
=>$feat1->{strand
},
228 -score
=>$feat1->{score
},
229 -source
=>$feat1->{source
},
230 -primary
=>$feat1->{primary
} );
232 my $feature2= Bio
::SeqFeature
::Generic
->new(
233 -seq_id
=>$feat2->{name
},
234 -start
=>$feat2->{start
},
235 -end
=>$feat2->{end
},
236 -strand
=>$feat2->{strand
},
237 -score
=>$feat2->{score
},
238 -source
=>$feat2->{source
},
239 -primary
=>$feat2->{primary
} );
241 my $featurepair = Bio
::SeqFeature
::FeaturePair
->new;
242 $featurepair->feature1 ($feature1);
243 $featurepair->feature2 ($feature2);
245 $featurepair->add_tag_value('evalue',$feat2->{p
});
246 $featurepair->add_tag_value('percent_id',$feat2->{percent
});
247 $featurepair->add_tag_value("hid",$feat2->{primary
});