Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / SearchIO / cross_match.pm
blobbeb883ec61a0efa667cd036ce3c26511873f5319
2 # BioPerl module for Bio::SearchIO::cross_match
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Shin Leong <sleong@watson.wustl.edu>
8 # Copyright Shin Leong
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SearchIO::cross_match - CrossMatch-specific subclass of Bio::SearchIO
18 =head1 SYNOPSIS
20 # Working with iterations (CrossMatch results)
21 my $searchIO = Bio::SearchIO->new( -format => 'cross_match',
22 -file => "$file.screen.out" )
23 while(my $r = $searchIO->next_result) {
24 while(my $hit = $r->next_hit) {
25 while(my $hsp = $hit->next_hsp) {
26 #Do the processing here.
31 See L<Bio::SearchIO> for details about working with Bio::SearchIO.
33 =head1 DESCRIPTION
35 This object is a subclass of Bio::SearchIO
36 and provides some operations that facilitate working with CrossMatch
37 and CrossMatch results.
39 For general information about working with Results, see
40 L<Bio::Search::Result::GenericResult>.
42 =head1 FEEDBACK
44 =head2 Mailing Lists
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
53 =head2 Support
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
64 =head2 Reporting Bugs
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via the
68 web:
70 https://github.com/bioperl/bioperl-live/issues
72 =head1 AUTHOR - Shin Leong
74 Email sleong@watson.wustl.edu
76 =head1 CONTRIBUTORS
78 Additional contributors names and emails here
80 =head1 APPENDIX
82 The rest of the documentation details each of the object methods.
83 Internal methods are usually preceded with a _
85 =cut
88 # Let the code begin...
90 package Bio::SearchIO::cross_match;
91 use Bio::Search::Result::CrossMatchResult;
92 use Bio::SearchIO;
93 use Bio::Search::Hit::GenericHit;
94 use Bio::Search::HSP::GenericHSP;
95 use base qw(Bio::SearchIO);
97 =head2 next_result
99 Title : next_result
100 Usage : $result = stream->next_result
101 Function: Reads the next ResultI object from the stream and returns it.
103 Certain driver modules may encounter entries in the stream that
104 are either misformatted or that use syntax not yet understood
105 by the driver. If such an incident is recoverable, e.g., by
106 dismissing a feature of a feature table or some other non-mandatory
107 part of an entry, the driver will issue a warning. In the case
108 of a non-recoverable situation an exception will be thrown.
109 Do not assume that you can resume parsing the same stream after
110 catching the exception. Note that you can always turn recoverable
111 errors into exceptions by calling $stream->verbose(2) (see
112 Bio::Root::RootI POD page).
113 Returns : A Bio::Search::Result::ResultI object
114 Args : n/a
116 See L<Bio::Root::RootI>
118 =cut
120 sub next_result {
121 my ($self) = @_;
122 my $start = 0;
123 while ( defined( $_ = $self->_readline ) ) {
124 return if ( $self->{'_end_document'} );
125 if (/^cross_match version\s+(.*?)$/) {
126 $self->{_algorithm_version} = $1;
128 elsif (/^Maximal single base matches/) {
129 $start = 1;
131 elsif (/^(\d+) matching entries/) {
132 $self->{'_end_document'} = 1;
133 return;
135 elsif ( ( $start || $self->{'_result_count'} ) && /^\s*(\d+)/xms ) {
136 $self->{'_result_count'}++;
137 return $self->_parse($_);
139 elsif ( !$self->{_parameters} ) {
140 if (/.*?\s+(\-.*?)$/) {
141 my $p = $1;
142 my @pp = split /\s+/, $p;
143 for ( my $i = 0 ; $i < @pp ; $i++ ) {
144 if ( $pp[$i] =~ /^\-/ ) {
145 if ( $pp[ $i + 1 ] && $pp[ $i + 1 ] !~ /^\-/ ) {
146 $self->{_parameters}->{ $pp[$i] } = $pp[ $i + 1 ];
147 $i++;
149 else {
150 $self->{_parameters}->{ $pp[$i] } = "";
156 elsif (/^Query file(s):\s+(.*?)$/) {
157 $self->{_query_name} = $1;
159 elsif (/^Subject file(s):\s+(.*?)$/) {
160 $self->{_subject_name} = $2;
166 =head2 _alignment
168 Title : _alignment
169 Usage : private
171 =cut
173 sub _alignment {
174 my $self = shift;
176 # C H_EO-aaa01PCR02 243 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 194
178 # 0284119_008.c1- 1 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 50
180 # C H_EO-aaa01PCR02 193 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 144
182 # 0284119_008.c1- 51 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 100
184 # C H_EO-aaa01PCR02 143 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 94
186 # 0284119_008.c1- 101 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 150
188 # C H_EO-aaa01PCR02 93 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 44
190 # 0284119_008.c1- 151 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 200
192 # C H_EO-aaa01PCR02 43 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 9
194 # 0284119_008.c1- 201 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 235
196 # LSF: Should be the blank line. Otherwise error.
197 my $blank = $self->_readline;
198 unless ( $blank =~ /^\s*$/ ) {
199 return;
201 my @data;
202 my @pad;
203 $count = 0;
204 while ( defined( $_ = $self->_readline ) ) {
205 $count = 0 if ( $count >= 3 );
206 next if (/^$/);
207 if (/^(C \S+.*?\d+ )(\S+) \d+$|^( \S+.*?\d+ )(\S+) \d+$$|^\s+$/) {
208 $count++;
209 if ( $1 || $3 ) {
210 $pad[$count] = $1 ? $1 : $3;
211 push @{ $data[$count] }, ( $2 ? $2 : $4 );
213 else {
214 if (/\s{$pad[0],$pad[0]}(.*?)$/) {
215 push @{ $data[$count] }, $1;
217 else {
218 $self->throw("Format error for the homology line [$_].");
222 else {
223 last;
226 return @data;
230 =head2 _parse
232 Title : _parse
233 Usage : private
235 =cut
237 sub _parse {
238 my $self = shift;
239 my $line = shift;
240 my $is_alignment = 0;
241 my ( $hit_seq, $homology_seq, $query_seq );
243 # 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
245 #ALIGNMENT 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
246 $line =~ s/^\s+|\s+$//g;
247 my @r = split /\s+/, $line;
248 if ( $r[0] eq "ALIGNMENT" ) {
249 $is_alignment = 1;
250 shift @r;
251 ( $hit_seq, $homology_seq, $query_seq ) = $self->_alignment();
253 my $subject_seq_id;
254 my $query_seq_id = $r[4];
255 my $query_start = $r[5];
256 my $query_end = $r[6];
257 my $is_complement = 0;
258 my $subject_start;
259 my $subject_end;
261 if ( $r[8] eq "C" && $r[9] !~ /^\(\d+\)$/ ) {
262 $subject_seq_id = $r[9];
263 $is_complement = 1;
264 $subject_start = $r[11];
265 $subject_end = $r[12];
267 else {
268 $subject_seq_id = $r[8];
269 $subject_start = $r[9];
270 $subject_end = $r[10];
272 my $hit = Bio::Search::Hit::GenericHit->new(
273 -name => $subject_seq_id,
274 -hsps => [
275 Bio::Search::HSP::GenericHSP->new(
276 -query_name => $query_seq_id,
277 -query_start => $query_start,
278 -query_end => $query_end,
279 -hit_name => $subject_seq_id,
280 -hit_start => $subject_start,
281 -hit_end => $subject_end,
282 -query_length => 0,
283 -hit_length => 0,
284 -identical => $r[0],
285 -conserved => $r[0],
286 -query_seq => $query_seq
287 ? ( join "", @$query_seq )
288 : "", #query sequence portion of the HSP
289 -hit_seq => $hit_seq
290 ? ( join "", @$hit_seq )
291 : "", #hit sequence portion of the HSP
292 -homology_seq => $homology_seq
293 ? ( join "", @$homology_seq )
294 : "", #homology sequence for the HSP
295 #LSF: Need the direction, just to fool the GenericHSP module.
296 -algorithm => 'SW',
300 my $result = Bio::Search::Result::CrossMatchResult->new(
301 -query_name => $self->{_query_name},
302 -query_accession => '',
303 -query_description => '',
304 -query_length => 0,
305 -database_name => $self->{_subject_name},
306 -database_letters => 0,
307 -database_entries => 0,
308 -parameters => $self->{_parameters},
309 -statistics => {},
310 -algorithm => 'cross_match',
311 -algorithm_version => $self->{_algorithm_version},
313 $result->add_hit($hit);
314 return $result;
318 =head2 result_count
320 Title : result_count
321 Usage : $num = $stream->result_count;
322 Function: Gets the number of CrossMatch results that have been parsed.
323 Returns : integer
324 Args : none
325 Throws : none
327 =cut
329 sub result_count {
330 my $self = shift;
331 return $self->{'_result_count'};
336 #$Header$