2 # BioPerl module for Bio::SearchIO::cross_match
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Shin Leong <sleong@watson.wustl.edu>
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::SearchIO::cross_match - CrossMatch-specific subclass of Bio::SearchIO
20 # Working with iterations (CrossMatch results)
21 my $searchIO = Bio::SearchIO->new( -format => 'cross_match',
22 -file => "$file.screen.out" )
23 while(my $r = $searchIO->next_result) {
24 while(my $hit = $r->next_hit) {
25 while(my $hsp = $hit->next_hsp) {
26 #Do the processing here.
31 See L<Bio::SearchIO> for details about working with Bio::SearchIO.
35 This object is a subclass of Bio::SearchIO
36 and provides some operations that facilitate working with CrossMatch
37 and CrossMatch results.
39 For general information about working with Results, see
40 L<Bio::Search::Result::GenericResult>.
46 User feedback is an integral part of the evolution of this and other
47 Bioperl modules. Send your comments and suggestions preferably to
48 the Bioperl mailing list. Your participation is much appreciated.
50 bioperl-l@bioperl.org - General discussion
51 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
55 Please direct usage questions or support issues to the mailing list:
57 I<bioperl-l@bioperl.org>
59 rather than to the module maintainer directly. Many experienced and
60 reponsive experts will be able look at the problem and quickly
61 address it. Please include a thorough description of the problem
62 with code and data examples if at all possible.
66 Report bugs to the Bioperl bug tracking system to help us keep track
67 of the bugs and their resolution. Bug reports can be submitted via the
70 https://github.com/bioperl/bioperl-live/issues
72 =head1 AUTHOR - Shin Leong
74 Email sleong@watson.wustl.edu
78 Additional contributors names and emails here
82 The rest of the documentation details each of the object methods.
83 Internal methods are usually preceded with a _
88 # Let the code begin...
90 package Bio
::SearchIO
::cross_match
;
91 use Bio
::Search
::Result
::CrossMatchResult
;
93 use Bio
::Search
::Hit
::GenericHit
;
94 use Bio
::Search
::HSP
::GenericHSP
;
95 use base
qw(Bio::SearchIO);
100 Usage : $result = stream->next_result
101 Function: Reads the next ResultI object from the stream and returns it.
103 Certain driver modules may encounter entries in the stream that
104 are either misformatted or that use syntax not yet understood
105 by the driver. If such an incident is recoverable, e.g., by
106 dismissing a feature of a feature table or some other non-mandatory
107 part of an entry, the driver will issue a warning. In the case
108 of a non-recoverable situation an exception will be thrown.
109 Do not assume that you can resume parsing the same stream after
110 catching the exception. Note that you can always turn recoverable
111 errors into exceptions by calling $stream->verbose(2) (see
112 Bio::Root::RootI POD page).
113 Returns : A Bio::Search::Result::ResultI object
116 See L<Bio::Root::RootI>
123 while ( defined( $_ = $self->_readline ) ) {
124 return if ( $self->{'_end_document'} );
125 if (/^cross_match version\s+(.*?)$/) {
126 $self->{_algorithm_version
} = $1;
128 elsif (/^Maximal single base matches/) {
131 elsif (/^(\d+) matching entries/) {
132 $self->{'_end_document'} = 1;
135 elsif ( ( $start || $self->{'_result_count'} ) && /^\s*(\d+)/xms ) {
136 $self->{'_result_count'}++;
137 return $self->_parse($_);
139 elsif ( !$self->{_parameters
} ) {
140 if (/.*?\s+(\-.*?)$/) {
142 my @pp = split /\s+/, $p;
143 for ( my $i = 0 ; $i < @pp ; $i++ ) {
144 if ( $pp[$i] =~ /^\-/ ) {
145 if ( $pp[ $i + 1 ] && $pp[ $i + 1 ] !~ /^\-/ ) {
146 $self->{_parameters
}->{ $pp[$i] } = $pp[ $i + 1 ];
150 $self->{_parameters
}->{ $pp[$i] } = "";
156 elsif (/^Query file(s):\s+(.*?)$/) {
157 $self->{_query_name
} = $1;
159 elsif (/^Subject file(s):\s+(.*?)$/) {
160 $self->{_subject_name
} = $2;
176 # C H_EO-aaa01PCR02 243 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 194
178 # 0284119_008.c1- 1 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 50
180 # C H_EO-aaa01PCR02 193 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 144
182 # 0284119_008.c1- 51 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 100
184 # C H_EO-aaa01PCR02 143 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 94
186 # 0284119_008.c1- 101 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 150
188 # C H_EO-aaa01PCR02 93 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 44
190 # 0284119_008.c1- 151 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 200
192 # C H_EO-aaa01PCR02 43 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 9
194 # 0284119_008.c1- 201 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 235
196 # LSF: Should be the blank line. Otherwise error.
197 my $blank = $self->_readline;
198 unless ( $blank =~ /^\s*$/ ) {
204 while ( defined( $_ = $self->_readline ) ) {
205 $count = 0 if ( $count >= 3 );
207 if (/^(C \S+.*?\d+ )(\S+) \d+$|^( \S+.*?\d+ )(\S+) \d+$$|^\s+$/) {
210 $pad[$count] = $1 ?
$1 : $3;
211 push @
{ $data[$count] }, ( $2 ?
$2 : $4 );
214 if (/\s{$pad[0],$pad[0]}(.*?)$/) {
215 push @
{ $data[$count] }, $1;
218 $self->throw("Format error for the homology line [$_].");
240 my $is_alignment = 0;
241 my ( $hit_seq, $homology_seq, $query_seq );
243 # 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
245 #ALIGNMENT 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
246 $line =~ s/^\s+|\s+$//g;
247 my @r = split /\s+/, $line;
248 if ( $r[0] eq "ALIGNMENT" ) {
251 ( $hit_seq, $homology_seq, $query_seq ) = $self->_alignment();
254 my $query_seq_id = $r[4];
255 my $query_start = $r[5];
256 my $query_end = $r[6];
257 my $is_complement = 0;
261 if ( $r[8] eq "C" && $r[9] !~ /^\(\d+\)$/ ) {
262 $subject_seq_id = $r[9];
264 $subject_start = $r[11];
265 $subject_end = $r[12];
268 $subject_seq_id = $r[8];
269 $subject_start = $r[9];
270 $subject_end = $r[10];
272 my $hit = Bio
::Search
::Hit
::GenericHit
->new(
273 -name
=> $subject_seq_id,
275 Bio
::Search
::HSP
::GenericHSP
->new(
276 -query_name
=> $query_seq_id,
277 -query_start
=> $query_start,
278 -query_end
=> $query_end,
279 -hit_name
=> $subject_seq_id,
280 -hit_start
=> $subject_start,
281 -hit_end
=> $subject_end,
286 -query_seq
=> $query_seq
287 ?
( join "", @
$query_seq )
288 : "", #query sequence portion of the HSP
290 ?
( join "", @
$hit_seq )
291 : "", #hit sequence portion of the HSP
292 -homology_seq
=> $homology_seq
293 ?
( join "", @
$homology_seq )
294 : "", #homology sequence for the HSP
295 #LSF: Need the direction, just to fool the GenericHSP module.
300 my $result = Bio
::Search
::Result
::CrossMatchResult
->new(
301 -query_name
=> $self->{_query_name
},
302 -query_accession
=> '',
303 -query_description
=> '',
305 -database_name
=> $self->{_subject_name
},
306 -database_letters
=> 0,
307 -database_entries
=> 0,
308 -parameters
=> $self->{_parameters
},
310 -algorithm
=> 'cross_match',
311 -algorithm_version
=> $self->{_algorithm_version
},
313 $result->add_hit($hit);
321 Usage : $num = $stream->result_count;
322 Function: Gets the number of CrossMatch results that have been parsed.
331 return $self->{'_result_count'};