4 Bio::Tools::PrositeScan - Parser for ps_scan result
8 use Bio::Tools::PrositeScan;
10 my $factory = Bio::Tools::PrositeScan->new(
11 -file => 'out.PrositeScan',
15 while(my $match = $factory->next_prediction){
16 # $match is a Bio::SeqFeature::FeaturePair
19 my $seq_id = $match->seq_id;
21 # PROSITE accession number
22 my $psac = $match->hseq_id;
25 my @coords = ( $match->start, $match->end );
28 my $seq = $match->feature1->seq;
33 This is a parser for the output of the ps_scan program. It takes either a file
34 handle or a file name, and returns one L<Bio::SeqFeature::FeaturePair> object per match.
36 Note that the current implementation parses the entire file at once.
40 Juguang Xiao, juguang@tll.org.sg
46 =item * L<ps_scan software|ftp://ftp.expasy.org/databases/prosite/ps_scan>
48 =item * L<PROSITE User Manual|http://prosite.expasy.org/prosuser.html>
54 # Let the code begin...
56 package Bio
::Tools
::PrositeScan
;
57 use vars
qw(@FORMATS);
60 use Bio::SeqFeature::Generic;
61 use Bio::SeqFeature::FeaturePair;
63 use base qw(Bio::Root::Root Bio::Root::IO);
64 @FORMATS = qw(SCAN FASTA PSA MSA PFF MATCHLIST);
69 Usage : Bio::Tools::PrositeScan->new(-file => 'out.PrositeScan', -format => 'fasta');
70 Bio::Tools::PrositeScan->new(-fh => \*FH, -format => 'fasta');
71 Returns : L<Bio::Tools::PrositeScan>
72 Args : -format => string representing the format type for the
73 ps_scan output, REQUIRED
75 The C<-format> argument must currently be set to C<fasta> since this is the
76 only parser implemented. This corresponds with using the ps_scan arguments
82 my ($class, @args) = @_;
83 my $self = $class->SUPER::new
(@args);
84 $self->_initialize_io(@args);
85 my ($format) = $self->_rearrange([qw(FORMAT)], @args);
86 $format || $self->throw("format needed");
87 if(grep /^$format$/i, @FORMATS){
88 $self->format($format);
90 $self->throw("Invalid format, [$format]");
97 return $self->{_format
} = shift if(@_);
98 return $self->{_format
};
101 =head2 next_prediction
105 while($result = $factory->next_prediction){
109 Returns : a Bio::SeqFeature::FeaturePair object where
110 feature1 is the matched subsequence and
111 feature2 is the PROSITE accession number.
112 See L<http://prosite.expasy.org/prosuser.html#conv_ac>.
116 sub next_prediction
{
118 unless($self->_parsed){
122 return shift @
{$self->{_matches
}};
126 return shift->next_prediction;
131 return $self->{_parsed
} = 1 if @_ && $_[0];
132 return $self->{_parsed
};
137 my $format = $self->format;
138 if($self->format =~ /^fasta$/){
141 $self->throw("the [$format] parser has not been written");
150 while(defined($_ = $self->_readline)){
154 if($fasta_head =~ /([^\/]+)\
/(\d+)\-(\d+)(\s+)\:(\s+)(\S+)/){
160 $self->_attach_seq($seq, $fp);
163 $fp = Bio
::SeqFeature
::FeaturePair
->new(
164 -feature1
=> Bio
::SeqFeature
::Generic
->new(
169 -feature2
=> Bio
::SeqFeature
::Generic
->new(
177 $self->throw("ERR:\t\[$_\]");
179 }else{ # sequence lines, ignored
184 $self->_attach_seq($seq, $fp);
187 push @
{$self->{_matches
}}, @matches;
191 my ($self, $seq, $fp) = @_;
193 my $whole_seq = 'X' x
($fp->start-1);
195 $fp->feature1->attach_seq(
196 Bio
::Seq
->new(-seq
=> $whole_seq)