Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / Tools / PrositeScan.pm
blobad9d5b9edbde4d3f3447ff3b66e7c62271c47ec2
2 =head1 NAME
4 Bio::Tools::PrositeScan - Parser for ps_scan result
6 =head1 SYNOPSIS
8 use Bio::Tools::PrositeScan;
10 my $factory = Bio::Tools::PrositeScan->new(
11 -file => 'out.PrositeScan',
12 -format => 'fasta'
15 while(my $match = $factory->next_prediction){
16 # $match is a Bio::SeqFeature::FeaturePair
18 # Sequence ID
19 my $seq_id = $match->seq_id;
21 # PROSITE accession number
22 my $psac = $match->hseq_id;
24 # Coordinates
25 my @coords = ( $match->start, $match->end );
27 # Subsequence
28 my $seq = $match->feature1->seq;
31 =head1 DESCRIPTION
33 This is a parser of the output of the ps_scan program. It takes either a file
34 handle or a file name, and returns a L<Bio::SeqFeature::FeaturePair> object.
36 Note that the current implementation parses the entire file at once.
38 =head1 AUTHOR
40 Juguang Xiao, juguang@tll.org.sg
42 =head1 SEE ALSO
44 =over
46 =item * L<ps_scan software|ftp://ftp.expasy.org/databases/prosite/ps_scan>
48 =item * L<PROSITE User Manual|http://prosite.expasy.org/prosuser.html>
50 =back
52 =cut
54 # Let the code begin...
56 package Bio::Tools::PrositeScan;
57 use vars qw(@FORMATS);
58 use strict;
59 use Bio::Seq;
60 use Bio::SeqFeature::Generic;
61 use Bio::SeqFeature::FeaturePair;
63 use base qw(Bio::Root::Root Bio::Root::IO);
64 @FORMATS = qw(SCAN FASTA PSA MSA PFF MATCHLIST);
66 =head2 new
68 Title : new
69 Usage : Bio::Tools::PrositeScan->new(-file => 'out.PrositeScan');
70 Bio::Tools::PrositeScan->new(-fh => \*FH);
71 Returns : L<Bio::Tools::PrositeScan>
72 Args : -format => string representing the format type for the
73 ps_scan output, REQUIRED
75 The C<-format> argument must currently be set to C<fasta> since this is the
76 only parser implemented. This corresponds with using the ps_scan arguments
77 C<-o fasta>.
79 =cut
81 sub new {
82 my ($class, @args) = @_;
83 my $self = $class->SUPER::new(@args);
84 $self->_initialize_io(@args);
85 my ($format) = $self->_rearrange([qw(FORMAT)], @args);
86 $format || $self->throw("format needed");
87 if(grep /^$format$/i, @FORMATS){
88 $self->format($format);
89 }else{
90 $self->throw("Invalid format, [$format]");
92 return $self;
95 sub format {
96 my $self = shift;
97 return $self->{_format} = shift if(@_);
98 return $self->{_format};
101 =head2 next_prediction
103 Title : new
104 Usage :
105 while($result = $factory->next_prediction){
109 Returns : a Bio::SeqFeature::FeaturePair object where
110 feature1 is the matched subsequence and
111 feature2 is the PROSITE accession number.
112 See <http://prosite.expasy.org/prosuser.html#conv_ac>.
114 =cut
116 sub next_prediction {
117 my ($self) = @_;
118 unless($self->_parsed){
119 $self->_parse;
120 $self->_parsed(1);
122 return shift @{$self->{_matches}};
125 sub next_result {
126 return shift->next_prediction;
129 sub _parsed {
130 my $self = shift;
131 return $self->{_parsed} = 1 if @_ && $_[0];
132 return $self->{_parsed};
135 sub _parse {
136 my $self = shift;
137 my $format = $self->format;
138 if($self->format =~ /^fasta$/){
139 $self->_parse_fasta;
140 }else{
141 $self->throw("the [$format] parser has not been written");
145 sub _parse_fasta {
146 my ($self) = @_;
147 my @matches;
148 my $fp;
149 my $seq;
150 while(defined($_ = $self->_readline)){
151 chop;
152 if(/^\>([^>]+)/){
153 my $fasta_head = $1;
154 if($fasta_head =~ /([^\/]+)\/(\d+)\-(\d+)(\s+)\:(\s+)(\S+)/){
155 my $q_id = $1;
156 my $q_start = $2;
157 my $q_end = $3;
158 my $h_id = $6;
159 if(defined $fp){
160 $self->_attach_seq($seq, $fp);
161 push @matches, $fp;
163 $fp = Bio::SeqFeature::FeaturePair->new(
164 -feature1 => Bio::SeqFeature::Generic->new(
165 -seq_id => $q_id,
166 -start => $q_start,
167 -end => $q_end
169 -feature2 => Bio::SeqFeature::Generic->new(
170 -seq_id => $h_id,
171 -start => 0,
172 -end => 0
175 $seq = '';
176 }else{
177 $self->throw("ERR:\t\[$_\]");
179 }else{ # sequence lines, ignored
180 $seq .= $_;
183 if(defined $fp){
184 $self->_attach_seq($seq, $fp);
185 push @matches, $fp;
187 push @{$self->{_matches}}, @matches;
190 sub _attach_seq {
191 my ($self, $seq, $fp) = @_;
192 if(defined $fp){
193 my $whole_seq = 'X' x ($fp->start-1);
194 $whole_seq .= $seq;
195 $fp->feature1->attach_seq(
196 Bio::Seq->new(-seq => $whole_seq)