2 # BioPerl module for Bio::Tools::Analysis::Protein::ELM
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Richard Adams <richard.adams@ed.ac.uk>
8 # Copyright Richard Adams
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Tools::Analysis::Protein::ELM - a wrapper around the ELM server which predicts short functional motifs on amino acid sequences
20 # get a Bio::Seq object to start with, or a Bio::PrimarySeqI object.
22 my $tool = Bio::Tools::Analysis::Protein::ELM->
23 new(seq => $seqobj->primary_seq() );
24 $tool->compartment(['ER', 'Golgi']);
27 my @fts = $tool->result('Bio::SeqFeatureI');
28 $seqobj->addSeqFeature(@fts);
32 This module is a wrapper around the ELM server L<http://elm.eu.org/>
33 which predicts short functional motifs on amino acid sequences.
35 False positives can be limited by providing values for the species
36 and cellular compartment of the protein. To set the species attribute,
37 use either a L<Bio::Species> object or an NCBI taxon ID number. To set
38 the cell compartment attribute (any number of compartments can be
39 chosen) use an array reference to a list of compartment names.
41 Results can be obtained either as raw text output, parsed into a
42 data structure, or as Bio::SeqFeature::Generic objects.
46 L<Bio::SimpleAnalysisI>,
53 User feedback is an integral part of the evolution of this and other
54 Bioperl modules. Send your comments and suggestions preferably to one
55 of the Bioperl mailing lists. Your participation is much appreciated.
57 bioperl-l@bioperl.org - General discussion
58 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62 Please direct usage questions or support issues to the mailing list:
64 I<bioperl-l@bioperl.org>
66 rather than to the module maintainer directly. Many experienced and
67 responsive experts will be able to look at the problem and quickly
68 address it. Please include a thorough description of the problem
69 with code and data examples if at all possible.
73 Report bugs to the Bioperl bug tracking system to help us keep track
74 of the bugs and their resolution. Bug reports can be submitted via the
77 https://github.com/bioperl/bioperl-live/issues
81 Richard Adams, Richard.Adams@ed.ac.uk,
85 The rest of the documentation details each of the object
86 methods. Internal methods are usually preceded with a _
91 package Bio
::Tools
::Analysis
::Protein
::ELM
;
94 use Bio::SeqFeature::Generic;
95 use HTTP::Request::Common qw(POST);
97 use base
qw(Bio::Tools::Analysis::SimpleAnalysisBase);
99 ## valid cell compartments ##
# Each entry pairs a lower-case compartment name with a 'GO:'-prefixed
# identifier string. Presumably these are the entries of the %cc table that
# compartment() consults when validating and translating names -- the
# declaration line itself is not visible in this excerpt.
102 nucleus
=> 'GO:0005634',
103 extracellular
=> 'GO:0005576',
104 cytoplasm
=> 'GO:0005737',
105 peroxisome
=> 'GO:0005777',
106 glycosome
=> 'GO:0020015',
107 glyoxisome
=> 'GO:0009514',
108 golgi
=> 'GO:0005794',
110 lysosome
=> 'GO:0005764',
111 endosome
=> 'GO:0005768',
112 plasma_membrane
=> 'GO:0005886',
# Address of the ELM CGI script. The run code prefixes it (plus '?') to the
# query string extracted from the first server response when it builds the
# follow-up GET request.
115 my $URL = 'http://elm.eu.org/cgimodel.py';
# Analysis name; copied onto each object under the '_ANALYSIS_NAME' key.
116 my $ANALYSIS_NAME = 'ELM';
# Entries of the input and result specifications (the enclosing declarations
# are not visible in this excerpt; presumably $INPUT_SPEC and $RESULT_SPEC).
# species() and compartment() fall back to the 'default' entry of input-spec
# elements [1] and [2] respectively when no value has been stored.
120 'mandatory' => 'true',
121 'type' => 'Bio::PrimarySeqI',
125 'mandatory' => 'false',
126 'type' => 'taxon_id or Bio::Species object',
131 'mandatory' => 'false',
133 'name' => 'compartment',
140 '' => 'bulk', # same as undef
141 'Bio::SeqFeatureI' => 'ARRAY of Bio::SeqFeature::Generic',
142 'parsed' => '{motif1_name=>{locus=>[],
# Descriptive metadata for this analysis (name, supplier, description,
# literature reference); stored on each object under '_ANALYSIS_SPEC'.
148 my $ANALYSIS_SPEC= {name
=> 'ELM',
151 supplier
=>'BioComputing Unit, EMBL',
152 description
=>'Prediction of linear functional motifs
154 reference
=> 'NAR, 31:3625-3630'};
# Attach the module-level specification structures and the analysis name to
# the object under underscore-prefixed keys.
160 $self->{'_ANALYSIS_SPEC'} = $ANALYSIS_SPEC;
161 $self->{'_INPUT_SPEC'} = $INPUT_SPEC;
162 $self->{'_RESULT_SPEC'} = $RESULT_SPEC;
163 $self->{'_ANALYSIS_NAME'} = $ANALYSIS_NAME;
170 usage : $elm->compartment(['golgi', 'er']);
171 purpose : get/setter for cell compartment specifications
172 arguments : None, single compartment string or ref to array of
174 returns : Array of compartment names (default if not previously set).
# Get/set the cell compartments sent to the server. Accepts a single name or
# an array reference of names; each valid name is translated through %cc and
# the translated value is appended to $self->{'_compartment'}.
180 my ($self, $arg) = @_;
183 # convert to array ref if not one already
184 if (ref ($arg) ne 'ARRAY') {
188 ## now add params if valid
189 for my $param (@
$arg) {
# The existence check is case-insensitive: the name is lower-cased first.
190 if (exists($cc{lc($param)})) {
# NOTE(review): the value pushed below is $cc{$param} (original case) while
# the check above uses lc($param). A mixed-case name such as 'Golgi' passes
# the check but then appears to push undef -- confirm, and consider
# $cc{lc($param)}.
191 push @
{$self->{'_compartment'}} , $cc{$param};
# Invalid names only trigger a warning listing the keys of %cc.
193 $self->warn("invalid argument ! Must be one of " .
194 join "\n", keys %cc );
# Returns the stored array reference (holding the translated %cc values, not
# the names passed in); when nothing is stored, returns the 'default' entry
# of input-spec element [2].
199 return defined($self->{'_compartment'})?
$self->{'_compartment'}
200 : $self->input_spec()->[2]{'default'};
207 usage : $tool->species('9606');
208 purpose : get/setter for species selection for ELM server
209 arguments : none, taxon_id or Bio::Species object
210 returns : a string of the ncbi taxon_id
# Get/set the species used to filter predictions.
# Accepted forms: a Bio::Species object (its ncbi_taxid() is stored) or a
# string consisting only of digits (stored as given). Anything else produces
# a warning and leaves the stored value untouched.
215 my ($self, $arg) = @_;
# NOTE(review): ref($arg) is also true for unblessed references, and calling
# ->isa on one dies in Perl -- confirm callers never pass a plain ref.
218 if (ref($arg) && $arg->isa('Bio::Species')) {
219 $self->{'_species'} = $arg->ncbi_taxid();
220 } elsif ($arg =~ /^\d+$/) {
221 $self->{'_species'} = $arg;
223 $self->warn("Argument must be a Bio::Species object or ".
224 " an integer NCBI taxon id. ");
# Returns the stored taxon id, or the 'default' entry of input-spec element
# [1] when none has been stored.
227 return defined($self->{'_species'})?
$self->{'_species'}
228 :$self->input_spec()->[1]{'default'};
# Submits the sequence to the server with a POST request, extracts a query
# string from the reply, fetches that URL with a GET request and stores the
# tag-stripped text in $self->{'_result'} once the reply no longer contains
# 'patient'.
235 # delay repeated calls by default by 3 sec, set delay() to change
237 $self->status('TERMINATED_BY_ERROR');
239 #### this deals with being able to submit multiple checkboxed
242 #1st of all make param array
# The form data is built as a flat key/value list rather than a hash so that
# the 'userCC' key can occur once per selected compartment. splice() at
# offset scalar(@cc_str) with length 0 appends the pair to the end.
244 my @cmpts = @
{$self->compartment()};
245 for (my $i = 0; $i <= $#cmpts ; $i++) {
246 splice @cc_str, @cc_str, 0, 'userCC',$cmpts[$i];
# Remaining single-valued form fields; they are appended to the same flat
# list below as alternating key/value elements.
248 my %h = (swissprotId
=> "",
249 sequence
=> $self->seq->seq,
250 userSpecies
=> $self->species,
251 typedUserSpecies
=> '',
253 splice (@cc_str, @cc_str,0, ( map{$_, $h{$_}} keys %h));
# The submission is sent as a 'form-data' POST to $self->url().
256 my $request = POST
$self->url(),
257 Content_Type
=> 'form-data',
259 $self->debug( $request->as_string);
260 my $r1 = $self->request($request);
261 if ( $r1->is_error ) {
262 $self->warn(ref($self)." Request Error:\n".$r1->as_string);
# Pull the query string (from 'fun=' up to 'r=' plus one digit) out of the
# 'URL=' target in the first response.
# NOTE(review): 'r=\d' captures a single digit after 'r='; if the server
# ever sends a multi-digit value the remainder is dropped -- confirm.
266 my $text = $r1->content;
267 my ($url) = $text =~ /URL=\S+(fun=\S+r=\d)/s;
# Rebuild the follow-up address against the module-level $URL.
270 $url = $URL . "?" .$url;
272 my $req2 = HTTP
::Request
->new(GET
=>$url);
273 my $r2 = $self->request ($req2);
274 if ( $r2->is_error ) {
275 $self->warn(ref($self)." Request Error:\n".$r2->as_string);
278 $resp2 = $r2->content();
# A reply that does not contain 'patient' is treated as the finished result:
# every <...> tag is replaced by a single space and the text is cached.
280 if ($resp2 !~ /patient/s) {
281 $self->status('COMPLETED');
282 $resp2=~ s/<[^>]+>/ /sg;
283 $self->{'_result'} = $resp2;
# Progress indicator, printed only when verbosity is above 0.
286 print "." if $self->verbose > 0;
295 usage : $tool->result('Bio::SeqFeatureI');
296 purpose : parse results into sequence features or basic data format
297 arguments : 1. none (retrieves raw text without html)
298 2. a value (retrieves data structure)
299 3. 'Bio::SeqFeatureI' (returns array of sequence features)
300 tag names are : {method => 'ELM', motif => motifname,
301 peptide => sequence of match,
302 concensus => regexp of match}.
303 returns : see arguments.
# Returns the analysis result in the form selected by $val: raw text, the
# parsed data structure, or a list of Bio::SeqFeature::Generic objects when
# $val is 'Bio::SeqFeatureI' (see the usage notes for this method).
308 my ($self, $val) = @_;
# The parsed structure is cached under '_parsed' and only built when absent
# (the line doing the parsing is not visible in this excerpt).
310 if (!exists($self->{'_parsed'}) ) {
313 if ($val eq 'Bio::SeqFeatureI') {
# One feature is created per locus entry of every motif.
315 for my $motif (keys %{$self->{'_parsed'}}) {
316 for (my $i = 0; $i< scalar @
{$self->{'_parsed'}{$motif}{'locus'}};$i++) {
# Locus strings have the form 'start-end'.
317 my ($st, $end) = split /\-/, $self->{'_parsed'}{$motif}{'locus'}[$i];
318 push @fts, Bio
::SeqFeature
::Generic
->new
322 -primary_tag
=> 'Domain',
# 'peptide' is taken per match (index $i) whereas 'concensus' always uses the
# first stored regexp (index 0) of the motif.
327 peptide
=> $self->{'_parsed'}{$motif}{'peptide'}[$i],
328 concensus
=> $self->{'_parsed'}{$motif}{'regexp'}[0],
333 } #end if BioSeqFeature
# Any other value yields the parsed hash reference ...
334 return $self->{'_parsed'};
# ... and no value yields the raw, tag-stripped text.
336 return $self->{'_result'};
339 ## internal sub to parse raw data into internal data structure which is cached.
# Reads the raw result text line by line through an IO::String handle and
# fills %results, keyed by motif name, with 'peptide', 'locus' and 'regexp'
# array references.
342 my $result = IO
::String
->new($self->{'_result'});
347 while (my $l = <$result>) {
# Skip everything before the 'Elm Name Instances' header line. From that
# line on the counter is incremented for every line read, so the test
# '$in_results > 1' below skips only the header line itself.
348 next unless $in_results > 0 ||$l =~ /^\s+Elm\s+Name\s+Instances/;
349 $in_results++; #will be set when start of results reached.
# The 'List of excluded' line marks the end of the results table.
350 last if $l =~ /List of excluded/;
351 next unless $in_results >1;
353 my @line_parts = split /\s+/, $l;
355 ## if result has motif name on 1 line
356 if (scalar @line_parts == 1 && $line_parts[0]=~ /^\s*(\w+_\w+)/) {
360 ## else if is line with loci /seq matches
361 elsif (@line_parts > 1) {
362 my $index = 0; ## array index
363 my $read_loci = 0; ## flag to know that loci are being read
# Classify each whitespace-separated word of the line in order.
364 while ($index <= $#line_parts) {
365 my $word = $line_parts[$index++];
# Before any locus has been seen: a word containing '_' is handled by the
# first branch (its body is not visible here; presumably the motif name),
# and any other purely word-character token is stored as a matched peptide.
366 if ($read_loci ==0 && $word =~/_/) {
368 } elsif ($read_loci == 0 && $word =~ /^\w+$/ ) {
369 push @
{$results{$name}{'peptide'}}, $word;
# Words containing digits-dash-digits are stored as loci.
370 } elsif ($word =~ /\d+\-\d+/) {
372 push @
{$results{$name}{'locus'}}, $word;
373 } else { ## only get here if there are elements
# The last field of the line is stored as the motif's regular expression.
377 push @
{$results{$name}{'regexp'}}, $line_parts[$#line_parts];
# Cache the parsed structure on the object.
382 $self->{'_parsed'} = \
%results;