1 package SGN
::Controller
::Project
::Secretom
::SecreTary
;
3 use namespace
::autoclean
;
5 use Bio
::SecreTary
::SecreTarySelect
;
6 use Bio
::SecreTary
::SecreTaryAnalyse
;
7 use Bio
::SecreTary
::TMpred
;
8 use Bio
::SecreTary
::Cleavage
;
9 #use Bio::SecreTary::TMpred_Cinline;
10 #use Bio::SecreTary::Cleavage_Cinline;
15 BEGIN { extends
'Catalyst::Controller'; }
16 with
'Catalyst::Component::ApplicationAttribute';
18 __PACKAGE__
->config( namespace
=> 'secretom/secretary', );
22 SGN::Controller::Project::Secretom::SecreTary - Catalyst Controller
34 Just forwards to the the /secretom/secretary.mas template.
38 sub index : Path
: Args
(0) {
39 my ( $self, $c ) = @_;
40 $c->stash->{template
} = '/secretom/secretary.mas';
45 Just forwards to the the /secretom/secretary/instructions.mas template.
49 sub instructions
: Path
('instructions') {
50 my ( $self, $c ) = @_;
51 $c->stash->{template
} = '/secretom/secretary/instructions.mas';
56 Takes a GET or POST of data to analyze.
60 sequence: text sequence to run
61 sequence_file: uploaded sequence file to run against
62 sort: boolean whether to sort the output by score
63 show_only_sp: boolean whether to show only predicted signal peptides
64 st_output_action: st_out_download
67 HTML SecreTary results.
71 sub run
: Path
('run') {
72 my ( $self, $c ) = @_;
74 my $input = $c->req->param("sequence") || '';
75 my $sort_it = $c->req->param("sort");
76 my $show_only_sp = $c->req->param("show_only_sp");
77 my $st_output_action = $c->req->param("st_output_action");
79 for my $upload ( $c->req->upload('sequence_file') ) {
80 $input .= $upload->slurp;
83 # need to add the programs dir to PATH so secretary code can find tmpred
85 $ENV{PATH
} . ':' . $c->path_to( $c->config->{programs_subdir
} );
87 my ($STresults, $temp_file_handle) = $self->_run_secretary( $input, $sort_it, $show_only_sp, $c );
89 # stash the results of the run
90 @
{ $c->stash }{qw{ STresults
}} = ( $STresults );
92 if( $st_output_action && $st_output_action eq 'st_out_download'){ # download the output
93 $c->stash->{download_filename
} = $temp_file_handle->filename;
94 $c->forward('/download/download');
96 else { # display in browser
98 # and set the template to use for output
99 $c->stash->{template
} = '/secretom/secretary/result.mas';
103 ############# helper subs ##########
106 my ( $self, $input, $sort_it, $show_only_sp, $c ) = @_;
109 my $trunc_length = 100;
111 my $tmpred_obj = Bio
::SecreTary
::TMpred
->new( {} ); # use defaults here for min_score, etc.
112 # Bio::SecreTary::TMpred_Cinline->new( {} );
114 my $cleavage_predictor_obj = Bio
::SecreTary
::Cleavage
->new();
115 # Bio::SecreTary::Cleavage_Cinline->new();
117 my $id_seqs = process_input
($input);
119 #Calculate the necessary quantities for each sequence:
120 foreach (@
$id_seqs) {
122 my ( $id, $sequence ) = ( $1, $2 );
125 Bio
::SecreTary
::SecreTaryAnalyse
->new( {sequence_id
=> $id,
126 sequence
=> substr( $sequence, 0, $trunc_length ), tmpred_obj
=> $tmpred_obj, cleavage_predictor
=> $cleavage_predictor_obj });
127 push @STAarray, $STAobj;
131 my $STSobj = Bio
::SecreTary
::SecreTarySelect
->new();
132 my $STApreds = $STSobj->categorize( \
@STAarray );
134 my $result_string = "";
135 my $show_max_length = 62;
137 my @sort_STApreds = ($sort_it)
139 $b->[1] =~ /^ \s* (\S+) \s+ (-?[0-9.]+) /xms; #
141 $a->[1] =~ /^ \s* (\S+) \s+ (-?[0-9.]+) /xms;
143 return $score_b <=> $score_a;
150 foreach (@sort_STApreds) {
152 my $pred_string = $_->[1];
154 $pred_string =~ / ^ \s* (\S+) \s+ (\S+) \s*(.*) /xms;
155 my $prediction = substr("$1 ", 0, 3); # 'YES' or 'NO '
156 my $STscore = padtrunc
($2, 8);
159 next if ( $prediction eq 'NO ' and $show_only_sp );
161 my ( $score, $start, $end ) = ( ' ', ' ', ' ' );
162 if ( $solution =~ /^ \s* (\S+) \s+ (\S+) \s+ (\S+)/xms ) {
163 ( $score, $start, $end ) = ( $1, $2, $3 );
167 my $id = padtrunc
( $STA->sequence_id(), 15 );
168 my $sequence = $STA->sequence();
169 my $cleavage = $STA->cleavage_prediction();
170 my ( $sp_length, $hstart, $cstart, $typical ) = @
$cleavage;
171 my $hstartp1 = padtrunc
( $hstart + 1, 4 );
172 my $cstartp1 = padtrunc
( $cstart + 1, 4 );
173 $sp_length = padtrunc
( $sp_length, 4 );
174 my $orig_length = length $sequence;
177 [ $id, $prediction, $STscore, $sp_length, $sequence, $hstart, $cstart ];
178 push @
$STresults, $pred_array_ref;
181 my $temp_file_handle = File
::Temp
->new(
182 TEMPLATE
=> 'secretary_output_XXXXXX',
183 DIR
=> $c->get_conf('basepath') . $c->get_conf('tempfiles_subdir'),
188 print $temp_file_handle join(' ', @
$_[0,1,2,3,4]), "\n";
191 return ( $STresults, $temp_file_handle );
196 # process fasta input to get hash with ids for keys, sequences for values.
197 # expects fasta format, but can handle just sequence with no >id line, for first
200 my $max_sequences_to_do = 10000;
202 my @id_sequence_array;
203 my $min_sequence_length = 8; # this is the minimal length of sequence string which will be recognized as a sequence if no fasta idline is present.
205 my $anon_seq_count = 0;
207 $input =~ s/\r//g; #remove weird line endings.
208 $input =~ s/\A \s*//xms; # remove initial whitespace
209 $input =~ s/ \s* \z//xms; # remove final whitespace
210 if ( $input =~ s/\A ([^>]+) //xms )
211 { # if >= 1 chars before first > capture them.
213 # if letters and spaces optionally with * as last non whitespace char,
214 # and starts with at least $min_sequence_length letters, treat as sequence
215 if ( $fasta =~ /\A [A-Z]{$min_sequence_length,} [A-Z\s]* [*]? \s* \z/xms )
216 { # looks like sequence with no identifier
217 $fasta =~ s/\s* \z//xms;
218 $fasta = 'sequence_' . $anon_seq_count . "\n" . $fasta . "\n";
219 push @fastas, $fasta;
223 # otherwise stuff ahead of first > is considered junk, discarded ($1 not used)
225 $input =~ s/\A > //xms; # eliminate initial >
226 @fastas = (@fastas, split(">", $input));
228 @fastas = @fastas[0..$max_sequences_to_do-1] if(scalar @fastas > $max_sequences_to_do); # keep just the first $max_sequence_to_do
231 foreach my $fasta (@fastas) {
232 $fasta = '>' . $fasta;
233 next if ( $fasta =~ /\A\z/xms );
236 $fasta =~ s/\A \s+ //xms; # delete initial whitespace
237 if ( $fasta =~ s/\A > (\S+) [^\n]* \n //xms ) { # line starts with >
241 $fasta =~ s/\A > \s*//xms; # handles case of > not followed by id.
242 $id = 'sequence_' . $anon_seq_count;
245 $fasta =~ s/\s//xmsg; # remove whitespace from sequence;
246 $fasta =~ s/\* \z//xms; # remove final * if present.
248 push @id_sequence_array, "$id $fasta";
250 return \
@id_sequence_array;
253 sub padtrunc
{ #return a string of length $length, truncating or
254 # padding on right with spaces as necessary
257 while ( length $str < $length ) { $str .= " "; }
258 return substr( $str, 0, $length );