seedlot upload with accession synonyms. seedlot upload works to update existing seedlots
[sgn.git] / lib / SGN / Controller / Project / Secretom / SecreTary.pm
blobce8d5043871c56710d5711af43cdc5c2b6e029b9
1 package SGN::Controller::Project::Secretom::SecreTary;
2 use Moose;
3 use namespace::autoclean;
5 use Bio::SecreTary::SecreTarySelect;
6 use Bio::SecreTary::SecreTaryAnalyse;
7 use Bio::SecreTary::TMpred;
8 use Bio::SecreTary::Cleavage;
9 #use Bio::SecreTary::TMpred_Cinline;
10 #use Bio::SecreTary::Cleavage_Cinline;
11 use File::Temp;
12 use File::Basename;
13 use File::Spec;
15 BEGIN { extends 'Catalyst::Controller'; }
16 with 'Catalyst::Component::ApplicationAttribute';
18 __PACKAGE__->config( namespace => 'secretom/secretary', );
20 =head1 NAME
22 SGN::Controller::Project::Secretom::SecreTary - Catalyst Controller
24 =head1 DESCRIPTION
26 Catalyst Controller.
28 =head1 ACTIONS
30 =cut
32 =head2 index
34 Just forwards to the the /secretom/secretary.mas template.
36 =cut
38 sub index : Path : Args(0) {
39 my ( $self, $c ) = @_;
40 $c->stash->{template} = '/secretom/secretary.mas';
43 =head2 instructions
45 Just forwards to the the /secretom/secretary/instructions.mas template.
47 =cut
49 sub instructions : Path('instructions') {
50 my ( $self, $c ) = @_;
51 $c->stash->{template} = '/secretom/secretary/instructions.mas';
54 =head2 run
56 Takes a GET or POST of data to analyze.
58 Params:
60 sequence: text sequence to run
61 sequence_file: uploaded sequence file to run against
62 sort: boolean whether to sort the output by score
63 show_only_sp: boolean whether to show only predicted signal peptides
64 st_output_action: st_out_download
65 Output:
67 HTML SecreTary results.
69 =cut
71 sub run : Path('run') {
72 my ( $self, $c ) = @_;
74 my $input = $c->req->param("sequence") || '';
75 my $sort_it = $c->req->param("sort");
76 my $show_only_sp = $c->req->param("show_only_sp");
77 my $st_output_action = $c->req->param("st_output_action");
79 for my $upload ( $c->req->upload('sequence_file') ) {
80 $input .= $upload->slurp;
83 # need to add the programs dir to PATH so secretary code can find tmpred
84 local $ENV{PATH} =
85 $ENV{PATH} . ':' . $c->path_to( $c->config->{programs_subdir} );
87 my ($STresults, $temp_file_handle) = $self->_run_secretary( $input, $sort_it, $show_only_sp, $c );
89 # stash the results of the run
90 @{ $c->stash }{qw{ STresults }} = ( $STresults );
92 if( $st_output_action && $st_output_action eq 'st_out_download'){ # download the output
93 $c->stash->{download_filename} = $temp_file_handle->filename;
94 $c->forward('/download/download');
96 else { # display in browser
98 # and set the template to use for output
99 $c->stash->{template} = '/secretom/secretary/result.mas';
103 ############# helper subs ##########
105 sub _run_secretary {
106 my ( $self, $input, $sort_it, $show_only_sp, $c ) = @_;
108 my @STAarray;
109 my $trunc_length = 100;
111 my $tmpred_obj = Bio::SecreTary::TMpred->new( {} ); # use defaults here for min_score, etc.
112 # Bio::SecreTary::TMpred_Cinline->new( {} );
114 my $cleavage_predictor_obj = Bio::SecreTary::Cleavage->new();
115 # Bio::SecreTary::Cleavage_Cinline->new();
117 my $id_seqs = process_input($input);
119 #Calculate the necessary quantities for each sequence:
120 foreach (@$id_seqs) {
121 /^\s*(\S+)\s+(\S+)/;
122 my ( $id, $sequence ) = ( $1, $2 );
124 my $STAobj =
125 Bio::SecreTary::SecreTaryAnalyse->new( {sequence_id => $id,
126 sequence => substr( $sequence, 0, $trunc_length ), tmpred_obj => $tmpred_obj, cleavage_predictor => $cleavage_predictor_obj });
127 push @STAarray, $STAobj;
131 my $STSobj = Bio::SecreTary::SecreTarySelect->new();
132 my $STApreds = $STSobj->categorize( \@STAarray );
134 my $result_string = "";
135 my $show_max_length = 62;
137 my @sort_STApreds = ($sort_it)
138 ? sort {
139 $b->[1] =~ /^ \s* (\S+) \s+ (-?[0-9.]+) /xms; #
140 my $score_b = $2;
141 $a->[1] =~ /^ \s* (\S+) \s+ (-?[0-9.]+) /xms;
142 my $score_a = $2;
143 return $score_b <=> $score_a;
144 } @$STApreds
145 : @$STApreds;
149 my $STresults = [];
150 foreach (@sort_STApreds) {
151 my $STA = $_->[0];
152 my $pred_string = $_->[1];
154 $pred_string =~ / ^ \s* (\S+) \s+ (\S+) \s*(.*) /xms;
155 my $prediction = substr("$1 ", 0, 3); # 'YES' or 'NO '
156 my $STscore = padtrunc($2, 8);
157 my $solution = $3;
159 next if ( $prediction eq 'NO ' and $show_only_sp );
161 my ( $score, $start, $end ) = ( ' ', ' ', ' ' );
162 if ( $solution =~ /^ \s* (\S+) \s+ (\S+) \s+ (\S+)/xms ) {
163 ( $score, $start, $end ) = ( $1, $2, $3 );
167 my $id = padtrunc( $STA->sequence_id(), 15 );
168 my $sequence = $STA->sequence();
169 my $cleavage = $STA->cleavage_prediction();
170 my ( $sp_length, $hstart, $cstart, $typical ) = @$cleavage;
171 my $hstartp1 = padtrunc( $hstart + 1, 4 );
172 my $cstartp1 = padtrunc( $cstart + 1, 4 );
173 $sp_length = padtrunc( $sp_length, 4 );
174 my $orig_length = length $sequence;
176 my $pred_array_ref =
177 [ $id, $prediction, $STscore, $sp_length, $sequence, $hstart, $cstart ];
178 push @$STresults, $pred_array_ref;
181 my $temp_file_handle = File::Temp->new(
182 TEMPLATE => 'secretary_output_XXXXXX',
183 DIR => $c->get_conf('basepath') . $c->get_conf('tempfiles_subdir'),
184 UNLINK => 0
187 for(@$STresults){
188 print $temp_file_handle join(' ', @$_[0,1,2,3,4]), "\n";
191 return ( $STresults, $temp_file_handle );
194 sub process_input {
196 # process fasta input to get hash with ids for keys, sequences for values.
197 # expects fasta format, but can handle just sequence with no >id line, for first
198 # sequence only.
200 my $max_sequences_to_do = 10000;
201 my $input = shift;
202 my @id_sequence_array;
203 my $min_sequence_length = 8; # this is the minimal length of sequence string which will be recognized as a sequence if no fasta idline is present.
204 my @fastas = ();
205 my $anon_seq_count = 0;
207 $input =~ s/\r//g; #remove weird line endings.
208 $input =~ s/\A \s*//xms; # remove initial whitespace
209 $input =~ s/ \s* \z//xms; # remove final whitespace
210 if ( $input =~ s/\A ([^>]+) //xms )
211 { # if >= 1 chars before first > capture them.
212 my $fasta = uc $1;
213 # if letters and spaces optionally with * as last non whitespace char,
214 # and starts with at least $min_sequence_length letters, treat as sequence
215 if ( $fasta =~ /\A [A-Z]{$min_sequence_length,} [A-Z\s]* [*]? \s* \z/xms )
216 { # looks like sequence with no identifier
217 $fasta =~ s/\s* \z//xms;
218 $fasta = 'sequence_' . $anon_seq_count . "\n" . $fasta . "\n";
219 push @fastas, $fasta;
220 $anon_seq_count++;
223 # otherwise stuff ahead of first > is considered junk, discarded ($1 not used)
225 $input =~ s/\A > //xms; # eliminate initial >
226 @fastas = (@fastas, split(">", $input));
228 @fastas = @fastas[0..$max_sequences_to_do-1] if(scalar @fastas > $max_sequences_to_do); # keep just the first $max_sequence_to_do
231 foreach my $fasta (@fastas) {
232 $fasta = '>' . $fasta;
233 next if ( $fasta =~ /\A\z/xms );
235 my $id;
236 $fasta =~ s/\A \s+ //xms; # delete initial whitespace
237 if ( $fasta =~ s/\A > (\S+) [^\n]* \n //xms ) { # line starts with >
238 $id = $1;
240 else {
241 $fasta =~ s/\A > \s*//xms; # handles case of > not followed by id.
242 $id = 'sequence_' . $anon_seq_count;
243 $anon_seq_count++;
245 $fasta =~ s/\s//xmsg; # remove whitespace from sequence;
246 $fasta =~ s/\* \z//xms; # remove final * if present.
247 $fasta = uc $fasta;
248 push @id_sequence_array, "$id $fasta";
250 return \@id_sequence_array;
253 sub padtrunc { #return a string of length $length, truncating or
254 # padding on right with spaces as necessary
255 my $str = shift;
256 my $length = shift;
257 while ( length $str < $length ) { $str .= " "; }
258 return substr( $str, 0, $length );