lib/SGN/Controller/Project/Secretom/SecreTary.pm

   1 package SGN::Controller::Project::Secretom::SecreTary;
   2 use Moose;
   3 use namespace::autoclean;
   4
   5 use Bio::SecreTary::SecreTarySelect;
   6 use Bio::SecreTary::SecreTaryAnalyse;
   7 use Bio::SecreTary::TMpred;
   8 use Bio::SecreTary::Cleavage;
   9 #use Bio::SecreTary::TMpred_Cinline;
  10 #use Bio::SecreTary::Cleavage_Cinline;
  11 use File::Temp;
  12 use File::Basename;
  13 use File::Spec;
  14
  15 BEGIN { extends 'Catalyst::Controller'; }
  16 with 'Catalyst::Component::ApplicationAttribute';
  17
  18 __PACKAGE__->config( namespace => 'secretom/secretary', );
  19
  20 =head1 NAME
  21
  22 SGN::Controller::Project::Secretom::SecreTary - Catalyst Controller
  23
  24 =head1 DESCRIPTION
  25
  26 Catalyst Controller.
  27
  28 =head1 ACTIONS
  29
  30 =cut
  31
  32 =head2 index
  33
  34 Just forwards to the the /secretom/secretary.mas template.
  35
  36 =cut
  37
  38 sub index : Path : Args(0) {
  39         my ( $self, $c ) = @_;
  40         $c->stash->{template} = '/secretom/secretary.mas';
  41 }
  42
  43 =head2 instructions
  44
  45 Just forwards to the the /secretom/secretary/instructions.mas template.
  46
  47 =cut
  48
  49 sub instructions : Path('instructions') {
  50         my ( $self, $c ) = @_;
  51         $c->stash->{template} = '/secretom/secretary/instructions.mas';
  52 }
  53
  54 =head2 run
  55
  56 Takes a GET or POST of data to analyze.
  57
  58 Params:
  59
  60 sequence: text sequence to run
  61 sequence_file: uploaded sequence file to run against
  62 sort: boolean whether to sort the output by score
  63 show_only_sp: boolean whether to show only predicted signal peptides
  64 st_output_action: st_out_download
  65 Output:
  66
  67 HTML SecreTary results.
  68
  69 =cut
  70
  71 sub run : Path('run') {
  72         my ( $self, $c ) = @_;
  73
  74         my $input        = $c->req->param("sequence") || '';
  75         my $sort_it      = $c->req->param("sort");
  76         my $show_only_sp = $c->req->param("show_only_sp");
  77 my $st_output_action = $c->req->param("st_output_action");
  78
  79         for my $upload ( $c->req->upload('sequence_file') ) {
  80                 $input .= $upload->slurp;
  81         }
  82
  83         # need to add the programs dir to PATH so secretary code can find tmpred
  84         local $ENV{PATH} =
  85                 $ENV{PATH} . ':' . $c->path_to( $c->config->{programs_subdir} );
  86
  87         my ($STresults, $temp_file_handle) =  $self->_run_secretary( $input, $sort_it, $show_only_sp, $c );
  88
  89         # stash the results of the run
  90         @{ $c->stash }{qw{ STresults }} = ( $STresults );
  91
  92         if( $st_output_action && $st_output_action eq 'st_out_download'){ # download the output
  93             $c->stash->{download_filename} = $temp_file_handle->filename;
  94             $c->forward('/download/download');
  95         }
  96         else { # display in browser
  97
  98             # and set the template to use for output
  99             $c->stash->{template} = '/secretom/secretary/result.mas';
 100         }
 101 }
 102
 103 ############# helper subs ##########
 104
 105 sub _run_secretary {
 106         my ( $self, $input, $sort_it, $show_only_sp, $c ) = @_;
 107
 108         my @STAarray;
 109         my $trunc_length = 100;
 110
 111         my $tmpred_obj = Bio::SecreTary::TMpred->new( {} ); # use defaults here for min_score, etc.
 112 #             Bio::SecreTary::TMpred_Cinline->new( {} );
 113
 114                 my $cleavage_predictor_obj = Bio::SecreTary::Cleavage->new();
 115 #             Bio::SecreTary::Cleavage_Cinline->new();
 116
 117         my $id_seqs = process_input($input);
 118
 119 #Calculate the necessary quantities for each sequence:
 120         foreach (@$id_seqs) {
 121                 /^\s*(\S+)\s+(\S+)/;
 122                 my ( $id, $sequence ) = ( $1, $2 );
 123
 124                 my $STAobj =
 125                         Bio::SecreTary::SecreTaryAnalyse->new( {sequence_id => $id,
 126                                         sequence => substr( $sequence, 0, $trunc_length ), tmpred_obj => $tmpred_obj, cleavage_predictor => $cleavage_predictor_obj });
 127                 push @STAarray, $STAobj;
 128         }
 129
 130
 131         my $STSobj   = Bio::SecreTary::SecreTarySelect->new();
 132         my $STApreds = $STSobj->categorize( \@STAarray );
 133
 134         my $result_string   = "";
 135         my $show_max_length = 62;
 136
 137         my @sort_STApreds = ($sort_it)
 138                 ? sort {
 139                         $b->[1] =~ /^ \s* (\S+) \s+ (-?[0-9.]+) /xms; #
 140                                 my $score_b = $2;
 141                         $a->[1] =~ /^ \s* (\S+) \s+ (-?[0-9.]+) /xms;
 142                         my $score_a = $2;
 143                         return $score_b <=> $score_a;
 144                 } @$STApreds
 145         : @$STApreds;
 146
 147
 148
 149         my $STresults = [];
 150         foreach (@sort_STApreds) {
 151                 my $STA        = $_->[0];
 152                 my $pred_string = $_->[1];
 153
 154                 $pred_string =~ / ^ \s* (\S+) \s+ (\S+) \s*(.*) /xms;
 155                 my $prediction = substr("$1   ", 0, 3); # 'YES' or 'NO '
 156                         my $STscore = padtrunc($2, 8);
 157                 my $solution = $3;
 158
 159                 next if ( $prediction eq 'NO ' and $show_only_sp );
 160
 161                 my ( $score, $start, $end ) = ( '        ', '      ', '      ' );
 162                 if ( $solution =~ /^ \s* (\S+) \s+ (\S+) \s+ (\S+)/xms ) {
 163                         ( $score, $start, $end ) = ( $1, $2, $3 );
 164
 165                 }
 166
 167                 my $id       = padtrunc( $STA->sequence_id(), 15 );
 168                 my $sequence = $STA->sequence();
 169                 my $cleavage = $STA->cleavage_prediction();
 170                 my ( $sp_length, $hstart, $cstart, $typical ) = @$cleavage;
 171                 my $hstartp1 = padtrunc( $hstart + 1, 4 );
 172                 my $cstartp1 = padtrunc( $cstart + 1, 4 );
 173                 $sp_length = padtrunc( $sp_length, 4 );
 174                 my $orig_length = length $sequence;
 175
 176                 my $pred_array_ref =
 177                         [ $id, $prediction, $STscore, $sp_length, $sequence, $hstart, $cstart ];
 178                 push @$STresults, $pred_array_ref;
 179         }
 180
 181                 my $temp_file_handle = File::Temp->new(
 182                                 TEMPLATE => 'secretary_output_XXXXXX',
 183                                 DIR      => $c->get_conf('basepath') . $c->get_conf('tempfiles_subdir'),
 184                                 UNLINK   => 0
 185                                 );
 186
 187 for(@$STresults){
 188                         print $temp_file_handle join('  ', @$_[0,1,2,3,4]), "\n";
 189                 }
 190
 191         return ( $STresults,  $temp_file_handle );
 192 }
 193
 194 sub process_input {
 195
 196 # process fasta input to get hash with ids for keys, sequences for values.
 197 # expects fasta format, but can handle just sequence with no >id line, for first
 198 # sequence only.
 199
 200         my $max_sequences_to_do = 10000;
 201         my $input               = shift;
 202         my @id_sequence_array;
 203         my $min_sequence_length = 8; # this is the minimal length of sequence string which will be recognized as a sequence if no fasta idline is present.
 204                 my @fastas  = ();
 205         my $anon_seq_count = 0;
 206
 207         $input =~ s/\r//g;           #remove weird line endings.
 208                 $input =~ s/\A \s*//xms;     # remove initial whitespace
 209                 $input =~ s/ \s* \z//xms;    # remove final whitespace
 210                 if ( $input =~ s/\A ([^>]+) //xms )
 211                 {                            # if >= 1 chars before first > capture them.
 212                         my $fasta = uc $1;
 213 # if letters and spaces optionally with * as last non whitespace char,
 214 # and starts with at least $min_sequence_length letters, treat as sequence
 215                         if ( $fasta =~ /\A [A-Z]{$min_sequence_length,} [A-Z\s]* [*]? \s* \z/xms )
 216                         {                        # looks like sequence with no identifier
 217         $fasta =~ s/\s* \z//xms;
 218                                 $fasta = 'sequence_' . $anon_seq_count . "\n" . $fasta . "\n";
 219                                 push @fastas, $fasta;
 220                                 $anon_seq_count++;
 221                         }
 222
 223 # otherwise stuff ahead of first > is considered junk, discarded ($1 not used)
 224                 }
 225         $input =~ s/\A > //xms; # eliminate initial >
 226                 @fastas = (@fastas, split(">", $input));
 227
 228         @fastas = @fastas[0..$max_sequences_to_do-1] if(scalar @fastas > $max_sequences_to_do); # keep just the first $max_sequence_to_do
 229
 230
 231                 foreach my $fasta (@fastas) {
 232                         $fasta = '>' . $fasta;
 233                         next if ( $fasta =~ /\A\z/xms );
 234
 235                         my $id;
 236                         $fasta =~ s/\A \s+ //xms;    # delete initial whitespace
 237                                 if ( $fasta =~ s/\A > (\S+) [^\n]* \n //xms ) {    # line starts with >
 238                                         $id = $1;
 239                 }
 240                                 else {
 241                                         $fasta =~ s/\A > \s*//xms;   # handles case of > not followed by id.
 242                                                 $id = 'sequence_' . $anon_seq_count;
 243                                         $anon_seq_count++;
 244                                 }
 245                         $fasta =~ s/\s//xmsg;            # remove whitespace from sequence;
 246                         $fasta =~ s/\* \z//xms;          # remove final * if present.
 247                                 $fasta = uc $fasta;
 248                         push @id_sequence_array, "$id $fasta";
 249                 }
 250         return \@id_sequence_array;
 251 }
 252
 253 sub padtrunc {    #return a string of length $length, truncating or
 254 # padding on right with spaces as necessary
 255         my $str    = shift;
 256         my $length = shift;
 257         while ( length $str < $length ) { $str .= "                    "; }
 258         return substr( $str, 0, $length );
 259 }
 260
 261 1;
 262