fixed error message and validation
[sgn.git] / lib / SGN / Controller / InSilicoPcr.pm
blob54355ffc0b9c9e22aa4fe590dd95ba54eab9d1da
2 package SGN::Controller::InSilicoPcr;
4 use Moose;
5 use File::Temp qw | tempfile |;
6 use File::Basename qw | basename |;
7 use Storable qw | nstore retrieve |;
8 use Bio::Graphics::Gel;
9 use File::NFSLock qw | uncache |;
10 use File::Copy qw | copy |;
11 use Data::Dumper;
13 use Config::Any;
14 use Try::Tiny;
15 use Tie::UrlEncoder; our %urlencode;
16 use File::Spec qw | catfile |;
17 use File::Slurp qw | read_file write_file |;
18 use CXGN::Tools::Run;
19 use Bio::Seq;
20 use CXGN::Page::FormattingHelpers qw/ html_break_string/;
23 BEGIN { extends 'Catalyst::Controller'; }
25 sub AUTO {
26 my $self = shift;
27 my $c = shift;
28 SGN::Schema::BlastDb->dbpath($c->config->{blast_db_path});
31 sub index :Path('/tools/in_silico_pcr/') :Args(0) {
32 my $self = shift;
33 my $c = shift;
35 my $db_id = $c->req->param('db_id');
37 my $seq = $c->req->param('seq');
38 my $schema = $c->dbic_schema("SGN::Schema");
40 my $group_rs = $schema->resultset("BlastDbGroup")->search( name => "In Silico PCR datasets");
41 # my $group_rs = $schema->resultset("BlastDbGroup")->search( undef, { order_by=>'ordinal' });
43 my $databases = {};
44 my $dataset_groups = [];
46 my $preselected_database = $c->config->{preselected_blastdb};
47 my $preselected_category = '';
49 if ($db_id) {
50 my $rs = $schema->resultset("BlastDb")->search( { blast_db_id => $db_id }, { join => 'blast_db_group' });
52 if ($rs == 0) {
53 $c->throw( is_error => 0, message => "The blast database with id $db_id could not be found.");
56 $preselected_database = $rs->first()->blast_db_id(); # first database of the category
57 $preselected_category = $rs->first()->blast_db_group_id();
60 foreach my $g ($group_rs->all()) {
61 my @blast_dbs = $g->blast_dbs();
62 push @$dataset_groups, [ $g->blast_db_group_id, $g->name() ];
64 my @dbs_AoA;
66 foreach my $db (@blast_dbs) {
67 push @dbs_AoA, [ $db->blast_db_id(), $db->title(), $db->type() ];
70 my @arr = sort {$a->[1] cmp $b->[1]} @dbs_AoA;
71 $databases->{ $g->blast_db_group_id } = \@arr;
74 $c->stash->{preselected_database} = $preselected_database;
75 $c->stash->{preselected_category} = $preselected_category;
76 $c->stash->{preload_id} = $c->req->param('preload_id');
77 $c->stash->{preload_type} = $c->req->param('preload_type');
79 $c->stash->{databases} = $databases;
80 $c->stash->{dataset_groups} = $dataset_groups;
82 $c->stash->{template} = '/tools/in_silico_pcr/in_silico_pcr.mas';
86 sub _reverse_complement{
87 my $seq = shift;
88 my $rev_seq = reverse $seq;
89 $rev_seq =~ tr/ACGTacgt/TGCAtgca/;
91 return $rev_seq;
95 sub run_pcr_blast :Path('/tools/pcr_results') :Args(0) {
96 my ($self, $c) = @_;
98 my @errors; #to store erros as they happen
100 my $fprimer = $c->req->param("fprimer");
101 my $rprimer = $c->req->param("rprimer");
102 my $productLength = $c->req->param("productLength");
103 my $allowedMismatches = $c->req->param('allowedMismatches');
104 my $frevcom = $c->req->param('frevcom'); #forward primer reverse complement
105 my $rrevcom = $c->req->param('rrevcom'); #reverse primer reverse complement
107 my $params = $c->req->params();
109 #getting the length of the primers
110 my $flength = length($fprimer);
111 my $rlength = length($rprimer);
112 $params->{flength} = $flength;
113 $params->{rlength} = $rlength;
115 #reverse complement if checked
116 if ($frevcom){
117 $fprimer = _reverse_complement($fprimer);
119 if ($rrevcom){
120 $rprimer = _reverse_complement($rprimer);
123 # print STDERR "fprimer: $fprimer, rprimer: $rprimer\n";
124 # print STDERR "DB id: $blast_db_id\n";
126 #validating productLength
127 if ($productLength <= 0 or $productLength !~ /^[\d]*$/g) {
128 push (@errors , "Max Product Length should be a positive digit\n");
131 #validating AllowedMismatches
132 if ($allowedMismatches < 0 or $allowedMismatches !~ /^[\d]*$/g) {
133 push (@errors , "Allowed mismatches should be a positive digit\n");
136 # return errors
137 if (scalar (@errors) > 0){
138 $c->stash->{errors} = join("<BR>" , @errors);
139 $c->stash->{template} = '/tools/in_silico_pcr/insilicopcr_output.mas';
140 return;
143 # giving the primers a fasta format
144 $fprimer = ">FORWARD-PRIMER\n$fprimer";
145 $rprimer = ">REVERSE-PRIMER\n$rprimer";
147 my $sequence = "$fprimer\n$rprimer\n";
148 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
150 # create blast input and output file
151 my $blast_tmp_output = $c->config->{cluster_shared_tempdir}."/blast";
152 mkdir $blast_tmp_output if ! -d $blast_tmp_output;
154 my ($seq_fh, $seqfile) = tempfile(
155 "blast_XXXXXX",
156 DIR=> $blast_tmp_output,
159 #print STDERR "Opening file for sequence ($seqfile)... ";
160 open(my $FH, ">", $seqfile) || die "Can't open file for query ($seqfile)\n";
161 print $FH $sequence if $sequence;
162 close($FH);
164 my $jobid = basename($seqfile);
165 print STDERR "JOB ID CREATED: $jobid\n";
167 print STDERR "Done.\n";
171 # get matrix (note: matrix not needed for blasn, comment out (Lukas, 5/2020)
172 #my $m = $params->{matrix};
173 ##$m =~ /^(BLOSUM|PAM)\d+$/ or $c->throw( is_error => 0, message => "invalid matrix '$m'" );
175 # get evalue
176 my $evalue = $params->{evalue} ? $params->{evalue} : 1;
178 # get word_size
179 my $word_size = $params->{word_size} ? $params->{word_size} : 7;
181 # get filter
182 my $filter = $params->{filterq} ? 'yes' : 'no';
184 # get blast database
185 my $schema = $c->dbic_schema("SGN::Schema");
186 my $bdb = $schema->resultset("BlastDb")->find($params->{database} ) or die "could not find bdb with file_base '$params->{database}'";
187 my $basename = File::Spec->catfile($c->config->{blast_db_path},$bdb->file_base());
190 my $result_file = $seqfile.".out";
191 # my $result_file = File::Spec->catfile($c->config->{basepath}, $c->tempfiles_subdir('blast'), $jobid.".out");
193 # create blast command
194 my $blast_cmd = "blastn -query $seqfile -db $basename -word_size $word_size -outfmt 6 -dust $filter -evalue $evalue -out $result_file";
196 print STDERR "COMMAND: $blast_cmd\n";
197 my $blast_error = system($blast_cmd);
199 if ($blast_error) {
200 print STDERR "An error occurred! $blast_error\n";
201 $c->stash->{rest} = { error => $blast_error };
204 my $pcr_img_path = $c->tempfiles_subdir('temp_images');
205 my $gel_img_tempdir = $c->path_to( $c->tempfiles_subdir('temp_images') );
207 my ($pcr_seq,$gel_url) = _blast_to_pcr($result_file,$params,$gel_img_tempdir,$basename);
210 # print STDERR "result_file: ".read_file($result_file)."\n";
211 print STDERR "gel_img_tempdir: $gel_img_tempdir\n";
212 print STDERR "gel_url: ".$pcr_img_path."/".$gel_url."\n";
214 $c->stash->{blast_res} = read_file($result_file);
215 $c->stash->{pcr_seq} = $pcr_seq;
216 $c->stash->{gel_url} = $pcr_img_path."/".$gel_url;
217 $c->stash->{template} = '/tools/in_silico_pcr/insilicopcr_output.mas';
220 sub _blast_to_pcr {
221 my $blast_file = shift;
222 my $params = shift;
223 my $gel_img_tempdir = shift;
224 my $basename = shift;
226 #Parsing the blast m8 result file
228 #Report parsing method was taken from /util/blast_result_handle.pl by Aure
230 my (%query_id, %subject_id, %identity, %align_length, %mismatches, %gap_openings, %q_start, %q_end, %s_start, %s_end,
231 %e_value, %hit_score, %orientation);
233 my (@fprimer_ids ,@rprimer_ids);
235 open(my $res_fh, "<", $blast_file) or die "$! opening $blast_file for reading";
237 my $line=0;
239 while (<$res_fh>) {
240 $line++;
242 my @data=split(/\t/, $_);
244 #separating forward primers from reverse primers using 2 arrays
245 push (@fprimer_ids , $line) if ($data[0] eq 'FORWARD-PRIMER');
246 push (@rprimer_ids , $line) if ($data[0] eq 'REVERSE-PRIMER');
248 # print STDERR "$data[1]\n";
250 $query_id{$line}=$data[0];
251 $subject_id{$line}=$data[1];
252 $identity{$line}=$data[2];
253 $align_length{$line}=$data[3];
254 $mismatches{$line}=$data[4];
255 $gap_openings{$line}=$data[5];
256 $q_start{$line}=$data[6];
257 $q_end{$line}=$data[7];
258 $s_start{$line}=$data[8];
259 $s_end{$line}=$data[9];
260 $e_value{$line}=$data[10];
261 $hit_score{$line}=$data[11];
263 #finding the orientation of the strand "+" is 5'->3' and "-" 3'->5'
264 $orientation{$line}= ($s_end{$line}-$s_start{$line} > 0)? '+' : '-';
267 close $res_fh;
270 #Finding Results
271 my @pcr_results; #is an array of array references [forward Primer # , reverse primer #, + or - for parent orientation]
273 foreach my $forward (@fprimer_ids){
275 #print STDERR "fwd: ".$subject_id{$forward}."\t".$s_start{$forward}."\t".$align_length{$forward}."\t".$mismatches {$forward}."\n";
276 # print STDERR "params: product length: ".$params->{productLength}."\tmm: ".$params->{allowedMismatches}."\tfwd length: ".$params->{flength}."\trev length: ".$params->{rlength}."\n";
278 foreach my $reverse (@rprimer_ids){
279 #print STDERR "rev: ".$subject_id{$reverse}."\t".$s_start{$reverse}."\t".$align_length{$reverse}."\t".$mismatches {$reverse}."\n";
281 if ($subject_id{$forward} eq $subject_id{$reverse} #both on the same subject seq
282 and $s_start{$reverse}- $s_start{$forward}<= $params->{productLength} #product Length is within user's choice
283 and $mismatches {$forward} <= $params->{allowedMismatches} #Allowed mismatches by user
284 and $mismatches {$reverse} <= $params->{allowedMismatches}
285 and $align_length{$forward} == $params->{flength} #primers match exact length
286 and $align_length {$reverse} == $params->{rlength}
288 #print STDERR "\ninside the if\n";
289 #print STDERR "orientation fwd: ".$orientation{$forward}." s_end fwd: ".$s_end{$forward}."\n";
290 #print STDERR "orientation rev: ".$orientation{$reverse}." s_end rev: ".$s_end{$reverse}."\n";
292 #if the product is in the + strand of parent seq add a + sign in the array
294 if ( $orientation{$forward} eq '+' #forward is on the + strand
295 and $orientation{$reverse} eq '-' #reverse is on the - strand b/c its a complement
296 and $s_end{$forward} < $s_end{$reverse} #end of forward located upstream of beginning of reverse
298 # print STDERR "first if";
299 push (@pcr_results , [$forward,$reverse, '+']) ;
302 #if the product is in the - strand of the parent seq add a - sign in the array
303 elsif ( $orientation{$forward} eq '-' #forward is on the - strand (complemet here)
304 and $orientation{$reverse} eq '+' #reverse is on the + strand
305 and $s_end{$forward} > $s_end{$reverse} #end of forward located downstream of beginning of reverse
308 # print STDERR "scond if\n";
309 push (@pcr_results , [$forward,$reverse, '-']);
311 else {
312 # print STDERR "\nin else\n";
315 }#end of the 4each loop
319 ##############################################################################################################################
321 my $fs = Bio::BLAST2::Database->open(full_file_basename => "$basename",);
323 my $find_seq;
324 my $find_subseq;
325 my $find_id;
326 my $pcr_seq;
328 my @product_sizes; #for the gel
331 if (scalar(@pcr_results) == 0 ){
332 print STDERR "No PCR Product Found\n";
334 $pcr_seq = "No PCR Product Found";
335 my $pcr_img_name = "insilicopcr_no_result.png";
337 system("cp static/img/$pcr_img_name $gel_img_tempdir");
338 return ($pcr_seq,$pcr_img_name);
340 else{
342 foreach my $result (@pcr_results){
344 #finding parent sequence
345 $find_id = $subject_id{$result->[0]};
346 $find_seq = $fs->get_sequence($find_id);
347 # $find_seq = $bdb->get_sequence($find_id);
348 # print STDERR "id: $find_id\n";
349 # print STDERR "seq: $find_seq\n";
351 # print STDERR "s_sstart1: ".$s_start{$result->[1]}." s_start0: ".$s_start{$result->[0]}." res2: ".$result->[2]."\n";
353 #finding the pcr result sequence
354 $find_subseq = $find_seq->subseq($s_start{$result->[0]},$s_start{$result->[1]}) if $result->[2] eq '+';
355 $find_subseq = $find_seq->subseq($s_start{$result->[1]},$s_start{$result->[0]}) if $result->[2] eq '-';
357 ######################################################################################
359 #generating sequence object for the result to be able to find its molecular weight
360 my $seq_obj = Bio::Seq->new(-seq => $find_subseq ,
361 -alphabet => 'dna'
364 my $seq_size = $seq_obj->length;
365 push (@product_sizes , $seq_size);
367 #finding the ID of the sequence and adding + sign if it is on the plus strand and - if its on minus strand and some coordinates
368 $find_id = $find_seq->id();
369 $find_id .= $result->[2] eq '+' ? ' strand = plus, ' : ' strand = minus, ';
370 $find_id .= " start = $s_start{$result->[0]}, end = $s_start{$result->[1]}, size = $seq_size bp";
371 #######################################################################################
373 #reverse complementing $find_subseq if the orientation is '-'
374 $pcr_seq = $seq_obj->revcom->seq if $result->[2] eq '-';
376 $pcr_seq = ">$find_id\n$pcr_seq"
379 ##############################################################################################################################
380 #Generating a gel of the results
382 my $gel = Bio::Graphics::Gel->new('pcr' => \@product_sizes,
383 -lane_length => 400,
384 -lane_width => 100,
385 -band_thickness => 2,
386 # -min_frag => 1000,
387 -font_size => 16,
388 -gelcolor => [240,240,240],
389 -bandcolor => [170,170,170]
392 my $gel_img = $gel->img->png;
394 # create the gel img temp name
395 my ($fh ,$temp_file) = tempfile( DIR => $gel_img_tempdir, TEMPLATE=>"gel_XXXXXX", SUFFIX => ".png");
396 print $fh $gel_img;
398 my $pcr_img_name = basename ($temp_file);
400 return ($pcr_seq,$pcr_img_name);