2 package SGN
::Controller
::InSilicoPcr
;
5 use File
::Temp qw
| tempfile
|;
6 use File
::Basename qw
| basename
|;
7 use Storable qw
| nstore retrieve
|;
8 use Bio
::Graphics
::Gel
;
9 use File
::NFSLock qw
| uncache
|;
10 use File
::Copy qw
| copy
|;
15 use Tie
::UrlEncoder
; our %urlencode;
16 use File
::Spec qw
| catfile
|;
17 use File
::Slurp qw
| read_file write_file
|;
21 use CXGN
::Page
::FormattingHelpers qw
/ html_break_string/;
24 BEGIN { extends
'Catalyst::Controller'; }
29 SGN
::Schema
::BlastDb
->dbpath($c->config->{blast_db_path
});
32 sub index :Path
('/tools/in_silico_pcr/') :Args
(0) {
36 my $db_id = $c->req->param('db_id');
38 my $seq = $c->req->param('seq');
39 my $schema = $c->dbic_schema("SGN::Schema");
41 my $group_rs = $schema->resultset("BlastDbGroup")->search( name
=> "Genome Sequences");
42 # my $group_rs = $schema->resultset("BlastDbGroup")->search( undef, { order_by=>'ordinal' });
45 my $dataset_groups = [];
47 my $preselected_database = $c->config->{preselected_blastdb
};
48 my $preselected_category = '';
51 my $rs = $schema->resultset("BlastDb")->search( { blast_db_id
=> $db_id }, { join => 'blast_db_group' });
54 $c->throw( is_error
=> 0, message
=> "The blast database with id $db_id could not be found.");
57 $preselected_database = $rs->first()->blast_db_id(); # first database of the category
58 $preselected_category = $rs->first()->blast_db_group_id();
61 foreach my $g ($group_rs->all()) {
62 my @blast_dbs = $g->blast_dbs();
63 push @
$dataset_groups, [ $g->blast_db_group_id, $g->name() ];
67 foreach my $db (@blast_dbs) {
68 push @dbs_AoA, [ $db->blast_db_id(), $db->title(), $db->type() ];
71 my @arr = sort {$a->[1] cmp $b->[1]} @dbs_AoA;
72 $databases->{ $g->blast_db_group_id } = \
@arr;
75 $c->stash->{preselected_database
} = $preselected_database;
76 $c->stash->{preselected_category
} = $preselected_category;
77 $c->stash->{preload_id
} = $c->req->param('preload_id');
78 $c->stash->{preload_type
} = $c->req->param('preload_type');
80 $c->stash->{databases
} = $databases;
81 $c->stash->{dataset_groups
} = $dataset_groups;
83 $c->stash->{template
} = '/tools/in_silico_pcr/in_silico_pcr.mas';
87 sub _reverse_complement
{
89 my $rev_seq = reverse $seq;
90 $rev_seq =~ tr/ACGTacgt/TGCAtgca/;
96 sub run_pcr_blast
:Path
('/tools/pcr_results') :Args
(0) {
99 my @errors; #to store erros as they happen
101 my $fprimer = $c->req->param("fprimer");
102 my $rprimer = $c->req->param("rprimer");
103 my $productLength = $c->req->param("productLength");
104 my $allowedMismatches = $c->req->param('allowedMismatches');
105 my $frevcom = $c->req->param('frevcom'); #forward primer reverse complement
106 my $rrevcom = $c->req->param('rrevcom'); #reverse primer reverse complement
108 my $params = $c->req->params();
110 #getting the length of the primers
111 my $flength = length($fprimer);
112 my $rlength = length($rprimer);
113 $params->{flength
} = $flength;
114 $params->{rlength
} = $rlength;
116 #reverse complement if checked
118 $fprimer = _reverse_complement
($fprimer);
121 $rprimer = _reverse_complement
($rprimer);
124 # print STDERR "fprimer: $fprimer, rprimer: $rprimer\n";
125 # print STDERR "DB id: $blast_db_id\n";
127 #validating productLength
128 if ($productLength <= 0 or $productLength !~ /^[\d]*$/g) {
129 push (@errors , "Max Product Length should be a positive digit\n");
132 #validating AllowedMismatches
133 if ($allowedMismatches < 0 or $allowedMismatches !~ /^[\d]*$/g) {
134 push (@errors , "Allowed mismatches should be a positive digit\n");
138 if (scalar (@errors) > 0){
139 $c->stash->{errors
} = join("<BR>" , @errors);
140 $c->stash->{template
} = '/tools/in_silico_pcr/insilicopcr_output.mas';
144 # giving the primers a fasta format
145 $fprimer = ">FORWARD-PRIMER\n$fprimer";
146 $rprimer = ">REVERSE-PRIMER\n$rprimer";
148 my $sequence = "$fprimer\n$rprimer\n";
149 $sequence =~ s/^\s+|\s+$|\n\s*\n//g; #< trim out leading and trailing whitespace and blank lines
151 # create blast input and output file
152 my $blast_tmp_output = $c->config->{cluster_shared_tempdir
}."/blast";
153 mkdir $blast_tmp_output if ! -d
$blast_tmp_output;
155 my ($seq_fh, $seqfile) = tempfile
(
157 DIR
=> $blast_tmp_output,
160 print STDERR
"Opening file for sequence ($seqfile)... ";
161 open(my $FH, ">", $seqfile) || die "Can't open file for query ($seqfile)\n";
162 print $FH $sequence if $sequence;
165 my $jobid = basename
($seqfile);
166 print STDERR
"JOB ID CREATED: $jobid\n";
168 print STDERR
"Done.\n";
173 my $m = $params->{matrix
};
174 $m =~ /^(BLOSUM|PAM)\d+$/ or $c->throw( is_error
=> 0, message
=> "invalid matrix '$m'" );
177 my $evalue = $params->{evalue
} ?
$params->{evalue
} : 1;
180 my $filter = $params->{filterq
} ?
'T' : 'F';
183 my $schema = $c->dbic_schema("SGN::Schema");
184 my $bdb = $schema->resultset("BlastDb")->find($params->{database
} ) or die "could not find bdb with file_base '$params->{database}'";
185 my $basename = File
::Spec
->catfile($c->config->{blast_db_path
},$bdb->file_base());
188 my $result_file = $seqfile.".out";
189 # my $result_file = File::Spec->catfile($c->config->{basepath}, $c->tempfiles_subdir('blast'), $jobid.".out");
191 # create blast command
192 my $blast_cmd = "blastall -p blastn -i $seqfile -d $basename -m 8 -M $m -F $filter -e $evalue -o $result_file";
194 print STDERR
"COMMAND: $blast_cmd\n";
195 my $blast_error = system($blast_cmd);
198 print STDERR
"An error occurred! $blast_error\n";
199 $c->stash->{rest
} = { error
=> $blast_error };
202 my $pcr_img_path = $c->tempfiles_subdir('temp_images');
203 my $gel_img_tempdir = $c->path_to( $c->tempfiles_subdir('temp_images') );
205 my ($pcr_seq,$gel_url) = _blast_to_pcr
($result_file,$params,$gel_img_tempdir,$basename);
208 # print STDERR "result_file: ".read_file($result_file)."\n";
209 print STDERR
"gel_img_tempdir: $gel_img_tempdir\n";
210 print STDERR
"gel_url: ".$pcr_img_path."/".$gel_url."\n";
212 $c->stash->{blast_res
} = read_file
($result_file);
213 $c->stash->{pcr_seq
} = $pcr_seq;
214 $c->stash->{gel_url
} = $pcr_img_path."/".$gel_url;
215 $c->stash->{template
} = '/tools/in_silico_pcr/insilicopcr_output.mas';
219 my $blast_file = shift;
221 my $gel_img_tempdir = shift;
222 my $basename = shift;
224 #Parsing the blast m8 result file
226 #Report parsing method was taken from /util/blast_result_handle.pl by Aure
228 my (%query_id, %subject_id, %identity, %align_length, %mismatches, %gap_openings, %q_start, %q_end, %s_start, %s_end,
229 %e_value, %hit_score, %orientation);
231 my (@fprimer_ids ,@rprimer_ids);
233 open(my $res_fh, "<", $blast_file) or die "$! opening $blast_file for reading";
240 my @data=split(/\t/, $_);
242 #separating forward primers from reverse primers using 2 arrays
243 push (@fprimer_ids , $line) if ($data[0] eq 'FORWARD-PRIMER');
244 push (@rprimer_ids , $line) if ($data[0] eq 'REVERSE-PRIMER');
246 # print STDERR "$data[1]\n";
248 $query_id{$line}=$data[0];
249 $subject_id{$line}=$data[1];
250 $identity{$line}=$data[2];
251 $align_length{$line}=$data[3];
252 $mismatches{$line}=$data[4];
253 $gap_openings{$line}=$data[5];
254 $q_start{$line}=$data[6];
255 $q_end{$line}=$data[7];
256 $s_start{$line}=$data[8];
257 $s_end{$line}=$data[9];
258 $e_value{$line}=$data[10];
259 $hit_score{$line}=$data[11];
261 #finding the orientation of the strand "+" is 5'->3' and "-" 3'->5'
262 $orientation{$line}= ($s_end{$line}-$s_start{$line} > 0)?
'+' : '-';
269 my @pcr_results; #is an array of array references [forward Primer # , reverse primer #, + or - for parent orientation]
271 foreach my $forward (@fprimer_ids){
273 # print STDERR "fwd: ".$subject_id{$forward}."\t".$s_start{$forward}."\t".$align_length{$forward}."\t".$mismatches {$forward}."\n";
274 # print STDERR "params: product length: ".$params->{productLength}."\tmm: ".$params->{allowedMismatches}."\tfwd length: ".$params->{flength}."\trev length: ".$params->{rlength}."\n";
276 foreach my $reverse (@rprimer_ids){
277 # print STDERR "rev: ".$subject_id{$reverse}."\t".$s_start{$reverse}."\t".$align_length{$reverse}."\t".$mismatches {$reverse}."\n";
279 if ($subject_id{$forward} eq $subject_id{$reverse} #both on the same subject seq
280 and $s_start{$reverse}- $s_start{$forward}<= $params->{productLength
} #product Length is within user's choice
281 and $mismatches {$forward} <= $params->{allowedMismatches
} #Allowed mismatches by user
282 and $mismatches {$reverse} <= $params->{allowedMismatches
}
283 and $align_length{$forward} == $params->{flength
} #primers match exact length
284 and $align_length {$reverse} == $params->{rlength
}
286 # print STDERR "\ninside the if\n";
287 # print STDERR "orientation fwd: ".$orientation{$forward}." s_end fwd: ".$s_end{$forward}."\n";
288 # print STDERR "orientation rev: ".$orientation{$reverse}." s_end rev: ".$s_end{$reverse}."\n";
290 #if the product is in the + starnd of parent seq add a + sign in the array
292 if ( $orientation{$forward} eq '+' #forward is on the + strand
293 and $orientation{$reverse} eq '-' #reverse is on the - strand b/c its a complement
294 and $s_end{$forward} < $s_end{$reverse} #end of forward located upstream of beginning of reverse
296 # print STDERR "first if";
297 push (@pcr_results , [$forward,$reverse, '+']) ;
300 #if the product is in the - strand of the parent seq add a - sign in the array
301 elsif ( $orientation{$forward} eq '-' #forward is on the - strand (complemet here)
302 and $orientation{$reverse} eq '+' #reverse is on the + strand
303 and $s_end{$forward} > $s_end{$reverse} #end of forward located downstream of beginning of reverse
306 # print STDERR "scond if\n";
307 push (@pcr_results , [$forward,$reverse, '-']);
310 # print STDERR "\nin else\n";
313 }#end of the 4each loop
317 ##############################################################################################################################
319 my $fs = Bio
::BLAST
::Database
->open(full_file_basename
=> "$basename",);
326 my @product_sizes; #for the gel
329 if (scalar(@pcr_results) == 0 ){
330 print STDERR
"No PCR Product Found\n";
332 $pcr_seq = "No PCR Product Found";
333 my $pcr_img_name = "insilicopcr_no_result.png";
335 system("cp static/img/$pcr_img_name $gel_img_tempdir");
336 return ($pcr_seq,$pcr_img_name);
340 foreach my $result (@pcr_results){
342 #finding parent sequence
343 $find_id = $subject_id{$result->[0]};
344 $find_seq = $fs->get_sequence($find_id);
345 # $find_seq = $bdb->get_sequence($find_id);
346 # print STDERR "id: $find_id\n";
347 # print STDERR "seq: $find_seq\n";
349 # print STDERR "s_sstart1: ".$s_start{$result->[1]}." s_start0: ".$s_start{$result->[0]}." res2: ".$result->[2]."\n";
351 #finding the pcr result sequence
352 $find_subseq = $find_seq->subseq($s_start{$result->[0]},$s_start{$result->[1]}) if $result->[2] eq '+';
353 $find_subseq = $find_seq->subseq($s_start{$result->[1]},$s_start{$result->[0]}) if $result->[2] eq '-';
355 ######################################################################################
357 #generating sequence object for the result to be able to find its molecular weight
358 my $seq_obj = Bio
::Seq
->new(-seq
=> $find_subseq ,
362 my $seq_size = $seq_obj->length;
363 push (@product_sizes , $seq_size);
365 #finding the ID of the sequence and adding + sign if it is on the plus strand and - if its on minus strand and some coordinates
366 $find_id = $find_seq->id();
367 $find_id .= $result->[2] eq '+' ?
' strand = plus, ' : ' strand = minus, ';
368 $find_id .= " start = $s_start{$result->[0]}, end = $s_start{$result->[1]}, size = $seq_size bp";
369 #######################################################################################
371 #reverse complementing $find_subseq if the orientation is '-'
372 $pcr_seq = $seq_obj->revcom->seq if $result->[2] eq '-';
374 $pcr_seq = ">$find_id\n$pcr_seq"
377 ##############################################################################################################################
378 #Generating a gel of the results
380 my $gel = Bio
::Graphics
::Gel
->new('pcr' => \
@product_sizes,
383 -band_thickness
=> 2,
386 -gelcolor
=> [240,240,240],
387 -bandcolor
=> [170,170,170]
390 my $gel_img = $gel->img->png;
392 # create the gel img temp name
393 my ($fh ,$temp_file) = tempfile
( DIR
=> $gel_img_tempdir, TEMPLATE
=>"gel_XXXXXX", SUFFIX
=> ".png");
396 my $pcr_img_name = basename
($temp_file);
398 return ($pcr_seq,$pcr_img_name);