Merge pull request #5191 from solgenomics/topic/quality_control
[sgn.git] / lib / SGN / Controller / motifs_finder.pm
blob91715048f5c4b2c303cdaffe9f37aa0c9777ae18
1 package SGN::Controller::motifs_finder;
3 use Moose;
4 use namespace::autoclean;
5 use File::Temp qw | tempfile |;
6 use File::Basename;
7 use Data::Dumper;
10 BEGIN { extends 'Catalyst::Controller'; }
12 =head1 NAME
14 motifs_finder::Controller::motifs_finder - Catalyst Controller
16 =head1 DESCRIPTION
18 Catalyst Controller.
20 =head1 METHODS
22 =cut
24 =head2 index
26 =cut
28 sub index :Path('/tools/motifs_finder/') :Args(0) {
29 my ( $self, $c ) = @_;
31 $c->stash->{template} = '/tools/motifs_finder/index.mas';
34 sub name_var :Path('/test/') :Args(0) {
35 my ($self, $c) = @_;
37 # get variables from catalyst object
38 my $params = $c->req->body_params();
39 my $sequence = $c->req->param("sequence");
40 my $widths_of_motifs = $c->req->param("widths_of_motifs");
41 my $numbers_of_sites = $c->req->param("numbers_of_sites");
42 my $seq_file = $c->req->param("sequence_file");
43 my $no_of_seeds = $c->req->param("number_of_seeds");
44 my $fragmentation = $c->req->param("fragmentation");
45 my $rev_complement = $c->req->param("rev_complement");
46 my $weblogo_output = $c->config->{tmp_weblogo_path};
47 my $basePath = $c->config->{tempfiles_base_motifs_finder};
48 my $cluster_shared_bindir = $c->config->{cluster_motifs_finder};
50 # validate the Nucleic Acid in sequence. To ensure the return of line is not seen as a space.
51 my @seq;
52 my @errors;
54 # to generate temporary file name for the analysis
55 my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/static/documents/tempfiles/motifs_finder/");
56 #my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/motifs_finder/");
58 print STDERR Dumper($filename);
59 if ($sequence !~ /^>/ && $seq_file eq '') {
60 push ( @errors , "Please, paste sequences or attach sequence file.<br/>Ensure your sequence conform with 'usage help' description.\n");
63 # To send error file to index.mas
64 if (scalar (@errors) > 0){
65 my $user_errors = join("<br />", @errors);
66 $c->stash->{error_msg} = join("<br/>", @errors);
67 $c->stash->{template} = '/tools/motifs_finder/index.mas';
68 return;
70 else {
71 # If no error prepare the sequence file for sampling
72 if ($sequence =~ /^>/) {
73 $sequence =~ s/[ \,\-\.\#\(\)\%\'\"\[\]\{\}\:\;\=\+\\\/]/_/gi;
74 @seq = split(/\s/,$sequence);
76 #to open and write and print the input file
77 open (my $out_fh, ">", $filename."_input.txt") || die ("\nERROR: the file ".$filename."_input.txt could not be found\n");
78 print $out_fh join("\n",@seq);
80 # to run Gibbs motifs sampler
81 my $err = system("$cluster_shared_bindir/Gibbs.linux ".$filename."_input.txt $widths_of_motifs $numbers_of_sites -W 0.8 -w 0.1 -p 50 -j 5 -i 500 $fragmentation $rev_complement -Z -S $no_of_seeds -C 0.5 -y -nopt -o ".$filename."_output -n");
82 print STDOUT "print $err\n";
84 elsif ($seq_file) {
85 my $err = system("$cluster_shared_bindir/Gibbs.linux $seq_file $widths_of_motifs $numbers_of_sites -W 0.8 -w 0.1 -p 50 -j 5 -i 500 $fragmentation $rev_complement -Z -S $no_of_seeds -C 0.5 -y -nopt -o ".$filename."_output -n");
86 print STDOUT "print $err\n";
89 open (my $output_fh, "<", $filename."_output") || die ("\nERROR: the file ".$filename."_output could not be found\n"); # open sampler output file and write into a FH
91 # Creating motifs fasta file for weblogo use and other files that are made into tables in the output.mas
93 my $switch = 0;
94 my $motif = 0;
95 my $switch_logo = 0;
96 my $logo = 0;
97 my @string_result;
98 my $logo_file;
99 my $wl_out_fh;
100 my @motif_element;
101 my @logo_image;
102 my @logofile_id;
103 my $lf_id;
104 my @motif_width;
105 my @aa;
106 my @motif_table_file;
107 my $motif_tab_fh;
108 my $motif_tab;
109 my $freq_tab_switch = 0;
110 my $freq_tab_fh;
111 my $freq_tab = 0;
112 my $freq_tab_file;
113 my @freq_tab;
114 my $prob_tab_switch = 0;
115 my $prob_tab_fh;
116 my $prob_tab = 0;
117 my $prob_tab_file;
118 my @prob_tab;
119 my $BGPM_tab_switch = 0;
120 my $BGPM_tab_fh;
121 my $BGPM_tab = 0;
122 my $BGPM_tab_file;
123 my @BGPM_tab;
124 my $sum_indv_tab_switch = 0;
125 my $sum_indv_tab_fh;
126 my $sum_indv_tab = 0;
127 my $sum_indv_tab_file;
128 my @sum_indv_tab;
129 my $sum = 0;
130 my $switch_sum = 0;
131 my @sum;
133 while (my $line = <$output_fh>) {
134 chomp $line;
135 push @string_result, $line;
136 if ($line =~ m/^Log\sBackground\sportion\sof\sMap\s\=\s/){
137 $sum = 1;
139 if ($sum == 1) {
140 $switch_sum++;
141 push @sum, $line;
142 if ($line =~ m/^Elapsed\stime:\s+/) {
143 $sum = 0;
146 if ($motif == 1){
147 $switch++;
148 if ($logo == 1 && $line !~ m/^\s+\*+/ ) {
149 $switch_logo++;
150 my @a = split(/\s+/,$line);
151 print $wl_out_fh ">seq_$switch_logo\n$a[5]\n";
152 @aa = split(/\s+/,$line);
153 print $motif_tab_fh "$line\n";
155 if ($line =~ m/^Num\sMotifs/ ) {
156 $logo = 1;
157 open ($wl_out_fh, ">", $logo_file) || die ("\nERROR: the file $logo_file could not be found\n");
158 push @motif_element, $logo_file;
159 push @logofile_id, $lf_id;
160 @motif_width = split (/,/,$widths_of_motifs);
161 print "logo file ID: @logofile_id\n";
162 print "motif lenght: @motif_width\n";
163 open ($motif_tab_fh, ">", $motif_tab) || die ("\nERROR: the file $motif_tab could not be found\n");
164 push @motif_table_file, $motif_tab;
166 elsif ($line =~ m/^\s+\*+/ ) {
167 $logo = 0;
168 my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/static/documents/tempfiles/motifs_finder/");
169 #my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/motifs_finder/");
171 if ($freq_tab == 1) {
172 $freq_tab_switch++;
173 print $freq_tab_fh "$line\n";
175 if ($line =~ m/^Motif\smodel/ ) {
176 $freq_tab = 1;
177 open ($freq_tab_fh, ">", $freq_tab_file) || die ("\nERROR: the file $freq_tab could not be found\n");
178 push @freq_tab, $freq_tab_file;
179 print "FREQ TABLE: $freq_tab[0]\n";
181 elsif ($line =~ m/^site\s+/ ) {
182 $freq_tab = 0;
183 my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/static/documents/tempfiles/motifs_finder/");
184 #my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/motifs_finder/");
186 if ($prob_tab == 1 && $line !~ m/^Background\sprobability\smodel/) {
187 $prob_tab_switch++;
188 print $prob_tab_fh "$line\n";
190 if ($line =~ m/^Motif\sprobability\smodel/ ) {
191 $prob_tab = 1;
192 open ($prob_tab_fh, ">", $prob_tab_file) || die ("\nERROR: the file $prob_tab_file could not be found\n");
193 push @prob_tab, $prob_tab_file;
195 elsif ($line =~ m/^Background\sprobability\smodel/ ) {
196 $prob_tab = 0;
197 #my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/motifs_finder/");
198 my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/static/documents/tempfiles/motifs_finder/");
200 if ($BGPM_tab == 1) {
201 $BGPM_tab_switch++;
202 print $BGPM_tab_fh "$line\n";
204 if ($line =~ m/^Background\sprobability\smodel/ ) {
205 $BGPM_tab = 1;
206 open ($BGPM_tab_fh, ">", $BGPM_tab_file) || die ("\nERROR: the file $BGPM_tab_file could not be found\n");
207 push @BGPM_tab, $BGPM_tab_file;
209 elsif ($line =~ m/^\s+\d\.\d+\s\d\.\d+\s/ ) {
210 $BGPM_tab = 0;
211 my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/static/documents/tempfiles/motifs_finder/");
212 #my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/motifs_finder/");
214 if ($sum_indv_tab == 1 ) {
215 $sum_indv_tab_switch++;
216 print $sum_indv_tab_fh "$line\n";
217 print "FREQ LINES: $line\n";
219 if ($line =~ m/^Column\s\d\s:\s+Sequence\sDescription\sfrom\s/ ) {
220 $sum_indv_tab = 1;
221 open ($sum_indv_tab_fh, ">", $sum_indv_tab_file) || die ("\nERROR: the file $sum_indv_tab_file could not be found\n");
222 push @sum_indv_tab, $sum_indv_tab_file;
224 elsif ($line =~ m/^Log\sFragmentation\sportion\sof\sMAP\sfor\smotif\s/ ) {
225 $sum_indv_tab = 0;
226 my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/static/documents/tempfiles/motifs_finder/");
227 #my ($fh, $filename) =tempfile("XXXXX", DIR => "$basePath/motifs_finder/");
230 if ($line =~ m/^\s+MOTIF\s+([a-z])/){
231 $motif = 1;
232 $logo_file = $filename."_".$1."_wl_input.fasta";
233 $lf_id = $1;
234 $motif_tab = $filename."_".$1;
235 $freq_tab_file = $filename."_freq_".$1;
236 $prob_tab_file = $filename."_prob_".$1;
237 $BGPM_tab_file = $filename."_BGPM_".$1;
238 $sum_indv_tab_file = $filename."_sum_indv_".$1;
240 elsif ($line =~ m/^Log\s+Fragmentation\s+portion\s+/ ) {
241 $motif = 0;
242 close $wl_out_fh;
243 close $motif_tab_fh;
244 close $freq_tab_fh;
245 close $prob_tab_fh;
246 close $BGPM_tab_fh;
247 close $sum_indv_tab_fh;
251 my @values_motif;
252 my $motif_table_value;
253 foreach $filename (@motif_table_file){
254 $motif_table_value = get_result_table($filename);
255 push @values_motif, $motif_table_value;
257 my $freq_tab_value;
258 my @value_freq_tab;
259 foreach $filename (@freq_tab) {
260 $freq_tab_value = get_freq_table($filename);
261 push @value_freq_tab, $freq_tab_value;
263 my $prob_tab_value;
264 my @value_prob_tab;
266 foreach $filename (@prob_tab) {
267 $prob_tab_value = get_prob_table($filename);
268 push @value_prob_tab, $prob_tab_value;
270 my $BGPM_tab_value;
271 my @value_BGPM_tab;
272 foreach $filename (@BGPM_tab) {
273 $BGPM_tab_value = get_BGPM_table($filename);
274 push @value_BGPM_tab, $BGPM_tab_value;
276 my $sum_indv_tab_value;
277 my @value_sum_indv_tab;
278 foreach $filename (@sum_indv_tab) {
279 $sum_indv_tab_value = get_sum_indv_table($filename);
280 push @value_sum_indv_tab, $sum_indv_tab_value;
283 # To run weblogo
284 my $cmd;
285 foreach $filename (@motif_element){
286 $cmd = "$cluster_shared_bindir/weblogo/seqlogo -F PNG -d 0.5 -T 1 -b -e -B 2 -h 5 -w 18 -y bits -a -M -n -Y -c -f $filename -o ".$filename."_weblogo";
287 push (@logo_image, basename($filename."_weblogo.png"));
288 my $error = system($cmd);
291 print STDERR Dumper(\@logo_image);
292 $c->stash->{sum} = join("<br/>", @sum);
293 $c->stash->{sum_indv_tab} = \@value_sum_indv_tab;
294 $c->stash->{BGPM_tab} = \@value_BGPM_tab;
295 $c->stash->{prob_tab} = \@value_prob_tab;
296 $c->stash->{freq_tab} = \@value_freq_tab;
297 $c->stash->{motif_tab} = \@values_motif;
298 $c->stash->{tfile} = join("\n", @string_result);
299 $c->stash->{res} = join("<br/>", @string_result);
300 $c->stash->{outfile} = $filename."_output";
301 $c->stash->{parameters} = "$sequence $widths_of_motifs $numbers_of_sites @string_result";
302 $c->stash->{logo} = \@logo_image;
303 $c->stash->{logoID} = \@logofile_id;
304 $c->stash->{logowidth} = \@motif_width;
306 $c->stash->{template} = 'tools/motifs_finder/motif_output.mas';
310 sub download_file :Path('/result/') :Args(0) {
311 my ($self, $c) = @_;
312 my $params = $c->req->body_params();
313 my $filename = $c->req->param("file_name");
314 my $result_file = $c->req->param("output_file");
316 if ($result_file) {
317 $result_file =~ s/<br\/>/\n/gi;
320 #----------------------------------- send file
321 $c->res->content_type('text/plain');
322 $c->res->header('Content-Disposition', qq[attachment; filename="$filename"]);
323 $c->res->body($result_file);
324 #$c->stash->{template} = '/output.mas';
327 sub get_result_table {
328 my $motif_table_file = shift;
329 my $motif_table_path = $motif_table_file;
331 #print "MOTIF TAB1 PATH: $motif_table_path\n";
332 open (my $tab_fh, "<", $motif_table_path) || die ("\nERROR: the file $motif_table_path could not be found\n");
333 my $html_table .= "<div class='container'>\n";
334 $html_table .= "<table>\n";
335 $html_table .= "<table class='table table-bordered' border=1 cellpadding=0>";
336 $html_table .= "<tr>\n";
337 $html_table .= "<th>Seq Num</th>\t";
338 $html_table .= "<th>Site Num</th>\t";
339 $html_table .= "<th>Left Loc</th>\t";
340 # $html_table .= "<th></th>\t";
341 $html_table .= "<th>Motifs</th>\t";
342 # $html_table .= "<th></th>\t";
343 $html_table .= "<th>Right Loc</th>\t";
344 $html_table .= "<th>Motif Prob</th>\t";
345 $html_table .= "<th>F/R Motif</th>\t";
346 $html_table .= " <th>Seq Desc</th>\t";
347 $html_table .= " </tr>\n";
349 while (my $inpbuf = <$tab_fh>){
350 chomp $inpbuf;
351 $inpbuf =~ s/,//g;
352 my ($kk,$seq_num,$site_num,$left_loc,$emty1,$motif,$emty2,$right_loc,$prob,$f_motif,$seq_desc) = split(/\s+/,$inpbuf);
353 $html_table .= "<tr>\n";
354 $html_table .= "<td>$seq_num</td>\t";
355 $html_table .= "<td>$site_num</td>\t";
356 $html_table .= "<td>$left_loc</td>\t";
357 # $html_table .= "<td>$emty1</td>\t";
358 $html_table .= "<td>$motif</td>\t";
359 # $html_table .= "<td>$emty2</td>\t";
360 $html_table .= "<td>$right_loc</td>\t";
361 $html_table .= "<td>$prob</td>\t";
362 $html_table .= "<td>$f_motif</td>\t";
363 $html_table .= " <td>$seq_desc</td>\t";
364 $html_table .= " </tr>\n";
367 close $tab_fh;
368 $html_table .= "</table>\n";
369 $html_table .= "</div>";
372 sub get_freq_table {
373 my $freq_tab = shift;
374 my $freq_table_path = $freq_tab;
375 open (my $tab_fh, "<", $freq_table_path) || die ("\nERROR: the file $freq_table_path could not be found\n");
376 my $html_freq_table .= "<div class='container'>\n";
377 $html_freq_table .= "<table>\n";
378 $html_freq_table .= "<table class='table table-bordered' border=1 cellpadding=0>";
379 #$html_freq_table .= "<div id=prob>";
380 #$html_freq_table .= "<table style=float:left>";
381 $html_freq_table .= "<tr>\n";
382 $html_freq_table .= "<th>Position #</th>\t";
383 $html_freq_table .= "<th>A</th>\t";
384 $html_freq_table .= "<th>T</th>\t";
385 $html_freq_table .= "<th>C</th>\t";
386 $html_freq_table .= "<th>G</th>\t";
387 $html_freq_table .= "<th>Info</th>\t";
388 $html_freq_table .= "</tr>\n";
390 while (my $inpbuf = <$tab_fh>){
391 chomp $inpbuf;
392 $inpbuf =~ s/_//g;
393 $inpbuf =~ s/\|//g;
394 $inpbuf =~ s/^\s+//g;
395 if ($inpbuf =~ m/^\S+/ && $inpbuf !~ m/^Pos.\s+/){
396 my ($pos_num,$A,$T,$C,$G,$info,) = split(/\s+/,$inpbuf);
397 $html_freq_table .= "<tr>\n";
398 $html_freq_table .= " <td>$pos_num</td>\t";
399 $html_freq_table .= " <td>$A</td>\t";
400 $html_freq_table .= " <td>$T</td>\t";
401 $html_freq_table .= " <td>$C</td>\t";
402 $html_freq_table .= " <td>$G</td>\t";
403 $html_freq_table .= " <td>$info</td>\t";
404 $html_freq_table .= " <tr>\n";
408 close $tab_fh;
409 $html_freq_table .= "</table>\n";
410 $html_freq_table .= "</div>\n";
413 sub get_prob_table {
414 my $prob_tab = shift;
415 my $prob_table_path = $prob_tab;
416 open (my $tab_fh, "<", $prob_table_path) || die ("\nERROR: the file $prob_table_path could not be found\n");
417 my $html_prob_table .= "<div class='container'>\n";
418 $html_prob_table .= "<table>\n";
419 $html_prob_table .= "<table class='table table-bordered' border=1 cellpadding=0>";
420 $html_prob_table .= "<div id=prob>";
421 $html_prob_table .= "<tr>\n";
422 $html_prob_table .= "<th>Position #</th>\t";
423 $html_prob_table .= "<th>A</th>\t";
424 $html_prob_table .= "<th>T</th>\t";
425 $html_prob_table .= "<th>C</th>\t";
426 $html_prob_table .= "<th>G</th>\t";
427 $html_prob_table .= "</tr>\n";
429 while (my $inpbuf = <$tab_fh>){
430 chomp $inpbuf;
431 $inpbuf =~ s/_//g;
432 $inpbuf =~ s/\|//g;
433 $inpbuf =~ s/^\s+//g;
434 if ($inpbuf =~ m/^\S+/ && $inpbuf !~ m/^Pos.\s+/){
435 #if ($inpbuf =~ m/^Background\sprobability\smodel/){
436 #$inpbuf =~ s/\s//g;
437 #$inpbuf =~ s/Backgroundprobabilitymodel/BP-Model/g;
439 my ($pos_num,$A,$T,$C,$G) = split(/\s+/,$inpbuf);
440 $html_prob_table .= " <tr>\n";
441 $html_prob_table .= " <td>$pos_num</td>\t";
442 $html_prob_table .= " <td>$A</td>\t";
443 $html_prob_table .= " <td>$T</td>\t";
444 $html_prob_table .= " <td>$C</td>\t";
445 $html_prob_table .= " <td>$G</td>\t";
446 $html_prob_table .= " <tr>\n";
450 close $tab_fh;
451 $html_prob_table .= "</table>\n";
452 $html_prob_table .= "</div>\n";
456 sub get_BGPM_table {
457 my $BGPM_tab = shift;
458 my $BGPM_table_path = $BGPM_tab;
459 open (my $tab_fh, "<", $BGPM_table_path) || die ("\nERROR: the file $BGPM_table_path could not be found\n");
460 my $html_BGPM_table .= "<div class='container'>\n";
461 $html_BGPM_table .= "<table style='width:100%'>\n";
462 $html_BGPM_table .= "<table class='table table-bordered' border=1 cellpadding=0>";
463 $html_BGPM_table .= "<thead>";
464 $html_BGPM_table .= " <tr>\n";
465 $html_BGPM_table .= "<th style='width:10%'>A</th>\t";
466 $html_BGPM_table .= " <th style='width:10%'>T</th>\t";
467 $html_BGPM_table .= " <th style='width:10%'>C</th>\t";
468 $html_BGPM_table .= " <th style='width:10%'>G</th>\t";
469 $html_BGPM_table .= " </tr>\n";
470 $html_BGPM_table .= " </thead>\n";
472 while (my $inpbuf = <$tab_fh>){
473 chomp $inpbuf;
474 $inpbuf =~ s/^\s+//g;
475 my ($A,$T,$C,$G) = split(/\s+/,$inpbuf);
476 $html_BGPM_table .= "<tbody>\n";
477 $html_BGPM_table .= "<tr>\n";
478 $html_BGPM_table .= "<td>$A</td>\t";
479 $html_BGPM_table .= "<td>$T</td>\t";
480 $html_BGPM_table .= "<td>$C</td>\t";
481 $html_BGPM_table .= "<td>$G</td>\t";
482 $html_BGPM_table .= "<tr>\n";
483 $html_BGPM_table .= "</tbody>\n";
486 close $tab_fh;
487 $html_BGPM_table .= "</table>\n";
488 $html_BGPM_table .= "</div>";
491 sub get_sum_indv_table {
492 my $sum_indv_tab = shift;
493 my $sum_indv_table_path = $sum_indv_tab;
494 open (my $tab_fh, "<", $sum_indv_table_path) || die ("\nERROR: the file $sum_indv_table_path could not be found\n");
495 my $html_sum_indv_table;
496 while ( my $inpbuf = <$tab_fh>){
497 chomp $inpbuf;
498 if ($inpbuf =~ m/^\S+/ ){
499 my $html_inpbuf = join("<br/>", $inpbuf);
500 $html_sum_indv_table .= "$html_inpbuf<br/>\n";
504 close $tab_fh;
505 $html_sum_indv_table;
510 =encoding utf8
512 =head1 AUTHOR
514 Alex Ogbonna (aco46@cornell.edu)
516 =head1 LICENSE
518 This library is free software. You can redistribute it and/or modify
519 it under the same terms as Perl itself.
521 =cut
523 __PACKAGE__->meta->make_immutable;