2 package SGN
::Controller
::AJAX
::Vigs
;
11 use Bio
::BLAST2
::Database
;
13 use File
::Temp qw
| tempfile
|;
14 use CXGN
::Graphics
::VigsGraph
;
16 BEGIN { extends
'Catalyst::Controller::REST' }
19 default => 'application/json',
21 map => { 'application/json' => 'JSON' },
26 # check input data, create Bowtie2 input data and run Bowtie2
27 sub run_bowtie2
:Path
('/tools/vigs/result') :Args
(0) {
30 # to store erros as they happen
33 # get variables from catalyst object
34 my $params = $c->req->body_params();
35 my $sequence = $c->req->param("sequence");
36 my $fragment_size = $c->req->param("fragment_size");
37 my $seq_fragment = $c->req->param("seq_fragment");
38 my $missmatch = $c->req->param("missmatch");
39 my $db_id = $c->req->param("database");
41 # clean the sequence and check if there are more than one sequence pasted
42 if ($sequence =~ tr/>/>/ > 1) {
43 push ( @errors , "Please, paste only one sequence.\n");
45 my $id = "pasted_sequence";
48 if ($sequence =~ /^>/) {
49 $sequence =~ s/[ \,\-\.\#\(\)\%\'\"\[\]\{\}\:\;\=\+\\\/]/_
/gi
;
50 @seq = split(/\s/,$sequence);
52 if ($seq[0] =~ />(\S+)/) {
56 $sequence = join("",@seq);
57 } elsif ($sequence =~ tr/acgtACGT/acgtACGT/ < 30) {
59 # save pasted gene name
60 my $pasted_gene_name = $sequence;
61 $sequence =~ s/\.\d//;
62 $sequence =~ s/\.\d//;
64 # print STDERR "seq: $sequence\n";
66 # get databases path from the configuration file
67 my $db_path = $c->config->{vigs_db_path
};
69 # get database names from their path
70 # my @tmp_dbs = glob("$db_path/*.rev.1.bt2");
71 my @tmp_dbs = glob("$db_path/*.rev.1.ebwt");
73 # find the pasted gene name in the BLAST dbs and leave the loop when the name is found
74 foreach my $db_path (@tmp_dbs) {
75 # $db_path =~ s/\.rev\.1\.bt2//;
76 $db_path =~ s/\.rev\.1\.ebwt//;
77 # print STDERR "DB: $db_path\n";
79 my $fs = Bio
::BLAST2
::Database
->open(full_file_basename
=> "$db_path",);
81 if ($fs->get_sequence($sequence)) {
82 my $seq_obj = $fs->get_sequence($sequence);
83 $sequence = $seq_obj->seq();
87 # print STDERR "seq: $sequence\n";
89 if ($sequence =~ tr/acgtACGT/acgtACGT/ < 30) {
90 push ( @errors , "Your input sequence is not valid: $pasted_gene_name\n");
94 $sequence =~ s/[^ACGT]+//gi;
97 # Check input sequence and fragment size
98 if (length($sequence) < 100) {
99 push ( @errors , "You should paste a valid sequence (100 bp or longer) in the VIGS Tool Sequence window.\n");
101 elsif ($sequence =~ /([^ACGT]+)/i) {
102 push (@errors, "Unexpected characters found in the sequence: $1\n");
104 elsif (length($sequence) < $fragment_size) {
105 push (@errors, "n-mer size must be lower or equal to sequence length.\n");
108 if (!$fragment_size ||$fragment_size < 18 || $fragment_size > 24 ) {
109 push (@errors, "n-mer size ($fragment_size) value must be between 18-24 bp.\n");
111 if (!$seq_fragment || $seq_fragment < 100 || $seq_fragment > length($sequence)) {
112 push (@errors, "Wrong fragment size ($seq_fragment), it must be higher than 100 bp and lower than sequence length\n");
114 if ($missmatch =~ /[^\d]/ || $missmatch < 0 || $missmatch > 2 ) {
115 push (@errors, "miss-match value ($missmatch) must be between 0-2\n");
117 # if ($missmatch =~ /[^\d]/ || $missmatch < 0 || $missmatch > 1 ) {
118 # push (@errors, "miss-match value ($missmatch) must be between 0-1\n");
121 # Send error message to the web if something is wrong
122 if (scalar (@errors) > 0){
123 my $user_errors = join("<br />", @errors);
124 $c->stash->{rest
} = {error
=> $user_errors};
128 # generate temporary file name for analysis with Bowtie2.
129 my ($seq_fh, $seq_filename) = tempfile
("vigsXXXXXX", DIR
=> $c->config->{'cluster_shared_tempdir'},);
131 # Lets create the fragment fasta file
132 my $query = Bio
::Seq
->new(-seq
=>$sequence, -id
=> $id || "temp");
133 my $io = Bio
::SeqIO
->new(-format
=>'fasta', -file
=>">".$seq_filename.".fragments");
134 foreach my $i (1..$query->length()-$fragment_size) {
135 my $subseq = $query->subseq($i, $i + $fragment_size -1);
136 my $subseq_obj = Bio
::Seq
->new(-seq
=>$subseq, -display_id
=>"tmp_$i");
137 $io->write_seq($subseq_obj);
140 $c->stash->{query
} = $query;
143 # Lets create the query fasta file
144 my $query_file = $seq_filename;
145 my $seq = Bio
::Seq
->new(-seq
=>$sequence, -id
=> $id || "temp");
146 $io = Bio
::SeqIO
->new(-format
=>'fasta', -file
=>">".$query_file);
148 $io->write_seq($seq);
151 if (! -e
$query_file) { die "Query file failed to be created."; }
153 # get arguments to Run Bowtie2
154 # print STDERR "DATABASE SELECTED: $db_id\n";
156 my $basename = $c->config->{vigs_db_path
};
157 my $database = $db_id;
158 my $database_fullpath = File
::Spec
->catfile($basename, $database);
159 my $database_title = $db_id;
162 my $bowtie2_path = $c->config->{cluster_shared_bindir
};
165 # my @command = (File::Spec->catfile($bowtie2_path, "bowtie2"),
171 # " -L ".$fragment_size,
174 # " -x ".$database_fullpath,
176 # " -U ".$query_file.".fragments",
177 # " -S ".$query_file.".bt2.out",
180 # print STDERR "Bowtie2 COMMAND: ".(join " ",@command)."\n";
181 # my $err = system(@command);
183 # bowtie command allowing 2 missmatch
184 my $err = system("$bowtie2_path/bowtie --all -v 2 --threads 1 --seedlen $fragment_size --sam --sam-nohead $database_fullpath -f $query_file.fragments $query_file.bt2.out");
188 $c->stash->{rest
} = {error
=> "Bowtie execution failed"};
191 $id = $urlencode{basename
($seq_filename)};
192 $c->stash->{rest
} = {jobid
=>basename
($seq_filename),
193 seq_length
=> length($sequence),
194 db_name
=> $database_title,
200 sub get_expression_hash
{
201 my $expr_file = shift;
205 open (my $expr_fh, $expr_file);
206 my @file = <$expr_fh>;
209 my $first_line = shift(@file);
211 $first_line =~ s/\"//g;
212 my @header = split(/\t/,$first_line);
213 $expr_values{"header"} = \
@header;
215 # print "header: ".Dumper(@header)."\n";
218 foreach my $line (@file) {
221 my @line_cols = split(/\t/, $line);
222 my $gene_id = shift(@line_cols);
224 $gene_id =~ s/\.\d//;
225 $gene_id =~ s/\.\d//;
227 $expr_values{$gene_id} = \
@line_cols;
233 sub view
:Path
('/tools/vigs/view') Args
(0) {
237 my $sequence = $c->req->param("sequence");
238 my $seq_filename = $c->req->param("id");
239 my $fragment_size = $c->req->param("fragment_size") || 21;
240 my $seq_fragment = $c->req->param("seq_fragment") || 300;
241 my $missmatch = $c->req->param("missmatch") || 0;
242 my $coverage = $c->req->param("targets") || 0;
243 my $db = $c->req->param("database")||undef;
244 my $expr_file = $c->req->param("expr_file") || undef;
245 my $expr_hash = undef;
246 my $status = $c->req->param("status") || 1;
248 if (defined($expr_file)) {
249 my $expr_dir = $c->generated_file_uri('expr_files', $expr_file);
250 my $expr_path = $c->path_to($expr_dir);
252 $expr_hash = get_expression_hash
($expr_path);
253 # print STDERR "hash header: ".Dumper($$expr_hash{"header"})."\n";
256 $seq_filename = File
::Spec
->catfile($c->config->{cluster_shared_tempdir
}, $seq_filename);
257 $seq_filename =~ s/\%2F/\//g
;
259 my $io = Bio
::SeqIO
->new(-file
=>$seq_filename, -format
=>'fasta');
260 my $query = $io->next_seq();
265 my $basename = $c->config->{vigs_db_path
};
267 my $bdb_full_name = File
::Spec
->catfile($basename, $database);
269 # send variables to VigsGraph
270 my $vg = CXGN
::Graphics
::VigsGraph
->new();
271 $vg->bwafile($seq_filename.".bt2.out");
272 $vg->fragment_size($fragment_size);
273 $vg->seq_fragment($seq_fragment);
274 $vg->query_seq($sequence);
276 if (defined($expr_hash)) {
277 $vg->expr_hash($expr_hash);
280 # parse Bowtie 2 result file
282 $vg->parse($missmatch);
286 # get best region and scores
287 my @regions = [0,0,0,1,1,1];
288 my @best_region = [1,1];
289 my $seq_length = length($query->seq());
291 if ($coverage == 0) {
292 $coverage = $vg->get_best_coverage;
295 while (!$regions[1] || $regions[1] <= 0) {
297 @regions = $vg->longest_vigs_sequence($coverage, $seq_length);
299 print STDERR
"score: $regions[1], loop iteration: $counter, coverage: $coverage\n";
304 if ($regions[1] <= 0) {
305 $coverage = $coverage + 1;
310 @regions = $vg->longest_vigs_sequence($coverage, $seq_length);
314 @best_region = [$regions[4], $regions[5]];
316 if ($coverage == 0) {
320 # get si_rna coords and image height
321 my $matches_AoA = $vg->get_matches();
322 my $img_height = $vg->get_img_height();
324 # get fasta for best sequence
326 $tmp_str = substr($query->seq(), $regions[4], $regions[5]-$regions[4]+1);
327 my @seq60 = $tmp_str =~ /(.{1,60})/g;
328 my $seq_str = join('<br />',@seq60);
330 # add expression values to subjects names
332 if (defined($$expr_hash{"header"})) {
333 $expr_msg = [$vg->add_expression_values($expr_hash)];
335 $expr_msg = [$vg->subjects_by_match_count(0, $vg->matches())];
339 $c->stash->{rest
} = {
340 score
=> sprintf("%.2f",($regions[1]*100/$seq_fragment)/$coverage),
341 coverage
=> $coverage,
342 f_size
=> $seq_fragment,
343 cbr_start
=> ($regions[4]+1),
344 cbr_end
=> ($regions[5]+1),
345 expr_msg
=> $expr_msg,
346 ids
=> [ $vg->subjects_by_match_count($bdb_full_name, $vg->matches()) ],
347 best_seq
=> $seq_str,
348 query_seq
=> $query->seq(),
349 all_scores
=> $regions[2],
350 matches_aoa
=> $matches_AoA,
351 missmatch
=> $missmatch,
352 img_height
=> ($img_height+52)};
357 return join '&', map "$urlencode{$_}=$urlencode{$args{$_}}", distinct evens
@_;
361 sub upload_expression_file_for_vigs
: Path
('/ajax/upload_expression_file') : ActionClass
('REST') { }
363 sub upload_expression_file_for_vigs_POST
: Args
(0) {
366 my $upload = $c->req->upload("expression_file");
367 my $expr_file = undef;
369 if (defined($upload)) {
370 $expr_file = $upload->tempname;
371 $expr_file =~ s/\/tmp\///;
373 my $expr_dir = $c->generated_file_uri('expr_files', $expr_file);
374 my $final_path = $c->path_to($expr_dir);
376 write_file
($final_path, $upload->slurp);
379 $c->stash->{rest
} = {
380 expr_file
=> $expr_file,