Merge pull request #5205 from solgenomics/topic/generic_trial_upload
[sgn.git] / lib / SGN / Controller / AJAX / Vigs.pm
blob7db1f24490509c230129f1910811797515120aca
2 package SGN::Controller::AJAX::Vigs;
4 use Moose;
6 use File::Basename;
7 use File::Slurp;
8 use File::Spec;
10 use Bio::SeqIO;
11 use Bio::BLAST2::Database;
12 use Data::Dumper;
13 use File::Temp qw | tempfile |;
14 use CXGN::Graphics::VigsGraph;
16 BEGIN { extends 'Catalyst::Controller::REST' }
18 __PACKAGE__->config(
19 default => 'application/json',
20 stash_key => 'rest',
21 map => { 'application/json' => 'JSON' },
24 our %urlencode;
26 # check input data, create Bowtie2 input data and run Bowtie2
27 sub run_bowtie2 :Path('/tools/vigs/result') :Args(0) {
28 my ($self, $c) = @_;
30 # to store erros as they happen
31 my @errors;
33 # get variables from catalyst object
34 my $params = $c->req->body_params();
35 my $sequence = $c->req->param("sequence");
36 my $fragment_size = $c->req->param("fragment_size");
37 my $seq_fragment = $c->req->param("seq_fragment");
38 my $missmatch = $c->req->param("missmatch");
39 my $db_id = $c->req->param("database");
41 # clean the sequence and check if there are more than one sequence pasted
42 if ($sequence =~ tr/>/>/ > 1) {
43 push ( @errors , "Please, paste only one sequence.\n");
45 my $id = "pasted_sequence";
46 my @seq = [];
48 if ($sequence =~ /^>/) {
49 $sequence =~ s/[ \,\-\.\#\(\)\%\'\"\[\]\{\}\:\;\=\+\\\/]/_/gi;
50 @seq = split(/\s/,$sequence);
52 if ($seq[0] =~ />(\S+)/) {
53 shift(@seq);
54 $id = $1;
56 $sequence = join("",@seq);
57 } elsif ($sequence =~ tr/acgtACGT/acgtACGT/ < 30) {
59 # save pasted gene name
60 my $pasted_gene_name = $sequence;
61 $sequence =~ s/\.\d//;
62 $sequence =~ s/\.\d//;
64 # print STDERR "seq: $sequence\n";
66 # get databases path from the configuration file
67 my $db_path = $c->config->{vigs_db_path};
69 # get database names from their path
70 # my @tmp_dbs = glob("$db_path/*.rev.1.bt2");
71 my @tmp_dbs = glob("$db_path/*.rev.1.ebwt");
73 # find the pasted gene name in the BLAST dbs and leave the loop when the name is found
74 foreach my $db_path (@tmp_dbs) {
75 # $db_path =~ s/\.rev\.1\.bt2//;
76 $db_path =~ s/\.rev\.1\.ebwt//;
77 # print STDERR "DB: $db_path\n";
79 my $fs = Bio::BLAST2::Database->open(full_file_basename => "$db_path",);
81 if ($fs->get_sequence($sequence)) {
82 my $seq_obj = $fs->get_sequence($sequence);
83 $sequence = $seq_obj->seq();
84 last;
87 # print STDERR "seq: $sequence\n";
89 if ($sequence =~ tr/acgtACGT/acgtACGT/ < 30) {
90 push ( @errors , "Your input sequence is not valid: $pasted_gene_name\n");
93 } else {
94 $sequence =~ s/[^ACGT]+//gi;
97 # Check input sequence and fragment size
98 if (length($sequence) < 100) {
99 push ( @errors , "You should paste a valid sequence (100 bp or longer) in the VIGS Tool Sequence window.\n");
101 elsif ($sequence =~ /([^ACGT]+)/i) {
102 push (@errors, "Unexpected characters found in the sequence: $1\n");
104 elsif (length($sequence) < $fragment_size) {
105 push (@errors, "n-mer size must be lower or equal to sequence length.\n");
108 if (!$fragment_size ||$fragment_size < 18 || $fragment_size > 24 ) {
109 push (@errors, "n-mer size ($fragment_size) value must be between 18-24 bp.\n");
111 if (!$seq_fragment || $seq_fragment < 100 || $seq_fragment > length($sequence)) {
112 push (@errors, "Wrong fragment size ($seq_fragment), it must be higher than 100 bp and lower than sequence length\n");
114 if ($missmatch =~ /[^\d]/ || $missmatch < 0 || $missmatch > 2 ) {
115 push (@errors, "miss-match value ($missmatch) must be between 0-2\n");
117 # if ($missmatch =~ /[^\d]/ || $missmatch < 0 || $missmatch > 1 ) {
118 # push (@errors, "miss-match value ($missmatch) must be between 0-1\n");
121 # Send error message to the web if something is wrong
122 if (scalar (@errors) > 0){
123 my $user_errors = join("<br />", @errors);
124 $c->stash->{rest} = {error => $user_errors};
125 return;
128 # generate temporary file name for analysis with Bowtie2.
129 my ($seq_fh, $seq_filename) = tempfile("vigsXXXXXX", DIR=> $c->config->{'cluster_shared_tempdir'},);
131 # Lets create the fragment fasta file
132 my $query = Bio::Seq->new(-seq=>$sequence, -id=> $id || "temp");
133 my $io = Bio::SeqIO->new(-format=>'fasta', -file=>">".$seq_filename.".fragments");
134 foreach my $i (1..$query->length()-$fragment_size) {
135 my $subseq = $query->subseq($i, $i + $fragment_size -1);
136 my $subseq_obj = Bio::Seq->new(-seq=>$subseq, -display_id=>"tmp_$i");
137 $io->write_seq($subseq_obj);
140 $c->stash->{query} = $query;
141 $io->close();
143 # Lets create the query fasta file
144 my $query_file = $seq_filename;
145 my $seq = Bio::Seq->new(-seq=>$sequence, -id=> $id || "temp");
146 $io = Bio::SeqIO->new(-format=>'fasta', -file=>">".$query_file);
148 $io->write_seq($seq);
149 $io->close();
151 if (! -e $query_file) { die "Query file failed to be created."; }
153 # get arguments to Run Bowtie2
154 # print STDERR "DATABASE SELECTED: $db_id\n";
156 my $basename = $c->config->{vigs_db_path};
157 my $database = $db_id;
158 my $database_fullpath = File::Spec->catfile($basename, $database);
159 my $database_title = $db_id;
161 # run bowtie2
162 my $bowtie2_path = $c->config->{cluster_shared_bindir};
164 # bowtie2 command
165 # my @command = (File::Spec->catfile($bowtie2_path, "bowtie2"),
166 # " --threads 1",
167 # " --very-fast",
168 # " --no-head",
169 # " --omit-sec-seq",
170 # " --end-to-end",
171 # " -L ".$fragment_size,
172 # " -N 1",
173 # " -a",
174 # " -x ".$database_fullpath,
175 # " -f",
176 # " -U ".$query_file.".fragments",
177 # " -S ".$query_file.".bt2.out",
178 # );
180 # print STDERR "Bowtie2 COMMAND: ".(join " ",@command)."\n";
181 # my $err = system(@command);
183 # bowtie command allowing 2 missmatch
184 my $err = system("$bowtie2_path/bowtie --all -v 2 --threads 1 --seedlen $fragment_size --sam --sam-nohead $database_fullpath -f $query_file.fragments $query_file.bt2.out");
187 if ($err) {
188 $c->stash->{rest} = {error => "Bowtie execution failed"};
190 else {
191 $id = $urlencode{basename($seq_filename)};
192 $c->stash->{rest} = {jobid =>basename($seq_filename),
193 seq_length => length($sequence),
194 db_name => $database_title,
200 sub get_expression_hash {
201 my $expr_file = shift;
203 my %expr_values;
205 open (my $expr_fh, $expr_file);
206 my @file = <$expr_fh>;
208 # get header
209 my $first_line = shift(@file);
210 chomp($first_line);
211 $first_line =~ s/\"//g;
212 my @header = split(/\t/,$first_line);
213 $expr_values{"header"} = \@header;
215 # print "header: ".Dumper(@header)."\n";
217 # save gene data
218 foreach my $line (@file) {
219 chomp($line);
220 $line =~ s/\"//g;
221 my @line_cols = split(/\t/, $line);
222 my $gene_id = shift(@line_cols);
224 $gene_id =~ s/\.\d//;
225 $gene_id =~ s/\.\d//;
227 $expr_values{$gene_id} = \@line_cols;
230 return \%expr_values
233 sub view :Path('/tools/vigs/view') Args(0) {
234 my $self = shift;
235 my $c = shift;
237 my $sequence = $c->req->param("sequence");
238 my $seq_filename = $c->req->param("id");
239 my $fragment_size = $c->req->param("fragment_size") || 21;
240 my $seq_fragment = $c->req->param("seq_fragment") || 300;
241 my $missmatch = $c->req->param("missmatch") || 0;
242 my $coverage = $c->req->param("targets") || 0;
243 my $db = $c->req->param("database")||undef;
244 my $expr_file = $c->req->param("expr_file") || undef;
245 my $expr_hash = undef;
246 my $status = $c->req->param("status") || 1;
248 if (defined($expr_file)) {
249 my $expr_dir = $c->generated_file_uri('expr_files', $expr_file);
250 my $expr_path = $c->path_to($expr_dir);
252 $expr_hash = get_expression_hash($expr_path);
253 # print STDERR "hash header: ".Dumper($$expr_hash{"header"})."\n";
256 $seq_filename = File::Spec->catfile($c->config->{cluster_shared_tempdir}, $seq_filename);
257 $seq_filename =~ s/\%2F/\//g;
259 my $io = Bio::SeqIO->new(-file=>$seq_filename, -format=>'fasta');
260 my $query = $io->next_seq();
262 my %matches;
263 my @queries = ();
265 my $basename = $c->config->{vigs_db_path};
266 my $database = $db;
267 my $bdb_full_name = File::Spec->catfile($basename, $database);
269 # send variables to VigsGraph
270 my $vg = CXGN::Graphics::VigsGraph->new();
271 $vg->bwafile($seq_filename.".bt2.out");
272 $vg->fragment_size($fragment_size);
273 $vg->seq_fragment($seq_fragment);
274 $vg->query_seq($sequence);
276 if (defined($expr_hash)) {
277 $vg->expr_hash($expr_hash);
280 # parse Bowtie 2 result file
281 if ($status == 1) {
282 $vg->parse($missmatch);
286 # get best region and scores
287 my @regions = [0,0,0,1,1,1];
288 my @best_region = [1,1];
289 my $seq_length = length($query->seq());
291 if ($coverage == 0) {
292 $coverage = $vg->get_best_coverage;
294 my $counter = 0;
295 while (!$regions[1] || $regions[1] <= 0) {
296 $counter++;
297 @regions = $vg->longest_vigs_sequence($coverage, $seq_length);
299 print STDERR "score: $regions[1], loop iteration: $counter, coverage: $coverage\n";
301 if ($counter >= 3) {
302 last;
304 if ($regions[1] <= 0) {
305 $coverage = $coverage + 1;
309 else {
310 @regions = $vg->longest_vigs_sequence($coverage, $seq_length);
314 @best_region = [$regions[4], $regions[5]];
316 if ($coverage == 0) {
317 $coverage = 1;
320 # get si_rna coords and image height
321 my $matches_AoA = $vg->get_matches();
322 my $img_height = $vg->get_img_height();
324 # get fasta for best sequence
325 my $tmp_str="";
326 $tmp_str = substr($query->seq(), $regions[4], $regions[5]-$regions[4]+1);
327 my @seq60 = $tmp_str =~ /(.{1,60})/g;
328 my $seq_str = join('<br />',@seq60);
330 # add expression values to subjects names
331 my $expr_msg;
332 if (defined($$expr_hash{"header"})) {
333 $expr_msg = [$vg->add_expression_values($expr_hash)];
334 } else {
335 $expr_msg = [$vg->subjects_by_match_count(0, $vg->matches())];
338 # return variables
339 $c->stash->{rest} = {
340 score => sprintf("%.2f",($regions[1]*100/$seq_fragment)/$coverage),
341 coverage => $coverage,
342 f_size => $seq_fragment,
343 cbr_start => ($regions[4]+1),
344 cbr_end => ($regions[5]+1),
345 expr_msg => $expr_msg,
346 ids => [ $vg->subjects_by_match_count($bdb_full_name, $vg->matches()) ],
347 best_seq => $seq_str,
348 query_seq => $query->seq(),
349 all_scores => $regions[2],
350 matches_aoa => $matches_AoA,
351 missmatch => $missmatch,
352 img_height => ($img_height+52)};
355 sub hash2param {
356 my %args = @_;
357 return join '&', map "$urlencode{$_}=$urlencode{$args{$_}}", distinct evens @_;
361 sub upload_expression_file_for_vigs : Path('/ajax/upload_expression_file') : ActionClass('REST') { }
363 sub upload_expression_file_for_vigs_POST : Args(0) {
364 my ($self, $c) = @_;
366 my $upload = $c->req->upload("expression_file");
367 my $expr_file = undef;
369 if (defined($upload)) {
370 $expr_file = $upload->tempname;
371 $expr_file =~ s/\/tmp\///;
373 my $expr_dir = $c->generated_file_uri('expr_files', $expr_file);
374 my $final_path = $c->path_to($expr_dir);
376 write_file($final_path, $upload->slurp);
379 $c->stash->{rest} = {
380 expr_file => $expr_file,
381 success => "1",