clean
[sgn.git] / lib / SGN / Controller / Bulk.pm
blobc5b3a3891fe8ea7aed7edce97c3aa51034bbf577
1 package SGN::Controller::Bulk;
2 use 5.010;
3 use Moose;
4 use namespace::autoclean;
5 use Cache::File;
6 use Digest::SHA1 qw/sha1_hex/;
7 use File::Path qw/make_path/;
8 use CXGN::Page::FormattingHelpers qw/modesel simple_selectbox_html /;
9 use CXGN::Tools::Text qw/trim/;
10 use SGN::View::Feature qw/mrna_cds_protein_sequence get_descriptions/;
11 #use Carp::Always;
13 BEGIN { extends 'Catalyst::Controller' }
15 has feature_cache => (
16 isa => 'Cache::File',
17 lazy_build => 1,
18 is => 'ro',
21 has gene_cache => (
22 isa => 'Cache::File',
23 lazy_build => 1,
24 is => 'ro',
28 sub index : Path('/tools/bulk/') :Args(0) {
29 my $self = shift;
30 my $c = shift;
32 my $mode = $c->req->param("mode");
33 my $debug = $c->req->param("debug");
35 $c->stash->{mode} = $mode;
36 $c->stash->{debug} = $debug;
38 $c->stash->{template} = '/tools/bulk/index.mas';
41 sub clone_tab : Path('/tools/bulk/tabs/clone_tab') Args(0) {
42 my $self = shift;
43 my $c = shift;
45 $c->stash->{ug_build_select} = $self->ug_build_selectbox($c);
47 $c->stash->{template} = '/tools/bulk/tabs/clone_tab.mas';
51 sub array_tab: Path('/tools/bulk/tabs/array_tab') Args(0) {
52 my $self = shift;
53 my $c = shift;
55 $c->stash->{ug_select} = $self->ug_build_selectbox($c);
56 $c->stash->{output_list} = $self->output_list();
57 $c->stash->{template} = '/tools/bulk/tabs/array_tab.mas';
60 sub unigene_tab : Path('/tools/bulk/tabs/unigene_tab') Args(0) {
61 my $self = shift;
62 my $c = shift;
64 $c->stash->{ug_build_select} = $self->ug_build_selectbox($c);
65 $c->stash->{template} = '/tools/bulk/tabs/unigene_tab.mas';
68 sub bac_tab : Path('/tools/bulk/tabs/bac_tab') Args(0) {
69 my $self = shift;
70 my $c = shift;
72 $c->stash->{template} = '/tools/bulk/tabs/bac_tab.mas';
75 sub bac_end_tab: Path('/tools/bulk/tabs/bac_end_tab') Args(0) {
76 my $self = shift;
77 my $c = shift;
79 $c->stash->{template} = '/tools/bulk/tabs/bac_end_tab.mas';
82 sub ftp_tab : Path('/tools/bulk/tabs/ftp_tab') Args(0) {
83 my $self = shift;
84 my $c = shift;
86 $c->stash->{template} = '/tools/bulk/tabs/ftp_tab.mas';
89 sub converter_tab : Path('/tools/bulk/tabs/converter_tab') Args(0) {
90 my $self = shift;
91 my $c = shift;
93 $c->stash->{template} = '/tools/bulk/tabs/converter_tab.mas';
96 sub _build_feature_cache {
97 my $self = shift;
99 my $app = $self->_app;
100 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw/cache bulk feature/));
102 _new_cache_file($app, $cache_dir);
105 sub _build_gene_cache {
106 my $self = shift;
108 my $app = $self->_app;
109 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw/cache bulk gene/));
111 _new_cache_file($app, $cache_dir);
114 sub _new_cache_file {
115 my ($app, $cache_dir) = @_;
116 $app->log->debug("Bulk: creating new cache in $cache_dir") if $app->debug;
117 return Cache::File->new(
118 cache_root => $cache_dir,
119 default_expires => 'never',
120 # TODO: how big can the output of 10K identifiers be?
121 size_limit => 10_000_000,
122 removal_strategy => 'Cache::RemovalStrategy::LRU',
123 # temporary, until we figure out locking issue
124 lock_level => Cache::File::LOCK_NFS,
129 =head1 NAME
131 SGN::Controller::Bulk - Bulk Download Controller
133 =head1 DESCRIPTION
135 Catalyst Controller which takes care of bulk downloads. Currently
136 supports features and genes.
138 =cut
140 sub bulk_download_stats :Local {
141 my ( $self, $c ) = @_;
143 $c->log->debug("calculating bulk download stats") if $c->debug;
145 my $seqs = scalar @{$c->stash->{sequences} || []};
146 my $seq_ids = scalar @{$c->stash->{sequence_identifiers} || []};
147 my $stats = <<STATS;
148 A total of $seqs matching features were found for $seq_ids identifiers provided.
149 STATS
151 $c->stash( bulk_download_stats => $stats );
152 $c->stash( bulk_download_success => $seqs );
155 # sub bulk_js_menu :Local {
156 # my ( $self, $c ) = @_;
158 # my $mode = $c->stash->{bulk_js_menu_mode} || '';
159 # # define urls of modes
160 # my @mode_links = (
161 # [ '/bulk/input.pl?mode=clone_search', 'Clone&nbsp;name<br />(SGN-C)' ],
162 # [ '/bulk/input.pl?mode=microarray', 'Array&nbsp;spot&nbsp;ID<br />(SGN-S)' ],
163 # [ '/bulk/input.pl?mode=unigene', 'Unigene&nbsp;ID<br />(SGN-U)' ],
164 # [ '/bulk/input.pl?mode=bac', 'BACs' ],
165 # [ '/bulk/input.pl?mode=bac_end', 'BAC&nbsp;ends' ],
166 # [ '/bulk/input.pl?mode=ftp', 'Full&nbsp;datasets<br />(FTP)' ],
167 # [ '/bulk/input.pl?mode=unigene_convert', 'Unigene ID Converter<br />(SGN-U)' ],
168 # [ '/bulk/feature', 'Features' ],
169 # [ '/bulk/gene', 'Genes' ],
170 # );
172 # ### figure out which mode we're in ###
173 # my $modenum =
174 # $mode =~ /clone_search/i ? 0
175 # : $mode =~ /array/i ? 1
176 # : $mode =~ /unigene_convert/i ? 6
177 # : $mode =~ /unigene/i ? 2
178 # : $mode =~ /bac_end/i ? 4
179 # : $mode =~ /bac/i ? 3
180 # : $mode =~ /ftp/i ? 5
181 # : $mode =~ /feature/i ? 7
182 # : $mode =~ /gene/i ? 8
183 # : 0; # clone search is default
185 # $c->stash( bulk_js_menu =>
186 # $c->view('BareMason')->render( $c, '/page/page_title.mas', { title => 'Bulk download' })
187 # .<<EOH
188 # <div style="margin-bottom: 1em">Download Unigene or BAC information using a list of identifiers, or complete datasets with FTP.</div>
189 # EOH
190 # .modesel( \@mode_links, $modenum ),
191 # );
195 sub bulk_gene :Path('/bulk/gene') : Args(0) {
196 my ( $self, $c ) = @_;
198 # $c->forward('bulk_js_menu');
200 if( my $ids = $c->req->params->{'ids'} ) {
201 $c->stash( prefill_ids => $ids );
204 $c->stash( template => '/bulk_gene.mas');
207 sub gene_tab : Path('/tools/bulk/tabs/gene_tab') Args(0) {
208 my $self = shift;
209 my $c = shift;
211 $c->stash->{template} = '/tools/bulk/tabs/gene_tab.mas';
215 sub bulk_gene_type_validate :Local :Args(0) {
216 my ( $self, $c ) = @_;
217 my $req = $c->req;
218 my $type = $req->param('gene_type');
220 unless ($type && $type ~~ [qw/cdna cds protein/]) {
221 $c->throw_client_error(
222 public_message => 'Invalid data type chosen',
223 http_status => 200,
228 sub bulk_gene_submit :Path('/bulk/gene/submit') :Args(0) {
229 my ( $self, $c ) = @_;
230 my $req = $c->req;
231 my $ids = $req->param('ids');
232 my $type = $req->param('gene_type');
233 my $mode = $req->param('mode') || 'gene';
235 # $c->stash( bulk_js_menu_mode => $mode );
236 # $c->forward('bulk_js_menu');
238 $c->log->debug("submitting query with type=$type") if $c->debug;
240 $c->forward('bulk_gene_type_validate');
242 if( $c->req->param('gene_file') ) {
243 my ($upload) = $c->req->upload('gene_file');
244 # always append contents of file with newline to form input to
245 # prevent smashing identifiers together
246 $ids = "$ids\n" . $upload->slurp if $upload;
249 # Must calculate this after looking at file contents
250 # Take into account data type, because different data types for the same sequence list
251 # produce different results
252 my $sha1 = sha1_hex("$type $ids");
253 $c->stash( sha1 => $sha1 );
255 # remove leading and trailing whitespace
256 $ids = trim($ids);
258 unless ($ids) {
259 $c->throw_client_error(
260 public_message => 'At least one identifier must be given',
261 http_status => 200,
265 $c->forward('cache_gene_sequences');
267 $c->stash( bulk_download_stats => <<STATS);
268 Insert stats
269 STATS
270 $c->stash( template => '/tools/bulk/display/bulk_gene_download.mas');
273 sub cache_gene_sequences :Local :Args(0) {
274 my ($self, $c) = @_;
275 my $req = $c->req;
276 my $ids = $req->param('ids');
277 my $type = $req->param('gene_type');
278 my $sha1 = $c->stash->{sha1};
280 my $success = 0;
281 my @gene_ids = split /\s+/, $ids;
282 my $schema = $c->dbic_schema('Bio::Chado::Schema', 'sgn_chado');
284 my $genes_by_name =
285 $schema->resultset('Sequence::Feature')
286 ->search({
287 "me.name" => \@gene_ids,
288 'me.type_id' => $schema->get_cvterm_or_die('sequence:gene')->cvterm_id,
290 my $genes_by_synonym =
291 $schema->resultset('Sequence::Synonym')
292 ->search({ 'me.name' => \@gene_ids })
293 ->search_related('feature_synonyms')
294 ->search_related('feature',{
295 'feature.type_id' => $schema->get_cvterm_or_die('sequence:gene')->cvterm_id,
298 my %seen_mrna;
299 my @mrnas =
300 grep !$seen_mrna{$_->feature_id}++,
301 map {
302 $_->search_related( 'feature_relationship_objects', {
303 'feature_relationship_objects.type_id' => $schema->get_cvterm_or_die('relationship:part_of')->cvterm_id,
305 ->search_related( 'subject', {
306 'subject.type_id' => $schema->get_cvterm_or_die('sequence:mRNA')->cvterm_id,
308 { prefetch => 'featureprops' },
310 } ( $genes_by_name, $genes_by_synonym );
312 $c->stash(
313 gene_mrnas => \@mrnas,
314 bulk_download_success => scalar(@mrnas),
316 $c->forward('convert_sequences_to_bioperl_objects');
317 $c->forward('populate_gene_sequences');
318 $c->forward('freeze_sequences');
321 sub convert_sequences_to_bioperl_objects :Local {
322 my ($self, $c) = @_;
323 my @mrnas = @{$c->stash->{gene_mrnas}};
324 my @seqs = (map { mrna_cds_protein_sequence($_) } @mrnas );
325 $c->stash( gene_sequences => \@seqs );
328 sub freeze_sequences :Local {
329 my ($self, $c) = @_;
330 # cache the sequences
331 $self->gene_cache->freeze( $c->stash->{sha1} , $c->stash->{gene_mps} || [ ] );
334 sub populate_gene_sequences :Local {
335 my ($self, $c) = @_;
336 my $req = $c->req;
337 my $type = $req->param('gene_type');
338 my $type_index = {
339 cdna => 0,
340 cds => 1,
341 protein => 2,
343 my @mps;
345 push @mps, map {
346 my $index = $type_index->{$type};
347 $c->log->debug("found $type with index $index") if $c->debug;
349 unless (defined $index) {
350 $c->throw_client_error(
351 public_message => 'Invalid data type',
352 http_status => 200,
356 my $o = $_->[$index];
357 unless (defined $o) {
358 () # if it's not defined, we don't have that type of seq for this gene
359 } elsif( $o->isa('DBIx::Class::Row') ) {
360 $c->log->debug("Downgrading from BCS to Bioperl object " . $o->name) if $c->debug;
361 my @desc = get_descriptions($o,'plain');
362 my $g = Bio::PrimarySeq->new(
363 -id => $o->primary_id,
364 -desc => join(', ', @desc),
365 -seq => $o->seq,
367 } else {
370 } @{ $c->stash->{gene_sequences} };
371 $c->stash( gene_mps => [ @mps ] );
374 sub bulk_gene_download :Path('/bulk/gene/download') :Args(1) {
375 my ( $self, $c, $sha1 ) = @_;
377 my $app = $self->_app;
378 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw/cache bulk gene/));
380 $sha1 =~ s/\.(fasta|txt)$//g;
382 my $seqs = $self->gene_cache->thaw($sha1)
383 or $c->throw_404('Bulk dataset not found');
385 $c->stash->{sequences} = $seqs;
386 $c->forward('View::SeqIO');
389 sub bulk_feature :Path('/tools/bulk/tabs/feature_tab') :Args(0) {
390 my ( $self, $c ) = @_;
391 my $mode = $c->req->params->{'mode'} || 'feature';
393 # $c->stash( bulk_js_menu_mode => $mode );
395 if( my $ids = $c->req->params->{'ids'} ) {
396 $c->stash( prefill_ids => $ids );
399 #$c->forward('bulk_js_menu');
401 $c->stash( template => '/tools/bulk/tabs/feature_tab.mas');
403 # trigger cache creation
404 $self->feature_cache->get("");
407 sub bulk_feature_download :Path('/bulk/feature/download') :Args(1) {
408 my ( $self, $c, $sha1 ) = @_;
410 my $app = $self->_app;
411 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw/cache bulk feature/));
413 $sha1 =~ s/\.(fasta|txt)$//g;
415 my $seqs = $self->feature_cache->thaw($sha1)
416 or $c->throw_404('Bulk dataset not found');
418 $c->stash( sequences => $seqs->[1] );
420 $c->forward( 'View::SeqIO' );
423 sub bulk_feature_submit :Path('/bulk/feature/submit') :Args(0) {
424 my ( $self, $c ) = @_;
426 my $req = $c->req;
427 my $ids = $req->param('ids') || '';
428 my $mode = $req->param('mode') || 'feature';
430 # $c->stash( bulk_js_menu_mode => $mode );
432 if( $c->req->param('feature_file') ) {
433 my ($upload) = $c->req->upload('feature_file');
434 # always append contents of file with newline to form input to
435 # prevent smashing identifiers together
436 $ids = "$ids\n" . $upload->slurp if $upload;
439 # Must calculate this after looking at file contents
440 my $sha1 = sha1_hex($ids);
442 # remove leading and trailing whitespace
443 $ids = trim($ids);
445 unless ($ids) {
446 $c->throw_client_error(public_message => 'At least one identifier must be given');
449 $c->stash( sequence_identifiers => [ split /\s+/, $ids ] );
451 $c->stash( bulk_query => 1 );
453 $c->log->debug("fetching sequences") if $c->debug;
454 $c->forward('Controller::Sequence', 'fetch_sequences');
456 $c->log->debug("freezing sequences") if $c->debug;
457 $self->feature_cache->freeze( $sha1 , [ $c->stash->{sequence_identifiers}, $c->stash->{sequences} ] );
459 # $c->forward('bulk_js_menu');
460 $c->forward('bulk_download_stats');
462 $c->stash( template => '/tools/bulk/display/feature_download.mas', sha1 => $sha1 );
466 sub ug_build_selectbox {
467 my $self = shift;
468 my $c = shift;
469 my $filter_sub = shift;
470 my %builds;
471 my $sth = $c->dbc->dbh()->prepare(
472 q|SELECT ub.unigene_build_id,
473 ub.organism_group_id,
474 ub.build_nr,
475 g.group_id,
476 g.comment
477 FROM sgn.unigene_build as ub, sgn.groups as g
478 WHERE ub.organism_group_id=g.group_id
479 AND g.type=1
480 AND ub.status='C'
483 $sth->execute();
484 while ( my @row = $sth->fetchrow_array() ) {
485 if ($filter_sub) {
486 next unless $filter_sub->(@row);
489 my ( $unigene_build_id, $organism_group_id, $build_nr, $group_id,
490 $species )
491 = @row;
492 $species =~ s/(\S)[a-z]+\s([a-z]+)/uc($1).'. '.$2/ei
493 ; #< abbreviate the species names
494 $builds{$unigene_build_id} = "$species (build $build_nr)";
497 return simple_selectbox_html(
498 name => 'build_id',
499 label => 'Only include unigene build:',
500 choices => [
501 [ all => 'include all' ],
502 ( map [ $_, $builds{$_} ], keys %builds ),
509 sub output_list {
510 return <<OUTPUT_LIST
511 "<b>Please select the information you would like for each identifier:</b><br />
512 <input type="checkbox" name="clone_name" checked="checked" /> clone name<br />
513 <input type="checkbox" name="SGN_C" checked="checked" /> clone id (SGN-C)<br />
514 <input type="checkbox" name="SGN_T" checked="checked" /> sequence read id (SGN-T)<br />
515 <input type="checkbox" name="SGN_E" checked="checked" /> est id (SGN-E)<br />
516 <input type="checkbox" name="build_nr" checked="checked" /> unigene build nr<br />
517 <input type="checkbox" name="SGN_U" checked="checked" /> unigene id (SGN-U)<br />
518 <input type="checkbox" name="chipname" checked="checked" /> chipname<br />
519 <input type="checkbox" name="SGN_S" checked="checked" /> microarray spot id (SGN-S)<br />
520 <input type="checkbox" name="TUS" checked="checked" /> TUS number (used to order clones)<br />
521 <input type="checkbox" name="manual_annotation" /> manual annotation<br />
522 <input type="checkbox" name="automatic_annotation" /> automatic (BLAST) annotation<br />
523 <input type="checkbox" name="sequence" onclick="check_fasta_option()" /> sequence<br />
524 &nbsp;&nbsp;&nbsp;<input type="radio" name="seq_type" value="est_seq" checked="checked" /> EST sequence<br />
525 &nbsp;&nbsp;&nbsp;<input type="radio" name="seq_type" value="unigene_seq" /> Unigene sequence<br />";
527 OUTPUT_LIST
533 =head1 AUTHOR
535 Jonathan "Duke" Leto
537 =head1 LICENSE
539 This library is free software. You can redistribute it and/or modify
540 it under the same terms as Perl itself.
542 =cut
544 __PACKAGE__->meta->make_immutable;