modified autogenerated name method
[sgn.git] / lib / SGN / Controller / Bulk.pm
blob055ac857e4f7ee22406b73053508a5af1c5717dc
1 package SGN::Controller::Bulk;
2 use 5.010;
3 use Moose;
4 use namespace::autoclean;
5 use Cache::File;
6 use Digest::SHA qw/sha1_hex/;
7 use File::Path qw/make_path/;
8 use CXGN::Page::FormattingHelpers qw/modesel simple_selectbox_html /;
9 use CXGN::Tools::Text qw/trim/;
10 use SGN::View::Feature qw/mrna_cds_protein_sequence get_descriptions/;
11 #use Carp::Always;
13 BEGIN { extends 'Catalyst::Controller' }
# File-backed caches holding frozen bulk-download results: one for feature
# queries, one for gene queries. Both are read-only attributes populated on
# first access by _build_feature_cache / _build_gene_cache (lazy_build).
has feature_cache => (
    is         => 'ro',
    isa        => 'Cache::File',
    lazy_build => 1,
);

has gene_cache => (
    is         => 'ro',
    isa        => 'Cache::File',
    lazy_build => 1,
);
# Action for /tools/bulk/: copies the "mode" and "debug" request parameters
# into the stash and selects the bulk index template.
sub index : Path('/tools/bulk/') :Args(0) {
    my ( $self, $c ) = @_;

    my $request = $c->req;

    # Read each parameter in scalar context so a repeated or missing
    # parameter cannot shift the stash keys.
    my $mode  = $request->param("mode");
    my $debug = $request->param("debug");

    my $stash = $c->stash;
    $stash->{mode}  = $mode;
    $stash->{debug} = $debug;

    $stash->{template} = '/tools/bulk/index.mas';
}
# Action for /tools/bulk/tabs/clone_tab: stashes the unigene-build select box
# produced by ug_build_selectbox and selects the clone tab template.
sub clone_tab : Path('/tools/bulk/tabs/clone_tab') Args(0) {
    my ( $self, $c ) = @_;

    my $build_select = $self->ug_build_selectbox($c);
    $c->stash->{ug_build_select} = $build_select;

    $c->stash->{template} = '/tools/bulk/tabs/clone_tab.mas';
}
# Action for /tools/bulk/tabs/array_tab: stashes the unigene-build select box
# (as "ug_select") and the output-field checkbox list, then selects the array
# tab template.
sub array_tab: Path('/tools/bulk/tabs/array_tab') Args(0) {
    my ( $self, $c ) = @_;

    my $build_select = $self->ug_build_selectbox($c);
    $c->stash->{ug_select} = $build_select;

    my $checkboxes = $self->output_list();
    $c->stash->{output_list} = $checkboxes;

    $c->stash->{template} = '/tools/bulk/tabs/array_tab.mas';
}
# Action for /tools/bulk/tabs/unigene_tab: stashes the unigene-build select
# box produced by ug_build_selectbox and selects the unigene tab template.
sub unigene_tab : Path('/tools/bulk/tabs/unigene_tab') Args(0) {
    my ( $self, $c ) = @_;

    my $build_select = $self->ug_build_selectbox($c);
    $c->stash->{ug_build_select} = $build_select;

    $c->stash->{template} = '/tools/bulk/tabs/unigene_tab.mas';
}
# Action for /tools/bulk/tabs/bac_tab: only selects the BAC tab template.
sub bac_tab : Path('/tools/bulk/tabs/bac_tab') Args(0) {
    my ( $self, $c ) = @_;

    $c->stash->{template} = '/tools/bulk/tabs/bac_tab.mas';
}
# Action for /tools/bulk/tabs/bac_end_tab: only selects the BAC-end tab
# template.
sub bac_end_tab: Path('/tools/bulk/tabs/bac_end_tab') Args(0) {
    my ( $self, $c ) = @_;

    $c->stash->{template} = '/tools/bulk/tabs/bac_end_tab.mas';
}
# Action for /tools/bulk/tabs/ftp_tab: only selects the FTP tab template.
sub ftp_tab : Path('/tools/bulk/tabs/ftp_tab') Args(0) {
    my ( $self, $c ) = @_;

    $c->stash->{template} = '/tools/bulk/tabs/ftp_tab.mas';
}
# Action for /tools/bulk/tabs/converter_tab: only selects the converter tab
# template.
sub converter_tab : Path('/tools/bulk/tabs/converter_tab') Args(0) {
    my ( $self, $c ) = @_;

    $c->stash->{template} = '/tools/bulk/tabs/converter_tab.mas';
}
# Builder for the feature_cache attribute. Resolves the "cache/bulk/feature"
# tempfiles subdirectory through the application object and returns a new
# Cache::File rooted there.
sub _build_feature_cache {
    my ($self) = @_;

    my $app  = $self->_app;
    my $root = $app->path_to( $app->tempfiles_subdir(qw/cache bulk feature/) );

    return _new_cache_file( $app, $root );
}
# Builder for the gene_cache attribute. Resolves the "cache/bulk/gene"
# tempfiles subdirectory through the application object and returns a new
# Cache::File rooted there.
sub _build_gene_cache {
    my ($self) = @_;

    my $app  = $self->_app;
    my $root = $app->path_to( $app->tempfiles_subdir(qw/cache bulk gene/) );

    return _new_cache_file( $app, $root );
}
# Plain function (not a method): creates the Cache::File object used for bulk
# downloads.
#
#   $app       - application object; used for debug logging only
#   $cache_dir - directory passed to Cache::File as cache_root
#
# Returns the new Cache::File instance.
sub _new_cache_file {
    my ( $app, $cache_dir ) = @_;

    if ( $app->debug ) {
        $app->log->debug("Bulk: creating new cache in $cache_dir");
    }

    my %cache_options = (
        cache_root      => $cache_dir,
        default_expires => 'never',

        # TODO: how big can the output of 10K identifiers be?
        size_limit       => 10_000_000,
        removal_strategy => 'Cache::RemovalStrategy::LRU',

        # temporary, until we figure out locking issue
        lock_level => Cache::File::LOCK_NFS,
    );

    return Cache::File->new(%cache_options);
}
129 =head1 NAME
131 SGN::Controller::Bulk - Bulk Download Controller
133 =head1 DESCRIPTION
135 Catalyst Controller which takes care of bulk downloads. Currently
136 supports features and genes.
138 =cut
# Private action: builds a one-line summary from the "sequences" and
# "sequence_identifiers" stash entries (each treated as empty when unset).
# Stashes the text as bulk_download_stats and the number of sequences as
# bulk_download_success.
sub bulk_download_stats :Local {
    my ( $self, $c ) = @_;

    $c->log->debug("calculating bulk download stats") if $c->debug;

    my $stash     = $c->stash;
    my $seq_count = @{ $stash->{sequences}            || [] };
    my $id_count  = @{ $stash->{sequence_identifiers} || [] };

    my $summary = <<STATS;
A total of $seq_count matching features were found for $id_count identifiers provided.
STATS

    $c->stash(
        bulk_download_stats   => $summary,
        bulk_download_success => $seq_count,
    );
}
155 # sub bulk_js_menu :Local {
156 # my ( $self, $c ) = @_;
158 # my $mode = $c->stash->{bulk_js_menu_mode} || '';
159 # # define urls of modes
160 # my @mode_links = (
161 # [ '/bulk/input.pl?mode=clone_search', 'Clone&nbsp;name<br />(SGN-C)' ],
162 # [ '/bulk/input.pl?mode=microarray', 'Array&nbsp;spot&nbsp;ID<br />(SGN-S)' ],
163 # [ '/bulk/input.pl?mode=unigene', 'Unigene&nbsp;ID<br />(SGN-U)' ],
164 # [ '/bulk/input.pl?mode=bac', 'BACs' ],
165 # [ '/bulk/input.pl?mode=bac_end', 'BAC&nbsp;ends' ],
166 # [ '/bulk/input.pl?mode=ftp', 'Full&nbsp;datasets<br />(FTP)' ],
167 # [ '/bulk/input.pl?mode=unigene_convert', 'Unigene ID Converter<br />(SGN-U)' ],
168 # [ '/bulk/feature', 'Features' ],
169 # [ '/bulk/gene', 'Genes' ],
170 # );
172 # ### figure out which mode we're in ###
173 # my $modenum =
174 # $mode =~ /clone_search/i ? 0
175 # : $mode =~ /array/i ? 1
176 # : $mode =~ /unigene_convert/i ? 6
177 # : $mode =~ /unigene/i ? 2
178 # : $mode =~ /bac_end/i ? 4
179 # : $mode =~ /bac/i ? 3
180 # : $mode =~ /ftp/i ? 5
181 # : $mode =~ /feature/i ? 7
182 # : $mode =~ /gene/i ? 8
183 # : 0; # clone search is default
185 # $c->stash( bulk_js_menu =>
186 # $c->view('BareMason')->render( $c, '/page/page_title.mas', { title => 'Bulk download' })
187 # .<<EOH
188 # <div style="margin-bottom: 1em">Download Unigene or BAC information using a list of identifiers, or complete datasets with FTP.</div>
189 # EOH
190 # .modesel( \@mode_links, $modenum ),
191 # );
# Action for /bulk/gene: shows the gene bulk-download form. A non-empty "ids"
# request parameter is stashed as prefill_ids.
sub bulk_gene :Path('/bulk/gene') : Args(0) {
    my ( $self, $c ) = @_;

    # $c->forward('bulk_js_menu');

    my $prefill = $c->req->params->{'ids'};
    $c->stash( prefill_ids => $prefill ) if $prefill;

    $c->stash( template => '/bulk_gene.mas' );
}
# Action for /tools/bulk/tabs/gene_tab: only selects the gene tab template.
sub gene_tab : Path('/tools/bulk/tabs/gene_tab') Args(0) {
    my ( $self, $c ) = @_;

    $c->stash->{template} = '/tools/bulk/tabs/gene_tab.mas';
}
# Private action: checks that the "gene_type" request parameter is one of
# cdna, cds or protein. Any other value (or a missing parameter) raises a
# client error with the message 'Invalid data type chosen' and HTTP status 200.
#
# Uses a hash lookup instead of the smartmatch operator (~~), which is
# deprecated in current perls.
sub bulk_gene_type_validate :Local :Args(0) {
    my ( $self, $c ) = @_;

    my $type          = $c->req->param('gene_type');
    my %is_valid_type = map { $_ => 1 } qw/cdna cds protein/;

    unless ( $type && $is_valid_type{$type} ) {
        $c->throw_client_error(
            public_message => 'Invalid data type chosen',
            http_status    => 200,
        );
    }
}
# Action for /bulk/gene/submit: handles a submitted gene bulk-download form.
#
# Reads the "ids" and "gene_type" request parameters, validates the type via
# bulk_gene_type_validate, appends the contents of an uploaded "gene_file" to
# the identifiers, stashes the SHA-1 of "<type> <ids>" as sha1, and forwards
# to cache_gene_sequences. Raises a client error (HTTP status 200) when no
# identifier was supplied.
#
# "ids" defaults to the empty string, as in bulk_feature_submit, so a
# file-only submission does not interpolate an undefined value.
sub bulk_gene_submit :Path('/bulk/gene/submit') :Args(0) {
    my ( $self, $c ) = @_;
    my $req  = $c->req;
    my $ids  = $req->param('ids') // '';
    my $type = $req->param('gene_type');

    # $c->stash( bulk_js_menu_mode => $mode );
    # $c->forward('bulk_js_menu');

    # The type has not been validated yet, so it may be undefined here.
    $c->log->debug( "submitting query with type=" . ( $type // '' ) ) if $c->debug;

    $c->forward('bulk_gene_type_validate');

    if ( $req->param('gene_file') ) {
        my ($upload) = $req->upload('gene_file');
        # always append contents of file with newline to form input to
        # prevent smashing identifiers together
        $ids = "$ids\n" . $upload->slurp if $upload;
    }

    # Must calculate this after looking at file contents
    # Take into account data type, because different data types for the same sequence list
    # produce different results
    my $sha1 = sha1_hex("$type $ids");
    $c->stash( sha1 => $sha1 );

    # remove leading and trailing whitespace
    $ids = trim($ids);

    unless ($ids) {
        $c->throw_client_error(
            public_message => 'At least one identifier must be given',
            http_status    => 200,
        );
    }

    $c->forward('cache_gene_sequences');

    $c->stash( bulk_download_stats => <<STATS);
Insert stats
STATS
    $c->stash( template => '/tools/bulk/display/bulk_gene_download.mas' );
}
# Private action: looks up the mRNA features of the requested genes and caches
# their sequences.
#
# The identifier list is the "ids" request parameter plus the contents of an
# uploaded "gene_file", split on whitespace. Previously only the form field
# was read here, so identifiers supplied through the file never reached the
# database query.
#
# Genes are matched by feature name and by synonym. For each match, the mRNA
# features that are the subject of a part_of relationship to the gene are
# collected, de-duplicated by feature_id. The rows are stashed as gene_mrnas
# (their count as bulk_download_success), then the conversion, population and
# freeze actions run in that order.
sub cache_gene_sequences :Local :Args(0) {
    my ( $self, $c ) = @_;
    my $req = $c->req;

    my $ids = $req->param('ids') // '';
    if ( $req->param('gene_file') ) {
        my ($upload) = $req->upload('gene_file');
        $ids .= "\n" . $upload->slurp if $upload;
    }

    # split ' ' skips leading whitespace, so no empty first identifier.
    my @gene_ids = split ' ', $ids;

    my $sp_person_id = $c->user() ? $c->user->get_object()->get_sp_person_id() : undef;
    my $schema = $c->dbic_schema( 'Bio::Chado::Schema', 'sgn_chado', $sp_person_id );

    # Resolve each cvterm once instead of once per query.
    my $gene_type_id    = $schema->get_cvterm_or_die('sequence:gene')->cvterm_id;
    my $part_of_type_id = $schema->get_cvterm_or_die('relationship:part_of')->cvterm_id;
    my $mrna_type_id    = $schema->get_cvterm_or_die('sequence:mRNA')->cvterm_id;

    my $genes_by_name =
        $schema->resultset('Sequence::Feature')
               ->search({
                   'me.name'    => \@gene_ids,
                   'me.type_id' => $gene_type_id,
               });

    my $genes_by_synonym =
        $schema->resultset('Sequence::Synonym')
               ->search({ 'me.name' => \@gene_ids })
               ->search_related('feature_synonyms')
               ->search_related( 'feature', {
                   'feature.type_id' => $gene_type_id,
               });

    my %seen_mrna;
    my @mrnas =
        grep { !$seen_mrna{ $_->feature_id }++ }
        map {
            $_->search_related( 'feature_relationship_objects', {
                    'feature_relationship_objects.type_id' => $part_of_type_id,
                })
              ->search_related( 'subject', {
                    'subject.type_id' => $mrna_type_id,
                },
                { prefetch => 'featureprops' },
              )
              ->all
        } ( $genes_by_name, $genes_by_synonym );

    $c->stash(
        gene_mrnas            => \@mrnas,
        bulk_download_success => scalar(@mrnas),
    );

    $c->forward('convert_sequences_to_bioperl_objects');
    $c->forward('populate_gene_sequences');
    $c->forward('freeze_sequences');
}
# Private action: runs mrna_cds_protein_sequence over every entry of the
# gene_mrnas stash list and stashes the collected results as gene_sequences.
sub convert_sequences_to_bioperl_objects :Local {
    my ( $self, $c ) = @_;

    my @mrnas = @{ $c->stash->{gene_mrnas} };

    my @converted = map { mrna_cds_protein_sequence($_) } @mrnas;

    $c->stash( gene_sequences => \@converted );
}
# Private action: stores the gene_mps stash list (or an empty list when it is
# unset) in the gene cache under the stashed sha1 key.
sub freeze_sequences :Local {
    my ( $self, $c ) = @_;

    my $cache = $self->gene_cache;
    my $stash = $c->stash;

    # cache the sequences
    $cache->freeze( $stash->{sha1}, $stash->{gene_mps} || [] );
}
# Private action: picks, for every entry of the gene_sequences stash list, the
# sequence matching the requested "gene_type" and stashes the resulting list
# as gene_mps.
#
# Each gene_sequences entry is indexed by type: cdna => 0, cds => 1,
# protein => 2. Entries without a sequence of the requested type are skipped.
# DBIx::Class rows are converted to Bio::PrimarySeq objects; anything else is
# passed through unchanged.
#
# The type lookup and its validation do not depend on the element, so they
# run once up front. An unknown type raises a client error ('Invalid data
# type', HTTP status 200) even when there are no sequences to process.
sub populate_gene_sequences :Local {
    my ( $self, $c ) = @_;

    my $type      = $c->req->param('gene_type');
    my %index_for = (
        cdna    => 0,
        cds     => 1,
        protein => 2,
    );
    my $index = defined $type ? $index_for{$type} : undef;

    unless ( defined $index ) {
        $c->throw_client_error(
            public_message => 'Invalid data type',
            http_status    => 200,
        );
    }

    $c->log->debug("found $type with index $index") if $c->debug;

    my @mps;
    for my $seq_set ( @{ $c->stash->{gene_sequences} || [] } ) {
        my $o = $seq_set->[$index];

        # if it's not defined, we don't have that type of seq for this gene
        next unless defined $o;

        if ( $o->isa('DBIx::Class::Row') ) {
            $c->log->debug( "Downgrading from BCS to Bioperl object " . $o->name ) if $c->debug;
            my @desc = get_descriptions( $o, 'plain' );
            push @mps, Bio::PrimarySeq->new(
                -id   => $o->primary_id,
                -desc => join( ', ', @desc ),
                -seq  => $o->seq,
            );
        }
        else {
            push @mps, $o;
        }
    }

    $c->stash( gene_mps => \@mps );
}
# Action for /bulk/gene/download/<key>: serves a previously cached gene
# result set. A trailing ".fasta" or ".txt" is stripped from the key before
# the cache lookup; a missing entry yields a 404. The thawed data is stashed
# as "sequences" and rendered by View::SeqIO.
sub bulk_gene_download :Path('/bulk/gene/download') :Args(1) {
    my ( $self, $c, $sha1 ) = @_;

    # NOTE(review): $cache_dir is not used below; the call is kept in case
    # tempfiles_subdir has side effects -- confirm before removing.
    my $app       = $self->_app;
    my $cache_dir = $app->path_to( $app->tempfiles_subdir(qw/cache bulk gene/) );

    $sha1 =~ s/\.(fasta|txt)$//g;

    my $seqs = $self->gene_cache->thaw($sha1);
    $c->throw_404('Bulk dataset not found') unless $seqs;

    $c->stash->{sequences} = $seqs;
    $c->forward('View::SeqIO');
}
# Action for /tools/bulk/tabs/feature_tab: shows the feature bulk-download
# form. A non-empty "ids" request parameter is stashed as prefill_ids. The
# final cache read exists only to force creation of the feature cache.
sub bulk_feature :Path('/tools/bulk/tabs/feature_tab') :Args(0) {
    my ( $self, $c ) = @_;

    # $c->stash( bulk_js_menu_mode => $mode );

    my $prefill = $c->req->params->{'ids'};
    if ($prefill) {
        $c->stash( prefill_ids => $prefill );
    }

    #$c->forward('bulk_js_menu');

    $c->stash( template => '/tools/bulk/tabs/feature_tab.mas' );

    # trigger cache creation
    $self->feature_cache->get("");
}
# Action for /bulk/feature/download/<key>: serves a previously cached feature
# result set. A trailing ".fasta" or ".txt" is stripped from the key before
# the cache lookup; a missing entry yields a 404. The second element of the
# thawed pair is stashed as "sequences" and rendered by View::SeqIO.
sub bulk_feature_download :Path('/bulk/feature/download') :Args(1) {
    my ( $self, $c, $sha1 ) = @_;

    # NOTE(review): $cache_dir is not used below; the call is kept in case
    # tempfiles_subdir has side effects -- confirm before removing.
    my $app       = $self->_app;
    my $cache_dir = $app->path_to( $app->tempfiles_subdir(qw/cache bulk feature/) );

    $sha1 =~ s/\.(fasta|txt)$//g;

    my $cached = $self->feature_cache->thaw($sha1);
    $c->throw_404('Bulk dataset not found') unless $cached;

    $c->stash( sequences => $cached->[1] );

    $c->forward('View::SeqIO');
}
# Action for /bulk/feature/submit: handles a submitted feature bulk-download
# form.
#
# Combines the "ids" request parameter with the contents of an uploaded
# "feature_file", hashes the combined text (SHA-1) to obtain the cache key,
# and raises a client error when no identifier remains after trimming. The
# identifiers are stashed as sequence_identifiers, sequences are fetched via
# Controller::Sequence's fetch_sequences, and the pair
# [ identifiers, sequences ] is frozen in the feature cache under the key.
sub bulk_feature_submit :Path('/bulk/feature/submit') :Args(0) {
    my ( $self, $c ) = @_;

    my $req = $c->req;
    my $ids = $req->param('ids') || '';

    # $c->stash( bulk_js_menu_mode => $mode );

    if ( $req->param('feature_file') ) {
        my ($upload) = $req->upload('feature_file');
        if ($upload) {
            # always append contents of file with newline to form input to
            # prevent smashing identifiers together
            $ids = "$ids\n" . $upload->slurp;
        }
    }

    # Must calculate this after looking at file contents
    my $sha1 = sha1_hex($ids);

    # remove leading and trailing whitespace
    $ids = trim($ids);

    if ( !$ids ) {
        $c->throw_client_error( public_message => 'At least one identifier must be given' );
    }

    $c->stash(
        sequence_identifiers => [ split /\s+/, $ids ],
        bulk_query           => 1,
    );

    $c->log->debug("fetching sequences") if $c->debug;
    $c->forward( 'Controller::Sequence', 'fetch_sequences' );

    $c->log->debug("freezing sequences") if $c->debug;
    my $stash = $c->stash;
    $self->feature_cache->freeze(
        $sha1,
        [ $stash->{sequence_identifiers}, $stash->{sequences} ],
    );

    # $c->forward('bulk_js_menu');
    $c->forward('bulk_download_stats');

    $c->stash( template => '/tools/bulk/display/feature_download.mas', sha1 => $sha1 );
}
# Builds the HTML select box (form field "build_id") listing unigene builds.
#
#   $c          - Catalyst context; supplies the database handle
#   $filter_sub - optional code ref called with each result row
#                 (unigene_build_id, organism_group_id, build_nr, group_id,
#                 comment); rows for which it returns false are left out
#
# Each option is labelled "<species> (build <nr>)", where the species comes
# from the group comment with the first word shortened to its initial. An
# "include all" option is always listed first.
#
# The build options are sorted by label (then by build id) so the list order
# is the same on every request; iterating the hash keys directly gave an
# arbitrary order.
#
# Returns whatever simple_selectbox_html returns for these choices.
sub ug_build_selectbox {
    my $self       = shift;
    my $c          = shift;
    my $filter_sub = shift;

    my %builds;
    my $sth = $c->dbc->dbh()->prepare(
        q|SELECT ub.unigene_build_id,
                 ub.organism_group_id,
                 ub.build_nr,
                 g.group_id,
                 g.comment
          FROM sgn.unigene_build as ub, sgn.groups as g
          WHERE ub.organism_group_id=g.group_id
            AND g.type=1
            AND ub.status='C'
        |
    );
    $sth->execute();

    while ( my @row = $sth->fetchrow_array() ) {
        if ($filter_sub) {
            next unless $filter_sub->(@row);
        }

        my ( $unigene_build_id, $organism_group_id, $build_nr, $group_id,
            $species )
            = @row;

        # A NULL comment would otherwise trigger uninitialized-value warnings.
        $species //= '';
        $species =~ s/(\S)[a-z]+\s([a-z]+)/uc($1).'. '.$2/ei
            ;    #< abbreviate the species names
        $builds{$unigene_build_id} = "$species (build $build_nr)";
    }

    my @sorted_ids =
        sort { $builds{$a} cmp $builds{$b} or $a cmp $b } keys %builds;

    return simple_selectbox_html(
        name    => 'build_id',
        label   => 'Only include unigene build:',
        choices => [
            [ all => 'include all' ],
            ( map { [ $_, $builds{$_} ] } @sorted_ids ),
        ],
    );
}
# Returns the HTML fragment with the output-field checkboxes and the
# sequence-type radio buttons shown on the array tab.
#
# The markup previously began with a stray double quote and ended with '";',
# leftovers from an earlier quoted-string form that were emitted into the
# page; both are removed. The heredoc is single-quoted because the markup
# contains nothing to interpolate.
sub output_list {
    return <<'OUTPUT_LIST';
<b>Please select the information you would like for each identifier:</b><br />
<input type="checkbox" name="clone_name" checked="checked" /> clone name<br />
<input type="checkbox" name="SGN_C" checked="checked" /> clone id (SGN-C)<br />
<input type="checkbox" name="SGN_T" checked="checked" /> sequence read id (SGN-T)<br />
<input type="checkbox" name="SGN_E" checked="checked" /> est id (SGN-E)<br />
<input type="checkbox" name="build_nr" checked="checked" /> unigene build nr<br />
<input type="checkbox" name="SGN_U" checked="checked" /> unigene id (SGN-U)<br />
<input type="checkbox" name="chipname" checked="checked" /> chipname<br />
<input type="checkbox" name="SGN_S" checked="checked" /> microarray spot id (SGN-S)<br />
<input type="checkbox" name="TUS" checked="checked" /> TUS number (used to order clones)<br />
<input type="checkbox" name="manual_annotation" /> manual annotation<br />
<input type="checkbox" name="automatic_annotation" /> automatic (BLAST) annotation<br />
<input type="checkbox" name="sequence" onclick="check_fasta_option()" /> sequence<br />
&nbsp;&nbsp;&nbsp;<input type="radio" name="seq_type" value="est_seq" checked="checked" /> EST sequence<br />
&nbsp;&nbsp;&nbsp;<input type="radio" name="seq_type" value="unigene_seq" /> Unigene sequence<br />
OUTPUT_LIST
}
534 =head1 AUTHOR
536 Jonathan "Duke" Leto
538 =head1 LICENSE
540 This library is free software. You can redistribute it and/or modify
541 it under the same terms as Perl itself.
543 =cut
545 __PACKAGE__->meta->make_immutable;