1 package SGN
::Controller
::Bulk
;
4 use namespace
::autoclean
;
6 use Digest
::SHA1 qw
/sha1_hex/;
7 use File
::Path qw
/make_path/;
8 use CXGN
::Page
::FormattingHelpers qw
/modesel simple_selectbox_html /;
9 use CXGN
::Tools
::Text qw
/trim/;
10 use SGN
::View
::Feature qw
/mrna_cds_protein_sequence get_descriptions/;
13 BEGIN { extends
'Catalyst::Controller' }
15 has feature_cache
=> (
28 sub index : Path
('/tools/bulk/') :Args
(0) {
32 my $mode = $c->req->param("mode");
33 my $debug = $c->req->param("debug");
35 $c->stash->{mode
} = $mode;
36 $c->stash->{debug
} = $debug;
38 $c->stash->{template
} = '/tools/bulk/index.mas';
41 sub clone_tab
: Path
('/tools/bulk/tabs/clone_tab') Args
(0) {
45 $c->stash->{ug_build_select
} = $self->ug_build_selectbox($c);
47 $c->stash->{template
} = '/tools/bulk/tabs/clone_tab.mas';
51 sub array_tab
: Path
('/tools/bulk/tabs/array_tab') Args
(0) {
55 $c->stash->{ug_select
} = $self->ug_build_selectbox($c);
56 $c->stash->{output_list
} = $self->output_list();
57 $c->stash->{template
} = '/tools/bulk/tabs/array_tab.mas';
60 sub unigene_tab
: Path
('/tools/bulk/tabs/unigene_tab') Args
(0) {
64 $c->stash->{ug_build_select
} = $self->ug_build_selectbox($c);
65 $c->stash->{template
} = '/tools/bulk/tabs/unigene_tab.mas';
68 sub bac_tab
: Path
('/tools/bulk/tabs/bac_tab') Args
(0) {
72 $c->stash->{template
} = '/tools/bulk/tabs/bac_tab.mas';
75 sub bac_end_tab
: Path
('/tools/bulk/tabs/bac_end_tab') Args
(0) {
79 $c->stash->{template
} = '/tools/bulk/tabs/bac_end_tab.mas';
82 sub ftp_tab
: Path
('/tools/bulk/tabs/ftp_tab') Args
(0) {
86 $c->stash->{template
} = '/tools/bulk/tabs/ftp_tab.mas';
89 sub converter_tab
: Path
('/tools/bulk/tabs/converter_tab') Args
(0) {
93 $c->stash->{template
} = '/tools/bulk/tabs/converter_tab.mas';
96 sub _build_feature_cache
{
99 my $app = $self->_app;
100 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw
/cache bulk feature/));
102 _new_cache_file
($app, $cache_dir);
105 sub _build_gene_cache
{
108 my $app = $self->_app;
109 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw
/cache bulk gene/));
111 _new_cache_file
($app, $cache_dir);
114 sub _new_cache_file
{
115 my ($app, $cache_dir) = @_;
116 $app->log->debug("Bulk: creating new cache in $cache_dir") if $app->debug;
117 return Cache
::File
->new(
118 cache_root
=> $cache_dir,
119 default_expires
=> 'never',
120 # TODO: how big can the output of 10K identifiers be?
121 size_limit
=> 10_000_000
,
122 removal_strategy
=> 'Cache::RemovalStrategy::LRU',
123 # temporary, until we figure out locking issue
124 lock_level
=> Cache
::File
::LOCK_NFS
,
131 SGN::Controller::Bulk - Bulk Download Controller
135 Catalyst Controller which takes care of bulk downloads. Currently
136 supports features and genes.
140 sub bulk_download_stats
:Local
{
141 my ( $self, $c ) = @_;
143 $c->log->debug("calculating bulk download stats") if $c->debug;
145 my $seqs = scalar @
{$c->stash->{sequences
} || []};
146 my $seq_ids = scalar @
{$c->stash->{sequence_identifiers
} || []};
148 A total of $seqs matching features were found for $seq_ids identifiers provided.
151 $c->stash( bulk_download_stats
=> $stats );
152 $c->stash( bulk_download_success
=> $seqs );
155 # sub bulk_js_menu :Local {
156 # my ( $self, $c ) = @_;
158 # my $mode = $c->stash->{bulk_js_menu_mode} || '';
159 # # define urls of modes
161 # [ '/bulk/input.pl?mode=clone_search', 'Clone name<br />(SGN-C)' ],
162 # [ '/bulk/input.pl?mode=microarray', 'Array spot ID<br />(SGN-S)' ],
163 # [ '/bulk/input.pl?mode=unigene', 'Unigene ID<br />(SGN-U)' ],
164 # [ '/bulk/input.pl?mode=bac', 'BACs' ],
165 # [ '/bulk/input.pl?mode=bac_end', 'BAC ends' ],
166 # [ '/bulk/input.pl?mode=ftp', 'Full datasets<br />(FTP)' ],
167 # [ '/bulk/input.pl?mode=unigene_convert', 'Unigene ID Converter<br />(SGN-U)' ],
168 # [ '/bulk/feature', 'Features' ],
169 # [ '/bulk/gene', 'Genes' ],
172 # ### figure out which mode we're in ###
174 # $mode =~ /clone_search/i ? 0
175 # : $mode =~ /array/i ? 1
176 # : $mode =~ /unigene_convert/i ? 6
177 # : $mode =~ /unigene/i ? 2
178 # : $mode =~ /bac_end/i ? 4
179 # : $mode =~ /bac/i ? 3
180 # : $mode =~ /ftp/i ? 5
181 # : $mode =~ /feature/i ? 7
182 # : $mode =~ /gene/i ? 8
183 # : 0; # clone search is default
185 # $c->stash( bulk_js_menu =>
186 # $c->view('BareMason')->render( $c, '/page/page_title.mas', { title => 'Bulk download' })
188 # <div style="margin-bottom: 1em">Download Unigene or BAC information using a list of identifiers, or complete datasets with FTP.</div>
190 # .modesel( \@mode_links, $modenum ),
195 sub bulk_gene
:Path
('/bulk/gene') : Args
(0) {
196 my ( $self, $c ) = @_;
198 # $c->forward('bulk_js_menu');
200 if( my $ids = $c->req->params->{'ids'} ) {
201 $c->stash( prefill_ids
=> $ids );
204 $c->stash( template
=> '/bulk_gene.mas');
207 sub gene_tab
: Path
('/tools/bulk/tabs/gene_tab') Args
(0) {
211 $c->stash->{template
} = '/tools/bulk/tabs/gene_tab.mas';
215 sub bulk_gene_type_validate
:Local
:Args
(0) {
216 my ( $self, $c ) = @_;
218 my $type = $req->param('gene_type');
220 unless ($type && $type ~~ [qw
/cdna cds protein/]) {
221 $c->throw_client_error(
222 public_message
=> 'Invalid data type chosen',
228 sub bulk_gene_submit
:Path
('/bulk/gene/submit') :Args
(0) {
229 my ( $self, $c ) = @_;
231 my $ids = $req->param('ids');
232 my $type = $req->param('gene_type');
233 my $mode = $req->param('mode') || 'gene';
235 # $c->stash( bulk_js_menu_mode => $mode );
236 # $c->forward('bulk_js_menu');
238 $c->log->debug("submitting query with type=$type") if $c->debug;
240 $c->forward('bulk_gene_type_validate');
242 if( $c->req->param('gene_file') ) {
243 my ($upload) = $c->req->upload('gene_file');
244 # always append contents of file with newline to form input to
245 # prevent smashing identifiers together
246 $ids = "$ids\n" . $upload->slurp if $upload;
249 # Must calculate this after looking at file contents
250 # Take into account data type, because different data types for the same sequence list
251 # produce different results
252 my $sha1 = sha1_hex
("$type $ids");
253 $c->stash( sha1
=> $sha1 );
255 # remove leading and trailing whitespace
259 $c->throw_client_error(
260 public_message
=> 'At least one identifier must be given',
265 $c->forward('cache_gene_sequences');
267 $c->stash( bulk_download_stats
=> <<STATS);
270 $c->stash( template
=> '/tools/bulk/display/bulk_gene_download.mas');
273 sub cache_gene_sequences
:Local
:Args
(0) {
276 my $ids = $req->param('ids');
277 my $type = $req->param('gene_type');
278 my $sha1 = $c->stash->{sha1
};
281 my @gene_ids = split /\s+/, $ids;
282 my $schema = $c->dbic_schema('Bio::Chado::Schema', 'sgn_chado');
285 $schema->resultset('Sequence::Feature')
287 "me.name" => \
@gene_ids,
288 'me.type_id' => $schema->get_cvterm_or_die('sequence:gene')->cvterm_id,
290 my $genes_by_synonym =
291 $schema->resultset('Sequence::Synonym')
292 ->search({ 'me.name' => \
@gene_ids })
293 ->search_related('feature_synonyms')
294 ->search_related('feature',{
295 'feature.type_id' => $schema->get_cvterm_or_die('sequence:gene')->cvterm_id,
300 grep !$seen_mrna{$_->feature_id}++,
302 $_->search_related( 'feature_relationship_objects', {
303 'feature_relationship_objects.type_id' => $schema->get_cvterm_or_die('relationship:part_of')->cvterm_id,
305 ->search_related( 'subject', {
306 'subject.type_id' => $schema->get_cvterm_or_die('sequence:mRNA')->cvterm_id,
308 { prefetch
=> 'featureprops' },
310 } ( $genes_by_name, $genes_by_synonym );
313 gene_mrnas
=> \
@mrnas,
314 bulk_download_success
=> scalar(@mrnas),
316 $c->forward('convert_sequences_to_bioperl_objects');
317 $c->forward('populate_gene_sequences');
318 $c->forward('freeze_sequences');
321 sub convert_sequences_to_bioperl_objects
:Local
{
323 my @mrnas = @
{$c->stash->{gene_mrnas
}};
324 my @seqs = (map { mrna_cds_protein_sequence
($_) } @mrnas );
325 $c->stash( gene_sequences
=> \
@seqs );
328 sub freeze_sequences
:Local
{
330 # cache the sequences
331 $self->gene_cache->freeze( $c->stash->{sha1
} , $c->stash->{gene_mps
} || [ ] );
334 sub populate_gene_sequences
:Local
{
337 my $type = $req->param('gene_type');
346 my $index = $type_index->{$type};
347 $c->log->debug("found $type with index $index") if $c->debug;
349 unless (defined $index) {
350 $c->throw_client_error(
351 public_message
=> 'Invalid data type',
356 my $o = $_->[$index];
357 unless (defined $o) {
358 () # if it's not defined, we don't have that type of seq for this gene
359 } elsif( $o->isa('DBIx::Class::Row') ) {
360 $c->log->debug("Downgrading from BCS to Bioperl object " . $o->name) if $c->debug;
361 my @desc = get_descriptions
($o,'plain');
362 my $g = Bio
::PrimarySeq
->new(
363 -id
=> $o->primary_id,
364 -desc
=> join(', ', @desc),
370 } @
{ $c->stash->{gene_sequences
} };
371 $c->stash( gene_mps
=> [ @mps ] );
374 sub bulk_gene_download
:Path
('/bulk/gene/download') :Args
(1) {
375 my ( $self, $c, $sha1 ) = @_;
377 my $app = $self->_app;
378 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw
/cache bulk gene/));
380 $sha1 =~ s/\.(fasta|txt)$//g;
382 my $seqs = $self->gene_cache->thaw($sha1)
383 or $c->throw_404('Bulk dataset not found');
385 $c->stash->{sequences
} = $seqs;
386 $c->forward('View::SeqIO');
389 sub bulk_feature
:Path
('/tools/bulk/tabs/feature_tab') :Args
(0) {
390 my ( $self, $c ) = @_;
391 my $mode = $c->req->params->{'mode'} || 'feature';
393 # $c->stash( bulk_js_menu_mode => $mode );
395 if( my $ids = $c->req->params->{'ids'} ) {
396 $c->stash( prefill_ids
=> $ids );
399 #$c->forward('bulk_js_menu');
401 $c->stash( template
=> '/tools/bulk/tabs/feature_tab.mas');
403 # trigger cache creation
404 $self->feature_cache->get("");
407 sub bulk_feature_download
:Path
('/bulk/feature/download') :Args
(1) {
408 my ( $self, $c, $sha1 ) = @_;
410 my $app = $self->_app;
411 my $cache_dir = $app->path_to($app->tempfiles_subdir(qw
/cache bulk feature/));
413 $sha1 =~ s/\.(fasta|txt)$//g;
415 my $seqs = $self->feature_cache->thaw($sha1)
416 or $c->throw_404('Bulk dataset not found');
418 $c->stash( sequences
=> $seqs->[1] );
420 $c->forward( 'View::SeqIO' );
423 sub bulk_feature_submit
:Path
('/bulk/feature/submit') :Args
(0) {
424 my ( $self, $c ) = @_;
427 my $ids = $req->param('ids') || '';
428 my $mode = $req->param('mode') || 'feature';
430 # $c->stash( bulk_js_menu_mode => $mode );
432 if( $c->req->param('feature_file') ) {
433 my ($upload) = $c->req->upload('feature_file');
434 # always append contents of file with newline to form input to
435 # prevent smashing identifiers together
436 $ids = "$ids\n" . $upload->slurp if $upload;
439 # Must calculate this after looking at file contents
440 my $sha1 = sha1_hex
($ids);
442 # remove leading and trailing whitespace
446 $c->throw_client_error(public_message
=> 'At least one identifier must be given');
449 $c->stash( sequence_identifiers
=> [ split /\s+/, $ids ] );
451 $c->stash( bulk_query
=> 1 );
453 $c->log->debug("fetching sequences") if $c->debug;
454 $c->forward('Controller::Sequence', 'fetch_sequences');
456 $c->log->debug("freezing sequences") if $c->debug;
457 $self->feature_cache->freeze( $sha1 , [ $c->stash->{sequence_identifiers
}, $c->stash->{sequences
} ] );
459 # $c->forward('bulk_js_menu');
460 $c->forward('bulk_download_stats');
462 $c->stash( template
=> '/tools/bulk/display/feature_download.mas', sha1
=> $sha1 );
466 sub ug_build_selectbox
{
469 my $filter_sub = shift;
471 my $sth = $c->dbc->dbh()->prepare(
472 q
|SELECT ub
.unigene_build_id
,
473 ub
.organism_group_id
,
477 FROM sgn
.unigene_build as ub
, sgn
.groups as g
478 WHERE ub
.organism_group_id
=g
.group_id
484 while ( my @row = $sth->fetchrow_array() ) {
486 next unless $filter_sub->(@row);
489 my ( $unigene_build_id, $organism_group_id, $build_nr, $group_id,
492 $species =~ s/(\S)[a-z]+\s([a-z]+)/uc($1).'. '.$2/ei
493 ; #< abbreviate the species names
494 $builds{$unigene_build_id} = "$species (build $build_nr)";
497 return simple_selectbox_html
(
499 label
=> 'Only include unigene build:',
501 [ all
=> 'include all' ],
502 ( map [ $_, $builds{$_} ], keys %builds ),
511 "<b>Please select the information you would like for each identifier:</b><br />
512 <input type="checkbox" name="clone_name" checked="checked" /> clone name<br />
513 <input type="checkbox" name="SGN_C" checked="checked" /> clone id (SGN-C)<br />
514 <input type="checkbox" name="SGN_T" checked="checked" /> sequence read id (SGN-T)<br />
515 <input type="checkbox" name="SGN_E" checked="checked" /> est id (SGN-E)<br />
516 <input type="checkbox" name="build_nr" checked="checked" /> unigene build nr<br />
517 <input type="checkbox" name="SGN_U" checked="checked" /> unigene id (SGN-U)<br />
518 <input type="checkbox" name="chipname" checked="checked" /> chipname<br />
519 <input type="checkbox" name="SGN_S" checked="checked" /> microarray spot id (SGN-S)<br />
520 <input type="checkbox" name="TUS" checked="checked" /> TUS number (used to order clones)<br />
521 <input type="checkbox" name="manual_annotation" /> manual annotation<br />
522 <input type="checkbox" name="automatic_annotation" /> automatic (BLAST) annotation<br />
523 <input type="checkbox" name="sequence" onclick="check_fasta_option()" /> sequence<br />
524 <input type="radio" name="seq_type" value="est_seq" checked="checked" /> EST sequence<br />
525 <input type="radio" name="seq_type" value="unigene_seq" /> Unigene sequence<br />";
539 This library is free software. You can redistribute it and/or modify
540 it under the same terms as Perl itself.
544 __PACKAGE__->meta->make_immutable;