can download plant phenotype data in the same way as plot phenotype data
[sgn.git] / lib / SGN / Controller / Sequence.pm
blob43914d7a003a9b8921e181032b5bf46672f789d8
1 package SGN::Controller::Sequence;
3 =head1 NAME
5 SGN::Controller::Sequence - Catalyst controller for dealing with sequences
7 =head1 DESCRIPTION
9 Right now, only knows how to fetch sequences from BCS features.
11 =cut
13 use Moose;
14 use namespace::autoclean;
16 use HTML::Entities;
18 BEGIN { extends 'Catalyst::Controller' }
20 =head1 PUBLIC ACTIONS
22 =head2 gmodrpc_fetch_seq
24 Public path: /gmodrpc/v1.1/fetch/seq/<name or id>.fasta?start..end
26 Just forwards to api_v1_single_sequence
28 =cut
30 sub gmodrpc_fetch_seq :Path('/gmodrpc/v1.1/fetch/seq') :Args(1) {
31 my ( $self, $c, $name ) = @_;
32 $c->forward( 'api_v1_single_sequence', [ $name ] );
35 =head2 api_v1_single_sequence
37 Public path: /api/v1/sequence/<name or id>.fasta?start..end
39 ?start..end is optional. If start E<gt> end, does reverse complement.
41 =cut
43 sub api_v1_single_sequence :Path('/api/v1/sequence/download/single') :Args(1) {
44 my ( $self, $c, $name ) = @_;
46 if( $name =~ s/\.([^\.]+)$// ) {
47 $c->stash->{seqio_format} = $1;
50 if( my $kw = $c->request->query_keywords ) {
51 $name .= ":$kw";
54 $c->stash->{sequence_identifiers} = [ $name ];
55 $c->forward( 'fetch_sequences' );
56 $c->forward( 'download_sequences' );
59 =head2 api_v1_multi_sequence
61 Public path: /api/v1/sequence/download/multi
63 Query params:
65 s: multi-valued, holds identifiers to download. Each
66 identifier may have a :start..end appended to take a
67 subsequence. (Reverse complement if start > end).
69 =cut
71 sub api_v1_multi_sequence :Path('/api/v1/sequence/download/multi') :Args(0) {
72 my ( $self, $c ) = @_;
74 $c->stash->{sequence_identifiers} = $c->req->parameters->{'s'};
75 $c->stash->{seqio_format} = $c->req->parameters->{'format'};
77 $c->forward( 'fetch_sequences' );
78 $c->forward( 'download_sequences' );
81 =head1 PRIVATE ACTIONS
83 =head2 download_sequences
85 =cut
87 sub download_sequences :Private {
88 my ( $self, $c ) = @_;
90 # set an appropriate download filename, and the appropriate
91 # headers to trigger a file download
92 $c->stash->{download_filename} = $c->stash->{sequences} && @{$c->stash->{sequences}} == 1
93 ? $c->stash->{sequences}->[0]->id.'.fasta'
94 : 'SGN_sequence_download.fasta';
95 $c->forward('/download/set_download_headers');
97 $c->forward( 'View::SeqIO' );
100 =head2 fetch_sequences
102 =cut
104 sub fetch_sequences :Private {
105 my ( $self, $c ) = @_;
106 my $sequence_idents = $c->stash->{sequence_identifiers};
107 $sequence_idents = [ $sequence_idents ] unless ref $sequence_idents;
109 # parse out region descriptions in any of the sequence idents
110 for my $id (@$sequence_idents) {
111 # full format looks like myseqID1123:455..43255. if start is
112 # greater than end, means revcom
113 if( $id =~ s/ : ([\d,]+) \.\. ([\d,]+) $ //x ) {
114 my ( $start, $end ) = ( $1, $2 );
115 s/,//g for $start, $end;
116 my $strand = '+';
117 if( $start > $end ) {
118 ( $start, $end ) = ( $end, $start );
119 $strand = '-';
121 $id = [ $id, $strand, $start, $end ];
122 } else {
123 $id = [ $id, undef, undef, undef ];
127 # find the feature(s) for each ID and convert them to
128 # Bio::PrimarySeqs
129 my @sequences;
130 for ( @$sequence_idents ) {
131 my ( $id, $strand, $start, $end ) = @$_;
132 my $rs = $self->_feature_rs( $c, $id );
134 my $found = 0;
135 while( my $feature = $rs->next ) {
136 push @sequences, $self->_feature_to_primaryseq( $feature, $strand, $start, $end );
137 $found = 1;
139 # if there is only one sequence identifier, not finding it should throw a 404
140 # otherwise, ignore it so bulk downloads via the multi api still work
141 # Bulk queries always ignore all invalid identifiers
142 if( @$sequence_idents == 1 and !$c->stash->{bulk_query} ){
143 $found or $c->throw_404( sprintf('No sequence found with id "%s"', encode_entities( $id )) );
147 $c->stash->{sequences} = \@sequences;
150 ######## HELPERS
152 # searches for features given a name or ID. Assumes it's a feature ID
153 # if all-numeric.
154 sub _feature_rs {
155 my ( $self, $c, $id ) = @_;
157 my $features_rs =
158 $c->dbic_schema('Bio::Chado::Schema','sgn_chado')
159 ->resultset('Sequence::Feature');
161 if( $id =~ /\D/ ) {
162 $features_rs = $features_rs->search({ name => $id });
163 } else {
164 $features_rs = $features_rs->search({ feature_id => $id });
167 return $features_rs;
170 # converts a feature to a Bio::PrimarySeq, applying a subsequence and
171 # revcom if necessary
172 sub _feature_to_primaryseq {
173 my ( $self, $feature, $strand, $start, $end ) = @_;
175 my $seq_id = $feature->name || 'feature_'.$feature->feature_id;
176 if( $start && $end ) {
177 $seq_id .= $strand && $strand eq '-' ? ":$end..$start" : ":$start..$end";
180 my $seq = Bio::PrimarySeq->new(
181 -id => $seq_id,
182 ( $feature->desc ? ( -desc => $feature->desc ) : () ),
183 -seq => $feature->subseq( ($start || 1), ($end || $feature->length) ),
185 $seq = $seq->revcom if $strand && $strand eq '-';
187 return $seq;
190 __PACKAGE__->meta->make_immutable;