Merge pull request #5215 from solgenomics/5154_analyses_dataset_details
[sgn.git] / lib / SGN / Feature / GBrowse2 / DataSource.pm
blob3646bdb0cd0794582a2446a327750d157463a0c5
1 package SGN::Feature::GBrowse2::DataSource;
2 use Moose;
3 use namespace::autoclean;
4 use Scalar::Util qw/ blessed /;
5 use Text::ParseWords;
6 use Path::Class ();
7 use URI::Escape;
8 use Class::Load ':all';
9 use Bio::Range;
11 extends 'SGN::Feature::GBrowse::DataSource';
13 has 'xref_discriminator' => (
14 is => 'ro',
15 isa => 'CodeRef',
16 lazy_build => 1,
17 ); sub _build_xref_discriminator {
18 my ( $self ) = @_;
19 return $self->gbrowse->config_master->code_setting( $self->name => 'restrict_xrefs' )
20 || sub { 1 }
23 has 'debug' => (
24 is => 'rw',
25 isa => 'Bool',
26 default => 0,
29 sub _build__databases {
30 my $self = shift;
31 local $_; #< Bio::Graphics::* sloppily clobbers $_
32 my $conf = $self->config;
33 my @dbs = grep /:database$/i, $self->config->configured_types;
34 return {
35 map {
36 my $dbname = $_;
37 my $adaptor = $conf->setting( $dbname => 'db_adaptor' )
38 or confess "no db adaptor for [$_] in ".$self->path->basename;
39 my @args = shellwords( $conf->setting( $dbname => 'db_args' ));
40 my $conn = eval {
41 Class::Load::load_class( $adaptor );
42 local $SIG{__WARN__} = sub { warn @_ if $self->debug };
43 $adaptor->new( @args );
45 if( $@ ) {
46 warn $self->gbrowse->feature_name.": database [$dbname] in ".$self->path->basename." not available\n";
47 warn $@ if $self->debug;
49 } else {
50 $dbname =~ s/:database$//;
51 $dbname => $conn
53 } @dbs
58 # can accept either plaintext queries, or hashrefs describing features in the DB to search for
59 sub xrefs {
60 my ($self, $q) = @_;
62 return unless $self->xref_discriminator->($q);
64 if( my $ref = ref $q ) {
65 return unless $ref eq 'HASH';
67 # search for features in all our DBs
68 return $self->_make_feature_xrefs([
69 map $_->features( %$q ), $self->databases
70 ]);
71 } else {
72 # search for a region on a reference sequence specified like seq_name:23423..66666
73 if( my ($ref_name,$start,$end) = $q =~ /^ \s* ([^:]+) \s* : (\d+) \s* .. \s* (\d+) $/x) {
75 sub _uniq_features(@) {
76 my %seen;
77 grep !($seen{ $_->seq_id.':'.$_->name.':'.$_->start.'..'.$_->end }++), @_;
80 return
81 # make xrefs for the given range of each of the ref features
82 map $self->_make_region_xref({
83 features => [$_],
84 range => Bio::Range->new( -start => $start, -end => $end )
85 }),
86 # remove any duplicate ref features
87 _uniq_features
88 # make sure they are all actually reference features
89 grep { $_->seq_id eq $_->display_name }
90 # search for features with the ref seq name
91 map $self->_search_db( $_, $ref_name ),
92 # for each database
93 $self->databases;
95 } else {
96 # search for features by text in all our DBs
97 return $self->_make_feature_xrefs([
98 map $self->_search_db($_,$q),
99 $self->databases
104 return;
106 sub _search_db {
107 my ( $self, $db, $name ) = @_;
108 my $f =
109 $db->can('get_features_by_alias')
110 || $db->can('get_feature_by_name')
111 or return;
112 return $db->$f( $name );
115 sub _make_feature_xrefs {
116 my ( $self, $features ) = @_;
118 # group the features by source sequence
119 my %src_sequence_matches;
120 push @{$src_sequence_matches{$_->seq_id}{features}}, $_ for @$features;
122 # group the features for each src sequence into non-overlapping regions
123 for my $src ( values %src_sequence_matches ) {
125 # if the features are Bio::DB::GFF::Features, union() is buggy, so convert them
126 # to Bio::Range to perform the union calculation
127 my $ranges = $src->{features}->[0]->isa('Bio::DB::GFF::Feature')
128 ? [ map Bio::Range->new( -start => $_->start, -end => $_->end ), @{$src->{features}} ]
129 : $src->{features};
130 my @regions = map { {range => $_} } Bio::Range->unions( @$ranges );
132 # assign the features to each region
133 FEATURE:
134 foreach my $feature (@{$src->{features}}) {
135 foreach my $region (@regions) {
136 if( $feature->overlaps( $region->{range} )) {
137 push @{$region->{features}}, $feature;
138 next FEATURE;
142 $src->{regions} = \@regions;
143 delete $src->{features}; #< not needed anymore
146 # make CrossReference object for each region
147 return map $self->_make_region_xref( $_ ),
148 map @{$_->{regions}},
149 values %src_sequence_matches;
152 sub _make_cross_ref {
153 shift;
154 return (__PACKAGE__.'::CrossReference')->new( @_ );
157 sub _make_region_xref {
158 my ( $self, $region ) = @_;
160 my $features = $region->{features};
161 my $first_feature = $features->[0];
162 my $range = $region->{range};
164 my ( $start, $end ) = ( $range->start, $range->end );
165 ( $start, $end ) = ( $end, $start ) if $start > $end;
167 my @highlight =
168 @$features == 1 # highlight our feature or region if we can
169 ? ( h_feat => $first_feature->display_name )
170 : ( h_region => $first_feature->seq_id.":$start..$end" );
172 my $is_whole_sequence = # is the region the whole reference sequence?
173 ( scalar @$features == 1
174 && $first_feature->seq_id eq $first_feature->display_name
175 && $start == 1
176 && $first_feature->can('length') && $end == $first_feature->length
179 my $region_string =
180 $is_whole_sequence
181 # if so, just use the seq name as the region string
182 ? $first_feature->seq_id
183 # otherwise, print coords on the region string
184 : $first_feature->seq_id.":$start..$end";
186 my @features_to_print = grep $_->display_name ne $_->seq_id, @{$region->{features}};
188 return $self->_make_cross_ref(
189 text => join( ' ',
190 $self->description.' - ',
191 "view $region_string",
192 ( @features_to_print
193 ? ' ('.join(', ', map $_->display_name || $_->primary_id, @features_to_print).')'
194 : ()
197 url =>
198 $self->view_url({
199 name => $region_string,
200 @highlight,
202 preview_image_url =>
203 $self->image_url({
204 name => $region_string,
205 format => 'GD',
208 seqfeatures => $features,
209 seq_id => $first_feature->seq_id,
210 is_whole_sequence => $is_whole_sequence,
211 start => $start,
212 end => $end,
213 feature => $self->gbrowse,
214 data_source => $self,
218 sub image_url {
219 my ( $self, $q ) = @_;
220 $q ||= {};
221 $q->{width} ||= 600;
222 $q->{keystyle} ||= 'between',
223 $q->{grid} ||= 1;
224 return $self->_url( 'gbrowse_img', $q );
228 package SGN::Feature::GBrowse2::DataSource::CrossReference;
229 use Moose;
230 use MooseX::Types::URI qw/ Uri /;
231 extends 'SGN::SiteFeatures::CrossReference';
233 with 'SGN::SiteFeatures::CrossReference::WithPreviewImage',
234 'SGN::SiteFeatures::CrossReference::WithSeqFeatures';
236 has 'data_source' => ( is => 'ro', required => 1 );
238 has 'is_whole_sequence' => (
239 is => 'ro',
240 isa => 'Bool',
241 documentation => 'true if this cross-reference points to the entire reference sequence',
244 has 'seq_id' => (
245 is => 'ro',
246 isa => 'Str',
249 has $_ => ( is => 'ro', isa => 'Int' )
250 for 'start', 'end';
252 __PACKAGE__->meta->make_immutable;