1 package SGN
::View
::Feature
;
8 use List
::Util qw
/ sum /;
9 use List
::MoreUtils qw
/ any uniq /;
13 use CXGN
::Tools
::Text qw
/commify_number/;
14 use CXGN
::Tools
::Identifiers
;
18 related_stats feature_table
21 organism_link feature_length
22 mrna_cds_protein_sequence
23 description_featureprop_types
33 cvterm_name
( shift->type, @_ );
37 my ($cvt, $caps) = @_;
38 ( my $n = $cvt->name ) =~ s/_/ /g;
40 $n =~ s/(\S+)/lc($1) eq $1 ? ucfirst($1) : $1/e;
45 sub description_featureprop_types
{
46 shift->result_source->schema
47 ->resultset('Cv::Cvterm')
50 'functional_description',
57 sub get_descriptions
{
58 my ( $feature, $plain ) = @_;
61 description_featureprop_types
( $feature )
62 ->get_column('cvterm_id')
66 $feature->search_related('featureprops',
67 { type_id
=> { -in => $desc_types } },
68 { order_by
=> 'rank' },
74 return @descriptions if defined $plain;
76 s/(\S+)/my $id = $1; CXGN::Tools::Identifiers::link_identifier($id) || $id/ge for @descriptions;
81 sub location_string_html
{
82 my ( $id, $start, $end, $strand ) = @_;
85 $id = feature_link
($loc->srcfeature);
86 $start = $loc->fmin+1;
88 $strand = $loc->strand;
90 ( $start, $end ) = ( $end, $start ) if $strand && $strand == -1;
91 return "$id:$start..$end";
95 my ( $id, $start, $end, $strand ) = @_;
98 $id = $loc->srcfeature->name;
99 $start = $loc->fmin+1;
101 $strand = $loc->strand;
103 ( $start, $end ) = ( $end, $start ) if $strand && $strand == -1;
104 return "$id:$start..$end";
107 sub location_list_html
{
108 my ($feature, $featurelocs) = @_;
109 my @coords = map { location_string_html
($_) }
110 ( #$featurelocs ? $featurelocs->all
111 # : $feature->featureloc_features->all );
112 $featurelocs ?
$featurelocs->search({locgroup
=> 0,},)->all
113 : $feature->featureloc_features->search({locgroup
=> 0,},)->all)
114 or return '<span class="ghosted">none</span>';
118 my ($feature, $featurelocs) = @_;
119 print STDERR
"\n\nLOCATON LIST\n\n";
120 return map { ($_->srcfeature ?
$_->srcfeature->name : '<span class="ghosted">null</span>') . ':' . ($_->fmin+1) . '..' . $_->fmax }
121 ( #$featurelocs ? $featurelocs->all
122 # : $feature->featureloc_features->all );
123 $featurelocs ?
$featurelocs->search({locgroup
=> 0,},)->all
124 : $feature->featureloc_features->search({locgroup
=> 0,},)->all );
130 my $total = scalar @
$features;
131 for my $f (@
$features) {
132 $stats->{cvterm_link
($f->type)}++;
135 for my $k (sort keys %$stats) {
136 push @
$data, [ $stats->{$k}, $k ];
138 if( 1 < scalar keys %$stats ) {
139 push @
$data, [ $total, "<b>Total</b>" ];
145 my ( $features, $reference_sequence, $omit_columns ) = @_;
147 { no warnings
'uninitialized';
148 $omit_columns ||= [];
149 $omit_columns = [$omit_columns] unless ref $omit_columns eq 'ARRAY';
153 for my $f (sort { $a->name cmp $b->name } @
$features) {
155 $reference_sequence ?
( srcfeature_id
=> $reference_sequence->feature_id )
158 my @locations = $f->search_related('featureloc_features', {
162 { order_by
=> 'feature_id' }
166 # Add a row for every featureloc
167 my $first_location = 0;
168 for my $loc (@locations) {
169 my $ref = $loc->srcfeature;
170 my ($start,$end) = ($loc->fmin+1, $loc->fmax);
174 : ( organism_link
( $f->organism ),
175 cvterm_link
($f->type),
179 ($ref ?
$ref->name : '<span class="ghosted">null</span>').":$start..$end",
180 commify_number
( feature_length
( $f, $loc ) ) || undef,
181 $loc->strand ?
( $loc->strand == 1 ?
'+' : '-' ) : undef,
182 $loc->phase || undef,
187 my $nl = 'not located';
188 if( $reference_sequence ) {
189 $nl .= " on ".encode_entities
( $reference_sequence->name )
192 organism_link
( $f->organism ),
193 cvterm_link
($f->type),
195 qq|<span
class="ghosted">$nl</span
>|,
196 commify_number
( feature_length
( $f, undef ) ) || undef,
203 my @headings = ( "Organism", "Type", "Name", "Location", "Length", "Strand", "Phase" );
205 my @align = map 'l', @headings;
207 # omit any columns that are *all* undefined, or that we were
209 my @cols_to_omit = uniq
(
211 my %heading_index = do { my $i = 0; map { lc $_ => $i++ } @headings };
213 my $i = $heading_index{lc $_};
214 defined $i or die "$_ column not found";
220 for my $t ( [\
@headings], \
@data, [\
@align] ) {
221 for my $row ( @
$t ) {
222 splice( @
$row, $_, 1 ) for @cols_to_omit;
226 # make html for any other undef cells
229 $_ = '<span class="ghosted">n/a</span>' unless defined;
233 return ( headings
=> \
@headings, data
=> \
@data, __align
=> \
@align, __alt_freq
=> 0 , __border
=> 1 );
236 # try to figure out the "length" of a feature, which will vary for different features
238 my ( $feature, $location ) = @_;
240 $location = $location->first
241 if $location && $location->isa('DBIx::Class::ResultSet');
243 my $type = $feature->type;
244 my $type_name = $type->name;
246 # firstly, for any feature, trust the length of its residues if it has them
247 if( my $seqlen = $feature->seqlen || $feature->residues && length $feature->residues ) {
250 # for some features, can say that its length is the length of its location
251 elsif( any
{ $type_name eq $_ } qw( exon gene ) ) {
252 return unless $location;
253 return $location->fmax - $location->fmin;
258 sub _feature_search_string
{
260 return '' unless $fl;
261 return ($fl->srcfeature ?
$fl->srcfeature->name : '<span class="ghosted">null</span>') . ':'. ($fl->fmin+1) . '..' . $fl->fmax;
265 ### XXX TODO: A lot of these _link and sequence functions need to be
266 ### moved to controller code.
270 return '<span class="ghosted">null</span>' unless $feature;
271 my $id = $feature->feature_id;
272 my $name = $feature->name;
273 return qq{<a href
="/feature/$id/details">$name</a
>};
278 my $id = $organism->organism_id;
279 my $species = $organism->species;
280 return qq{<a
class="species_binomial" href
="/chado/organism.pl?organism_id=$id">$species</a
>};
284 my ( $cvt, $caps ) = @_;
285 my $name = cvterm_name
( $cvt, $caps );
287 return qq{<a href
="/cvterm/$id/view">$name</a
>};
290 sub mrna_cds_protein_sequence
{
291 my ($mrna_feature) = @_;
293 # if we were actually passed a polypeptide, get its mrna(s) and
295 if( $mrna_feature->type->name eq 'polypeptide' ) {
297 map mrna_cds_protein_sequence
( $_ ),
298 $mrna_feature->search_related('feature_relationship_subjects',
300 -in => $mrna_feature->result_source->schema
301 ->resultset('Cv::Cvterm')
302 ->search({name
=> 'derives_from'})
303 ->get_column('cvterm_id')
308 ->search_related('object');
311 my $description = join ', ', get_descriptions
( $mrna_feature, 'no html' );
312 my $peptide = _peptides_rs
( $mrna_feature )->first;
314 my @exon_locations = _exon_rs
( $mrna_feature )->all;
315 unless( @exon_locations ) {
316 # cannot calculate the cds and protein without exons, because
317 # UTRs can sometimes have introns in them. without knowing
318 # the exon structure, we don't know how much to cut off of the
321 $mrna_feature->subseq(1,1) ?
$mrna_feature : undef,
323 $peptide && $peptide->subseq(1,1) ?
$peptide : undef,
327 my $mrna_seq = $mrna_feature->subseq(1,1) ?
$mrna_feature : _make_mrna_seq
( $mrna_feature, $description, \
@exon_locations );
328 my $peptide_loc = $peptide && _peptide_loc
($peptide)->first;
330 # just return the mrna seq and nothing else if we have no peptide
331 # or the peptide is not located
332 unless( $peptide && $peptide_loc ) {
333 return [ $mrna_seq, undef, undef ] unless $peptide && $peptide_loc;
336 my $cds_seq = Bio
::PrimarySeq
->new(
337 -id
=> $mrna_seq->display_name,
338 -desc
=> $description,
339 -seq
=> $mrna_seq->seq,
341 my ( $trim_from_left, $trim_from_right ) = _calculate_cdna_utr_lengths
(
342 _loc2range
( $peptide_loc ),
343 [ map _loc2range
( $_), @exon_locations ],
346 if( $trim_from_left || $trim_from_right ) {
347 $cds_seq = $cds_seq->trunc( 1+$trim_from_left, $mrna_seq->length - $trim_from_right );
349 ##my $protein_seq = $cds_seq->translate;
350 ##Get the protein sequence from the peptide object (stored in the database in the residues field of the feature table)
351 ## No need to try to translade the CDS
352 my $protein_seq = Bio
::PrimarySeq
->new(
353 -id
=> $mrna_seq->display_name,
354 -desc
=> $description,
355 -seq
=> $peptide->residues,
358 return [ $mrna_seq, $cds_seq, $protein_seq ];
362 my ( $mrna_feat, $description, $exons ) = @_;
364 # NOTE: doing this subseq math in 0-based coords
365 my $span_start = $exons->[0]->fmin;
366 my $span_end = $exons->[-1]->fmax-1;
368 # 0 1 2 3 4 5 6 7 8 interbase (Chado)
370 # 0 1 2 3 4 5 6 7 0-based (substr)
371 # 1 2 3 4 5 6 7 8 1-based (BioPerl)
373 # recall: the exons are in sorted order
374 my $span_seq = $exons->[0]->srcfeature->subseq( $span_start+1, $span_end+1 ); #< 1-based
375 my $mrna_sequence = join '', map { substr($span_seq, $_->fmin - $span_start, $_->fmax - $_->fmin ) } @
$exons;
377 my $mrna_seq = Bio
::PrimarySeq
->new(
378 -id
=> $mrna_feat->name,
379 -desc
=> $description,
380 -seq
=> $mrna_sequence,
383 $mrna_seq = $mrna_seq->revcom if $exons->[0]->strand == -1;
390 return $loc->to_range if $loc->can('to_range');
391 return Bio
::Range
->new(
392 -start
=> $loc->fmin + 1,
394 -strand
=> $loc->strand,
398 # given the range of the peptide and the ranges of each of the exons
399 # (as Bio::RangeI's), calculate how many bases should be trimmed off
400 # of each end of the cDNA (i.e. mRNA) seq to get the CDS seq
401 sub _calculate_cdna_utr_lengths
{
402 my ( $peptide, $exons ) = @_;
404 my ( $trim_left, $trim_right ) = ( 0, 0 );
406 # calculate trim_fmin if necessary
407 if( $exons->[0]->start < $peptide->start ) {
412 $_->overlaps($peptide)
413 ?
$peptide->start - $_->start
416 grep $_->start < $peptide->start, # find exons that overlap the UTR
420 # calculate trim_fmax if necessary
421 if( $exons->[-1]->end > $peptide->end ) {
425 $_->overlaps($peptide)
426 ?
$_->end - $peptide->end
429 grep $_->end > $peptide->end, # find exons that overlap the UTR
433 return $exons->[0]->strand == -1 ?
($trim_right, $trim_left) : ( $trim_left, $trim_right );
437 my ( $mrna_feature ) = @_;
440 ->feature_relationship_objects({
442 -in => _cvterm_rs
( $mrna_feature, 'relationship', 'derives_from' )
443 ->get_column('cvterm_id')
447 ->search_related( 'subject', {
448 'subject.type_id' => {
449 -in => _cvterm_rs
( $mrna_feature, 'sequence', 'polypeptide' )
450 ->get_column('cvterm_id')
457 $rs->search_related( 'featureloc_features', {
458 #srcfeature_id => { -not => undef },
459 srcfeature_id
=> { -not => undef }, locgroup
=> 0
461 { # Don't prefetch srcfeatures, it significantly slows down the query
462 # prefetch => 'srcfeature',
469 my ( $mrna_feature ) = @_;
471 my $rs = $mrna_feature->feature_relationship_objects({
473 -in => _cvterm_rs
( $mrna_feature, 'relationship', 'part_of' )
474 ->get_column('cvterm_id')
481 ->search_related( 'subject', {
482 'subject.type_id' => {
483 -in => _cvterm_rs
( $mrna_feature, 'sequence', 'exon' )
484 ->get_column('cvterm_id')
489 prefetch
=> 'featureloc_features',
491 ->search_related( 'featureloc_features', {
492 #srcfeature_id => { -not => undef },
493 srcfeature_id
=> { -not => undef }, locgroup
=> 0
496 prefetch
=> 'srcfeature',
504 my ( $row, $cv, $cvt ) = @_;
506 return $row->result_source->schema
507 ->resultset('Cv::Cv')
508 ->search({ 'me.name' => $cv })
509 ->search_related('cvterms', {
510 'cvterms.name' => $cvt,