1 package SGN
::View
::Feature
;
8 use List
::Util qw
/ sum /;
9 use List
::MoreUtils qw
/ any uniq /;
13 use CXGN
::Tools
::Text qw
/commify_number/;
14 use CXGN
::Tools
::Identifiers
;
18 related_stats feature_table
21 organism_link feature_length
22 mrna_cds_protein_sequence
23 description_featureprop_types
36 sub feature_organisms
{
40 map [ $_->organism_id, $_->species ],
42 ->resultset('Sequence::Feature')
43 ->search_related('organism' , {}, {
44 select => [qw
[ organism
.organism_id species
]],
46 order_by
=> 'species',
55 map [$_->cvterm_id,$_->name],
57 ->resultset('Sequence::Feature')
61 { select => [qw
[ cvterm_id type
.name
]],
62 group_by
=> [qw
[ cvterm_id type
.name
]],
63 order_by
=> 'type.name',
68 unshift @
$ref , ['0', ''];
73 cvterm_name
( shift->type, @_ );
77 my ($cvt, $caps) = @_;
78 ( my $n = $cvt->name ) =~ s/_/ /g;
80 $n =~ s/(\S+)/lc($1) eq $1 ? ucfirst($1) : $1/e;
85 sub description_featureprop_types
{
86 shift->result_source->schema
87 ->resultset('Cv::Cvterm')
90 'functional_description',
97 sub get_descriptions
{
98 my ( $feature, $plain ) = @_;
101 description_featureprop_types
( $feature )
102 ->get_column('cvterm_id')
106 $feature->search_related('featureprops',
107 { type_id
=> { -in => $desc_types } },
108 { order_by
=> 'rank' },
110 ->get_column('value')
114 return @descriptions if defined $plain;
116 s/(\S+)/my $id = $1; CXGN::Tools::Identifiers::link_identifier($id) || $id/ge for @descriptions;
118 return @descriptions;
121 sub location_string_html
{
122 my ( $id, $start, $end, $strand ) = @_;
125 $id = feature_link
($loc->srcfeature);
126 $start = $loc->fmin+1;
128 $strand = $loc->strand;
130 ( $start, $end ) = ( $end, $start ) if $strand && $strand == -1;
131 return "$id:$start..$end";
134 sub location_string
{
135 my ( $id, $start, $end, $strand ) = @_;
138 $id = $loc->srcfeature->name;
139 $start = $loc->fmin+1;
141 $strand = $loc->strand;
143 ( $start, $end ) = ( $end, $start ) if $strand && $strand == -1;
144 return "$id:$start..$end";
147 sub location_list_html
{
148 my ($feature, $featurelocs) = @_;
149 my @coords = map { location_string_html
($_) }
150 ( #$featurelocs ? $featurelocs->all
151 # : $feature->featureloc_features->all );
152 $featurelocs ?
$featurelocs->search({locgroup
=> 0,},)->all
153 : $feature->featureloc_features->search({locgroup
=> 0,},)->all)
154 or return '<span class="ghosted">none</span>';
158 my ($feature, $featurelocs) = @_;
159 print STDERR
"\n\nLOCATON LIST\n\n";
160 return map { ($_->srcfeature ?
$_->srcfeature->name : '<span class="ghosted">null</span>') . ':' . ($_->fmin+1) . '..' . $_->fmax }
161 ( #$featurelocs ? $featurelocs->all
162 # : $feature->featureloc_features->all );
163 $featurelocs ?
$featurelocs->search({locgroup
=> 0,},)->all
164 : $feature->featureloc_features->search({locgroup
=> 0,},)->all );
170 my $total = scalar @
$features;
171 for my $f (@
$features) {
172 $stats->{cvterm_link
($f->type)}++;
175 for my $k (sort keys %$stats) {
176 push @
$data, [ $stats->{$k}, $k ];
178 if( 1 < scalar keys %$stats ) {
179 push @
$data, [ $total, "<b>Total</b>" ];
185 my ( $features, $reference_sequence, $omit_columns ) = @_;
187 { no warnings
'uninitialized';
188 $omit_columns ||= [];
189 $omit_columns = [$omit_columns] unless ref $omit_columns eq 'ARRAY';
193 for my $f (sort { $a->name cmp $b->name } @
$features) {
195 $reference_sequence ?
( srcfeature_id
=> $reference_sequence->feature_id )
198 my @locations = $f->search_related('featureloc_features', {
202 { order_by
=> 'feature_id' }
206 # Add a row for every featureloc
207 my $first_location = 0;
208 for my $loc (@locations) {
209 my $ref = $loc->srcfeature;
210 my ($start,$end) = ($loc->fmin+1, $loc->fmax);
214 : ( organism_link
( $f->organism ),
215 cvterm_link
($f->type),
219 ($ref ?
$ref->name : '<span class="ghosted">null</span>').":$start..$end",
220 commify_number
( feature_length
( $f, $loc ) ) || undef,
221 $loc->strand ?
( $loc->strand == 1 ?
'+' : '-' ) : undef,
222 $loc->phase || undef,
227 my $nl = 'not located';
228 if( $reference_sequence ) {
229 $nl .= " on ".encode_entities
( $reference_sequence->name )
232 organism_link
( $f->organism ),
233 cvterm_link
($f->type),
235 qq|<span
class="ghosted">$nl</span
>|,
236 commify_number
( feature_length
( $f, undef ) ) || undef,
243 my @headings = ( "Organism", "Type", "Name", "Location", "Length", "Strand", "Phase" );
245 my @align = map 'l', @headings;
247 # omit any columns that are *all* undefined, or that we were
249 my @cols_to_omit = uniq
(
251 my %heading_index = do { my $i = 0; map { lc $_ => $i++ } @headings };
253 my $i = $heading_index{lc $_};
254 defined $i or die "$_ column not found";
260 for my $t ( [\
@headings], \
@data, [\
@align] ) {
261 for my $row ( @
$t ) {
262 splice( @
$row, $_, 1 ) for @cols_to_omit;
266 # make html for any other undef cells
269 $_ = '<span class="ghosted">n/a</span>' unless defined;
273 return ( headings
=> \
@headings, data
=> \
@data, __align
=> \
@align, __alt_freq
=> 0 , __border
=> 1 );
276 # try to figure out the "length" of a feature, which will vary for different features
278 my ( $feature, $location ) = @_;
280 $location = $location->first
281 if $location && $location->isa('DBIx::Class::ResultSet');
283 my $type = $feature->type;
284 my $type_name = $type->name;
286 # firstly, for any feature, trust the length of its residues if it has them
287 if( my $seqlen = $feature->seqlen || $feature->residues && length $feature->residues ) {
290 # for some features, can say that its length is the length of its location
291 elsif( any
{ $type_name eq $_ } qw( exon gene ) ) {
292 return unless $location;
293 return $location->fmax - $location->fmin;
298 sub _feature_search_string
{
300 return '' unless $fl;
301 return ($fl->srcfeature ?
$fl->srcfeature->name : '<span class="ghosted">null</span>') . ':'. ($fl->fmin+1) . '..' . $fl->fmax;
305 ### XXX TODO: A lot of these _link and sequence functions need to be
306 ### moved to controller code.
310 return '<span class="ghosted">null</span>' unless $feature;
311 my $id = $feature->feature_id;
312 my $name = $feature->name;
313 return qq{<a href
="/feature/$id/details">$name</a
>};
318 my $id = $organism->organism_id;
319 my $species = $organism->species;
320 return qq{<a
class="species_binomial" href
="/chado/organism.pl?organism_id=$id">$species</a
>};
324 my ( $cvt, $caps ) = @_;
325 my $name = cvterm_name
( $cvt, $caps );
327 return qq{<a href
="/cvterm/$id/view">$name</a
>};
330 sub mrna_cds_protein_sequence
{
331 my ($mrna_feature) = @_;
333 # if we were actually passed a polypeptide, get its mrna(s) and
335 if( $mrna_feature->type->name eq 'polypeptide' ) {
337 map mrna_cds_protein_sequence
( $_ ),
338 $mrna_feature->search_related('feature_relationship_subjects',
340 -in => $mrna_feature->result_source->schema
341 ->resultset('Cv::Cvterm')
342 ->search({name
=> 'derives_from'})
343 ->get_column('cvterm_id')
348 ->search_related('object');
351 my $description = join ', ', get_descriptions
( $mrna_feature, 'no html' );
352 my $peptide = _peptides_rs
( $mrna_feature )->first;
354 my @exon_locations = _exon_rs
( $mrna_feature )->all;
355 unless( @exon_locations ) {
356 # cannot calculate the cds and protein without exons, because
357 # UTRs can sometimes have introns in them. without knowing
358 # the exon structure, we don't know how much to cut off of the
361 $mrna_feature->subseq(1,1) ?
$mrna_feature : undef,
363 $peptide && $peptide->subseq(1,1) ?
$peptide : undef,
367 my $mrna_seq = $mrna_feature->subseq(1,1) ?
$mrna_feature : _make_mrna_seq
( $mrna_feature, $description, \
@exon_locations );
368 my $peptide_loc = $peptide && _peptide_loc
($peptide)->first;
370 # just return the mrna seq and nothing else if we have no peptide
371 # or the peptide is not located
372 unless( $peptide && $peptide_loc ) {
373 return [ $mrna_seq, undef, undef ] unless $peptide && $peptide_loc;
376 my $cds_seq = Bio
::PrimarySeq
->new(
377 -id
=> $mrna_seq->display_name,
378 -desc
=> $description,
379 -seq
=> $mrna_seq->seq,
381 my ( $trim_from_left, $trim_from_right ) = _calculate_cdna_utr_lengths
(
382 _loc2range
( $peptide_loc ),
383 [ map _loc2range
( $_), @exon_locations ],
386 if( $trim_from_left || $trim_from_right ) {
387 $cds_seq = $cds_seq->trunc( 1+$trim_from_left, $mrna_seq->length - $trim_from_right );
390 ##Get the protein sequence from the peptide object (stored in the database in the residues field of the feature table)
391 my $protein_seq = Bio
::PrimarySeq
->new(
392 -id
=> $mrna_seq->display_name,
393 -desc
=> $description,
394 -seq
=> $peptide->residues,
397 #Get the protein seq from translated CDS if no residues are found for polypeptide in the DB
398 if ( !$protein_seq->seq ) {
399 $protein_seq = $cds_seq->translate;
402 return [ $mrna_seq, $cds_seq, $protein_seq ];
406 my ( $mrna_feat, $description, $exons ) = @_;
408 # NOTE: doing this subseq math in 0-based coords
409 my $span_start = $exons->[0]->fmin;
410 my $span_end = $exons->[-1]->fmax-1;
412 # 0 1 2 3 4 5 6 7 8 interbase (Chado)
414 # 0 1 2 3 4 5 6 7 0-based (substr)
415 # 1 2 3 4 5 6 7 8 1-based (BioPerl)
417 # recall: the exons are in sorted order
418 my $span_seq = $exons->[0]->srcfeature->subseq( $span_start+1, $span_end+1 ); #< 1-based
419 my $mrna_sequence = join '', map { substr($span_seq, $_->fmin - $span_start, $_->fmax - $_->fmin ) } @
$exons;
421 my $mrna_seq = Bio
::PrimarySeq
->new(
422 -id
=> $mrna_feat->name,
423 -desc
=> $description,
424 -seq
=> $mrna_sequence,
427 $mrna_seq = $mrna_seq->revcom if $exons->[0]->strand == -1;
434 return $loc->to_range if $loc->can('to_range');
435 return Bio
::Range
->new(
436 -start
=> $loc->fmin + 1,
438 -strand
=> $loc->strand,
442 # given the range of the peptide and the ranges of each of the exons
443 # (as Bio::RangeI's), calculate how many bases should be trimmed off
444 # of each end of the cDNA (i.e. mRNA) seq to get the CDS seq
445 sub _calculate_cdna_utr_lengths
{
446 my ( $peptide, $exons ) = @_;
448 my ( $trim_left, $trim_right ) = ( 0, 0 );
450 # calculate trim_fmin if necessary
451 if( $exons->[0]->start < $peptide->start ) {
456 $_->overlaps($peptide)
457 ?
$peptide->start - $_->start
460 grep $_->start < $peptide->start, # find exons that overlap the UTR
464 # calculate trim_fmax if necessary
465 if( $exons->[-1]->end > $peptide->end ) {
469 $_->overlaps($peptide)
470 ?
$_->end - $peptide->end
473 grep $_->end > $peptide->end, # find exons that overlap the UTR
477 return $exons->[0]->strand == -1 ?
($trim_right, $trim_left) : ( $trim_left, $trim_right );
481 my ( $mrna_feature ) = @_;
484 ->feature_relationship_objects({
486 -in => _cvterm_rs
( $mrna_feature, 'relationship', 'derives_from' )
487 ->get_column('cvterm_id')
491 ->search_related( 'subject', {
492 'subject.type_id' => {
493 -in => _cvterm_rs
( $mrna_feature, 'sequence', 'polypeptide' )
494 ->get_column('cvterm_id')
501 $rs->search_related( 'featureloc_features', {
502 #srcfeature_id => { -not => undef },
503 srcfeature_id
=> { -not => undef }, locgroup
=> 0
505 { # Don't prefetch srcfeatures, it significantly slows down the query
506 # prefetch => 'srcfeature',
513 my ( $mrna_feature ) = @_;
515 my $rs = $mrna_feature->feature_relationship_objects({
517 -in => _cvterm_rs
( $mrna_feature, 'relationship', 'part_of' )
518 ->get_column('cvterm_id')
525 ->search_related( 'subject', {
526 'subject.type_id' => {
527 -in => _cvterm_rs
( $mrna_feature, 'sequence', 'exon' )
528 ->get_column('cvterm_id')
533 prefetch
=> 'featureloc_features',
535 ->search_related( 'featureloc_features', {
536 #srcfeature_id => { -not => undef },
537 srcfeature_id
=> { -not => undef }, locgroup
=> 0
540 prefetch
=> 'srcfeature',
548 my ( $row, $cv, $cvt ) = @_;
550 return $row->result_source->schema
551 ->resultset('Cv::Cv')
552 ->search({ 'me.name' => $cv })
553 ->search_related('cvterms', {
554 'cvterms.name' => $cvt,