maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / DB / GFF / Aggregator / alignment.pm
blob439dfcf78bfe97464f1ae06f40f18f38c9b773ca
1 =head1 NAME
3 Bio::DB::GFF::Aggregator::alignment -- Alignment aggregator
5 =head1 SYNOPSIS
7 use Bio::DB::GFF;
9 # Open the sequence database
10 my $db = Bio::DB::GFF->new( -adaptor => 'dbi:mysql',
11 -dsn => 'dbi:mysql:elegans42',
12 -aggregator => ['alignment'],
13 );
15 -----------------------------
16 Aggregator method: alignment
17 Main method: (none)
18 Sub methods: nucleotide_match,EST_match,cDNA_match,expressed_sequence_match,
19 translated_nucleotide_match,protein_match,HSP
20 -----------------------------
22 =head1 DESCRIPTION
24 Bio::DB::GFF::Aggregator::alignment is one of the default aggregators,
25 and was written to be compatible with the C. elegans GFF files. It
26 aggregates raw "similarity" features into composite features of type
27 "alignment". A better name for this class might be
28 "gapped_alignment."
30 This aggregator does not insist that there be a single top-level
31 feature that spans one end of the alignment to the other. As a
32 result, it can produce truncated alignments if the entire alignment is
33 not contained within the segment of interest.
35 =cut
37 package Bio::DB::GFF::Aggregator::alignment;
39 use strict;
42 use base qw(Bio::DB::GFF::Aggregator);
44 =head2 aggregate
46 Title : aggregate
47 Usage : $features = $a->aggregate($features,$factory)
48 Function: aggregate a feature list into composite features
49 Returns : an array reference containing modified features
50 Args : see L<Bio::DB::GFF::Aggregator>
51 Status : Public
53 Because of the large number of similarity features, the aggregate()
54 method is overridden in order to perform some optimizations.
56 =cut
# Gather every feature accepted by the match sub into one composite
# feature per (group, source) pair, typed with this aggregator's method.
# Features the match sub rejects are passed through unchanged.
#
# Arguments:
#   $features - array reference of features; its contents are replaced
#               in place with the aggregated list
#   $factory  - object handed to match_sub() and passthru_sub(), and
#               asked for its debug() flag
#
# Returns early, leaving @$features untouched, when match_sub() yields
# nothing.  Otherwise the value of the sub is that of the final list
# assignment to @$features.
sub aggregate {
  my $self     = shift;
  my $features = shift;
  my $factory  = shift;

  my $matchsub = $self->match_sub($factory) or return;
  my $passthru = $self->passthru_sub($factory);
  my $method   = $self->get_method;

  my (%alignments,@result);

  warn "running alignment aggregator" if $factory->debug;
  for my $feature (@$features) {

    # anything that is not one of our sub-parts goes straight through
    unless ($matchsub->($feature)) {
      push @result,$feature;
      next;
    }

    my $source = $feature->source;

    # Same key perl builds for $alignments{$group,$source}; computed once
    # because it is needed for the lookup, the store and the fetch.
    my $key = join($;, $feature->{group}, $source);

    unless (exists $alignments{$key}) {
      # Check the clone *before* the slice assignment below: assigning
      # through an undefined $f would autovivify it into a plain hash,
      # which can never fail a truth test and is not a feature object.
      my $f = $feature->clone or next;

      my $type = Bio::DB::GFF::Typename->new($method,$source);

      # this is a violation of OO encapsulation, but need to do it this way
      # to achieve desired performance
      @{$f}{qw(type score phase)} = ($type,undef,undef);

      $alignments{$key} = $f;
    }

    $alignments{$key}->add_subfeature($feature);

    # the raw part is kept in the output only when a passthru sub asks for it
    push @result,$feature if $passthru && $passthru->($feature);
  }

  warn "running aligner adjuster" if $factory->debug;
  for my $alignment (values %alignments) {
    $alignment->adjust_bounds;
    $alignment->compound(1);
    push @result,$alignment;
  }
  warn "aligner done" if $factory->debug;

  # overwrite the caller's array in place
  @$features = @result;
}
106 =head2 method
108 Title : method
109 Usage : $aggregator->method
110 Function: return the method for the composite object
111 Returns : the string "alignment"
112 Args : none
113 Status : Public
115 =cut
# Name of the composite feature type produced by this aggregator.
sub method {
  return 'alignment';
}
119 =head2 part_names
121 Title : part_names
122 Usage : $aggregator->part_names
123 Function: return the methods for the sub-parts
124 Returns : the full list of aggregated methods
125 Args : none
126 Status : Public
128 =cut
130 sub part_names {
131 my $self = shift;
132 return qw(nucleotide_match EST_match cDNA_match
133 expressed_sequence_match
134 translated_nucleotide_match
135 protein_match HSP);