4 =head1 NAME CXGN::Cluster::ClusterSet
6 CXGN::Cluster::ClusterSet - a package to manage sets of preclusters
10 This package keeps track of all the sequence ids that are being clustered, and builds hashes for fast access to determine if a sequence is already in a cluster. It has a function, add_match, that takes two ids and adds them to existing clusters or creates a new cluster if applicable.
14 Lukas Mueller <lam87@cornell.edu>
20 package CXGN
::Cluster
::ClusterSet
;
22 use base
qw( CXGN::Cluster::Object );
26 Usage: my $cluster_set = CGN::Cluster::ClusterSet->new()
28 Ret: a cluster set object
36 my $self = $class -> SUPER
::new
(@_);
37 $self->reset_unique_key();
38 keys(%{$self->{key_hash
}})=100000;
44 Usage: $cluster_set -> add_match($query_id, $subject_id)
45 Desc: this function checks if the query and subject ids are
46 already in clusters; if they are in the same cluster
47 the function does nothing, if only subject id or query
48 id are in a cluster, the other is added to that same
49 cluster, and if both are in different clusters, the two
50 clusters are pulled together.
52 Args: two ids representing the match.
62 my $subject_id = shift;
63 # $self->set_debug(1);
64 if (!$query_id || !$subject_id) {
65 $self->debug("Ignoring $query_id and $subject_id - incomplete match.\n");
69 my $c1 = $self->get_cluster($query_id);
70 my $c2 = $self->get_cluster($subject_id);
71 if ( ($c1 && $c2) && ($c1 == $c2) ) {
72 $self->debug(" [ ignoring both in ".$c1->get_unique_key()."]\n");
73 # do nothing, because both have already been added
74 # to the same precluster
76 elsif ( ($c1 && $c2) && ($c1 != $c2) ) {
77 $self->debug("IDs already in distinct clusters. Combining...\n");
78 # we have a problem because the two have
79 # already been assigned to distinct sub-clusters.
80 # we need to pull the clusters together.
81 $self->debug("Before combining: "
82 .$c1->get_unique_key().":".$c1->get_size().
83 " ".$c2->get_unique_key().":".$c2->get_size()."\n");
85 $self->debug("After combining: "
86 .$c1->get_unique_key().":".$c1->get_size()."\n");
89 $self->debug("query $query_id already in cluster [".$c1->get_unique_key()."], adding $subject_id\n");
90 $c1->add_member($subject_id);
91 $self->debug("Now containing ".$c1->get_size()." members.\n");
94 $self->debug("subject $subject_id already in cluster [".$c2->get_unique_key()."], adding $query_id\n");
95 $c2->add_member($query_id);
98 $self->debug("creating new cluster...\n");
99 # there is no cluster yet...
100 # generate a new cluster
101 my $new = CXGN
::Cluster
::Precluster
->new($self);
102 $self->add_cluster($new);
103 $new->add_member($query_id);
104 $new->add_member($subject_id);
105 $new->debug($self->get_debug());
111 Usage: $cluster_set->add_cluster($c)
112 Desc: adds a new cluster, $c, to the cluster set.
113 The cluster will be tracked through the
114 internal cluster key hash for fast access.
115 Ret: nothing meaningful
116 Args: a CXGN::Cluster::Precluster to add to this set
123 # my $unique_key = $cluster->get_unique_key();
124 $self->add_key_hash($cluster);
130 my $unique_key = $cluster->get_unique_key();
131 $self->{key_hash
}{$unique_key}=$cluster;
134 =head2 remove_cluster()
136 Usage: $set->remove_cluster($c);
137 Desc: remove the cluster from this set
138 Ret: nothing meaningful
139 Args: CXGN::Cluster::Precluster to remove
146 $self->debug("Deleting cluster ".$cluster->get_unique_key()."...\n");
147 delete($self->{key_hash
}{$cluster->get_unique_key()});
150 =head2 get_clusters()
152 Usage: $set->get_clusters
153 Desc: gets all clusters as list of CXGN::Cluster::Precluster
155 Ret: list of CXGN::Cluster::Precluster objects, in ascending
156 order of number of members
164 return sort {$a->get_member_count <=> $b->get_member_count}
165 values %{$self->{key_hash
}};
171 Desc: add a seq id to the cluster hash for fast
172 cluster retrieval using a seq id.
184 if (!$id || !$cluster) { die "need cluster object and id"; }
185 $self->{id_map
}{$id}=$cluster;
191 Desc: gets the cluster that contains the sequence with id $id.
203 if (!$id) { die "get_cluster: need an id!\n"; }
204 return $self->{id_map
}{$id};
207 =head2 generate_unique_key()
209 Usage: my $key = $cluster_set->generate_unique_key()
210 Desc: returns a new unique key for a cluster. Unique
211 keys are simply generated by added 1 to the previous
220 sub generate_unique_key
{
222 return ($self->{unique_key
})++;
225 =head2 reset_unique_key()
227 Usage: $cluster_set->reset_unique_key()
228 Desc: resets the unique key to 0. Should not
229 be called during normal operation, because
230 keys will not be unique anymore.
238 sub reset_unique_key
{
240 $self->{unique_key
}=0;