change rules for cluster accessible dirs.
[cxgn-corelibs.git] / lib / CXGN / Cluster / ClusterSet.pm
bloba5e667a01f4d44fb4753a3e49b82d48dfd9cf157
4 =head1 NAME CXGN::Cluster::ClusterSet
6 CXGN::Cluster::ClusterSet - a package to manage sets of preclusters
8 =head1 DESCRIPTION
10 This package keeps track of all the sequence ids that are being clustered, and builds hashes for fast access to determine if a sequence is already in a cluster. It has a function, add_match, that takes two ids and adds them to existing clusters or creates a new cluster if applicable.
12 =head1 AUTHOR
14 Lukas Mueller <lam87@cornell.edu>
16 =cut
18 use strict;
20 package CXGN::Cluster::ClusterSet;
22 use base qw( CXGN::Cluster::Object );
24 =head2 new()
26 Usage: my $cluster_set = CGN::Cluster::ClusterSet->new()
27 Desc: Constructor
28 Ret: a cluster set object
29 Args: none
30 Side Effects: none
32 =cut
34 sub new {
35 my $class = shift;
36 my $self = $class -> SUPER::new(@_);
37 $self->reset_unique_key();
38 keys(%{$self->{key_hash}})=100000;
39 return $self;
42 =head2 add_match()
44 Usage: $cluster_set -> add_match($query_id, $subject_id)
45 Desc: this function checks if the query and subject ids are
46 already in clusters; if they are in the same cluster
47 the function does nothing, if only subject id or query
48 id are in a cluster, the other is added to that same
49 cluster, and if both are in different clusters, the two
50 clusters are pulled together.
51 Ret:
52 Args: two ids representing the match.
53 Side Effects:
54 Example:
56 =cut
59 sub add_match {
60 my $self = shift;
61 my $query_id = shift;
62 my $subject_id = shift;
63 # $self->set_debug(1);
64 if (!$query_id || !$subject_id) {
65 $self->debug("Ignoring $query_id and $subject_id - incomplete match.\n");
66 return;
69 my $c1 = $self->get_cluster($query_id);
70 my $c2 = $self->get_cluster($subject_id);
71 if ( ($c1 && $c2) && ($c1 == $c2) ) {
72 $self->debug(" [ ignoring both in ".$c1->get_unique_key()."]\n");
73 # do nothing, because both have already been added
74 # to the same precluster
76 elsif ( ($c1 && $c2) && ($c1 != $c2) ) {
77 $self->debug("IDs already in distinct clusters. Combining...\n");
78 # we have a problem because the two have
79 # already been assigned to distinct sub-clusters.
80 # we need to pull the clusters together.
81 $self->debug("Before combining: "
82 .$c1->get_unique_key().":".$c1->get_size().
83 " ".$c2->get_unique_key().":".$c2->get_size()."\n");
84 $c1->combine($c2);
85 $self->debug("After combining: "
86 .$c1->get_unique_key().":".$c1->get_size()."\n");
88 elsif ($c1 && !$c2) {
89 $self->debug("query $query_id already in cluster [".$c1->get_unique_key()."], adding $subject_id\n");
90 $c1->add_member($subject_id);
91 $self->debug("Now containing ".$c1->get_size()." members.\n");
93 elsif (!$c1 && $c2) {
94 $self->debug("subject $subject_id already in cluster [".$c2->get_unique_key()."], adding $query_id\n");
95 $c2->add_member($query_id);
97 else {
98 $self->debug("creating new cluster...\n");
99 # there is no cluster yet...
100 # generate a new cluster
101 my $new = CXGN::Cluster::Precluster->new($self);
102 $self->add_cluster($new);
103 $new->add_member($query_id);
104 $new->add_member($subject_id);
105 $new->debug($self->get_debug());
109 =head2 add_cluster()
111 Usage: $cluster_set->add_cluster($c)
112 Desc: adds a new cluster, $c, to the cluster set.
113 The cluster will be tracked through the
114 internal cluster key hash for fast access.
115 Ret: nothing meaningful
116 Args: a CXGN::Cluster::Precluster to add to this set
118 =cut
120 sub add_cluster {
121 my $self = shift;
122 my $cluster = shift;
123 # my $unique_key = $cluster->get_unique_key();
124 $self->add_key_hash($cluster);
127 sub add_key_hash {
128 my $self = shift;
129 my $cluster = shift;
130 my $unique_key = $cluster->get_unique_key();
131 $self->{key_hash}{$unique_key}=$cluster;
134 =head2 remove_cluster()
136 Usage: $set->remove_cluster($c);
137 Desc: remove the cluster from this set
138 Ret: nothing meaningful
139 Args: CXGN::Cluster::Precluster to remove
141 =cut
143 sub remove_cluster {
144 my $self = shift;
145 my $cluster = shift;
146 $self->debug("Deleting cluster ".$cluster->get_unique_key()."...\n");
147 delete($self->{key_hash}{$cluster->get_unique_key()});
150 =head2 get_clusters()
152 Usage: $set->get_clusters
153 Desc: gets all clusters as list of CXGN::Cluster::Precluster
154 objects
155 Ret: list of CXGN::Cluster::Precluster objects, in ascending
156 order of number of members
157 Args: nothing
159 =cut
161 sub get_clusters {
162 my $self = shift;
164 return sort {$a->get_member_count <=> $b->get_member_count}
165 values %{$self->{key_hash}};
168 =head2 add_id()
170 Usage:
171 Desc: add a seq id to the cluster hash for fast
172 cluster retrieval using a seq id.
173 Ret:
174 Args:
175 Side Effects:
176 Example:
178 =cut
180 sub add_id {
181 my $self = shift;
182 my $cluster = shift;
183 my $id = shift;
184 if (!$id || !$cluster) { die "need cluster object and id"; }
185 $self->{id_map}{$id}=$cluster;
188 =head2 get_cluster()
190 Usage:
191 Desc: gets the cluster that contains the sequence with id $id.
192 see also add_id().
193 Ret:
194 Args:
195 Side Effects:
196 Example:
198 =cut
200 sub get_cluster {
201 my $self = shift;
202 my $id = shift;
203 if (!$id) { die "get_cluster: need an id!\n"; }
204 return $self->{id_map}{$id};
207 =head2 generate_unique_key()
209 Usage: my $key = $cluster_set->generate_unique_key()
210 Desc: returns a new unique key for a cluster. Unique
211 keys are simply generated by added 1 to the previous
212 key.
213 Ret:
214 Args:
215 Side Effects:
216 Example:
218 =cut
220 sub generate_unique_key {
221 my $self=shift;
222 return ($self->{unique_key})++;
225 =head2 reset_unique_key()
227 Usage: $cluster_set->reset_unique_key()
228 Desc: resets the unique key to 0. Should not
229 be called during normal operation, because
230 keys will not be unique anymore.
231 Ret:
232 Args:
233 Side Effects:
234 Example:
236 =cut
238 sub reset_unique_key {
239 my $self=shift;
240 $self->{unique_key}=0;
243 return 1;