def HandleCluster(self, cluster, prior):
    """Record the prior associated with a cluster.

    Stores ``prior`` in ``self.priors`` under the key ``cluster``. A value
    stored earlier for the same cluster is overwritten.

    Args:
        cluster: Key identifying the cluster.
        prior: Value to remember for that cluster.
    """
    known_priors = self.priors
    known_priors[cluster] = prior
def Prior(self, cluster):
    """Return the prior previously recorded for ``cluster``.

    This is a plain lookup in ``self.priors``; if the cluster was never
    recorded, the mapping's own lookup error propagates (``KeyError`` when
    ``self.priors`` is a dict).

    Args:
        cluster: Key identifying the cluster.

    Returns:
        The value stored for ``cluster``.
    """
    recorded = self.priors
    return recorded[cluster]
14 class ProgramClusters
:
def AddCluster(self, cluster):
    """Count one more occurrence of ``cluster``.

    ``self.clusters`` maps each cluster to the number of times it has been
    added; a cluster seen for the first time starts from zero.

    Args:
        cluster: Key identifying the cluster to count.
    """
    # dict.has_key() was removed in Python 3; get() with a default behaves
    # the same on Python 2 and Python 3 and needs only one lookup.
    self.clusters[cluster] = self.clusters.get(cluster, 0) + 1
26 # Figure out the best cluster, and its count
29 for cluster
in self
.clusters
.keys():
30 num_clusters
= self
.clusters
[cluster
]
31 if num_clusters
> best_count
:
32 best_count
= num_clusters
33 best_cluster
= cluster
34 return (best_cluster
, best_count
, self
.total
)
# Input file named by the first command-line argument.
# NOTE(review): nothing visible in this fragment closes the handle; confirm
# it is closed (or wrapped in `with`) by the enclosing code.
infile = open(argv[1], 'r')
# Maps each program id to the ProgramClusters instance counting its clusters.
program_clusters_map = dict()
# Remembers the prior reported for each cluster.
cluster_info = ClusterInfo()
# Matches a run of characters that are not '_', a digit, or '.'.
clusters_re = re.compile('[^_\\d\\.]+')
# Parse one comma-separated record.
# NOTE(review): `line` is bound outside this fragment, presumably by a loop
# over `infile`; confirm against the enclosing code.
fields = line.strip().split(',')
base = os.path.basename(fields[0])
# The program id is the first run of characters in the file's base name that
# are not '_', a digit, or '.'.
# NOTE(review): search() returns None when the base name has no such run, and
# m.group(0) would then raise AttributeError; confirm inputs always match.
m = clusters_re.search(base)
program_id = m.group(0)
# `not in` replaces dict.has_key(), which was removed in Python 3.
if program_id not in program_clusters_map:
    program_clusters_map[program_id] = ProgramClusters()
cluster = int(fields[1])    # second field: cluster, parsed as an integer
prior = float(fields[-1])   # last field: prior, parsed as a float
program_clusters_map[program_id].AddCluster(cluster)
cluster_info.HandleCluster(cluster, prior)
# Emit one row per program: program id, its most frequent cluster, that
# cluster's count, the count as a fraction of `total`, and the prior recorded
# for that cluster. Fields are joined with ' & ' and the row ends with two
# backslashes (this looks like a LaTeX table row; confirm with the consumer).
for program in program_clusters_map:
    (best_cluster, count, total) = program_clusters_map[program].Summary()
    # `count * 1.0` forces floating-point division under Python 2.
    # NOTE(review): raises ZeroDivisionError if `total` is 0.
    # A parenthesised single-argument print produces the same output on
    # Python 2 and is also valid on Python 3, unlike the print statement.
    print('%s & %d & %d & %.2f & %.2f \\\\' % (
        program, best_cluster, count, count * 1.0 / total,
        cluster_info.Prior(best_cluster)))
58 if __name__
== '__main__':