modified: src1/common.h
[GalaxyCodeBases.git] / projects / tri / q1.pl
blob482671f9830abd605bcdcb9c753b93c304c3143b
1 #!/usr/bin/env perl
2 use strict;
3 use warnings;
4 use Bio::AlignIO;
5 use DBI;
7 use Data::Dump qw (ddx);
8 #use IO::Unread;
10 use Bio::Align::DNAStatistics;
11 use Bio::EnsEMBL::Registry;
13 ## Load the registry automatically
14 my $reg = "Bio::EnsEMBL::Registry";
15 #my $url = 'mysql://anonymous@ensembldb.ensembl.org';
16 my $url = 'mysql://root@localhost';
17 $reg->load_registry_from_url($url);
19 my $compara_db_adaptor = $reg->get_DBAdaptor('Multi', 'compara');
20 my $genome_db_adaptor = $compara_db_adaptor->get_GenomeDBAdaptor();
21 my $dbh = $compara_db_adaptor->dbc->db_handle;
23 my $sql = qq{
24 SELECT DISTINCT ss.genome_db_id, gdb.taxon_id, gdb.name, gdb.assembly FROM species_set_tag sst
25 JOIN species_set ss USING (species_set_id)
26 JOIN genome_db gdb USING (genome_db_id)
27 WHERE sst.value = 'mammals';
30 my $sth = $dbh->prepare($sql);
31 $sth->execute();
33 my (@IDsGnomeDB, %GnomeDBnfo);
34 while(my @retdat = $sth->fetchrow()) {
35 #warn join("\t",@retdat),"\n";
36 #$GnomeDBnfo{$retdat[0]} = [@retdat[1,2,3]];
37 my $GnomedbID = shift @retdat;
38 push @IDsGnomeDB,$GnomedbID;
39 $GnomeDBnfo{$GnomedbID} = \@retdat;
42 ddx \%GnomeDBnfo;
44 #my $MammaliaTaxID = 40674; # http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&id=40674&lvl=3&keep=1&srchmode=1&unlock
45 #my $MammaliaGDBs = $genome_db_adaptor->fetch_all_by_ancestral_taxon_id($MammaliaTaxID,1);
46 #ddx $MammaliaGDBs;
48 $sql = qq{
49 SELECT DISTINCT method_link_species_set_id FROM method_link_species_set
50 JOIN method_link USING (method_link_id)
51 JOIN species_set ss ON ss.species_set_id=method_link_species_set.species_set_id
52 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
53 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
54 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
55 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
56 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
57 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES';
59 $sth = $dbh->prepare($sql);
60 $sth->execute();
62 my (@IDsMLSS);
63 while(my @retdat = $sth->fetchrow()) {
64 push @IDsMLSS,$retdat[0];
66 warn 'MLSS_ID: ',join(', ',@IDsMLSS),"\n";
68 ## The BioPerl alignment formatter
69 my $alignIO = Bio::AlignIO->newFh(-format => "clustalw");
71 my $HomologyAdaptor = $reg->get_adaptor("Multi", "compara", "Homology");
72 for my $mlss (@IDsMLSS) {
73 my $homologies = $HomologyAdaptor->fetch_all_by_MethodLinkSpeciesSet($mlss);
74 print '-----',$mlss,"-----\n";
75 ## For each homology
76 foreach my $this_homology (@{$homologies}) {
77 next unless defined $this_homology->dn();
78 print $this_homology->toString(),"\n";
79 print "The non-synonymous substitution rate is: ", $this_homology->dn(), "\n";
81 my $description = $this_homology->description;
82 # next unless ($description =~ /orth/); # uncomment for orthologs only
83 my ($a,$b) = @{$this_homology->gene_list};
84 my $spa = $a->taxon->get_short_name;
85 my $spb = $b->taxon->get_short_name;
86 my $labela = $a->stable_id;
87 $labela .= "(" . $a->display_label . ")" if $a->display_label;
88 my $labelb = $b->stable_id;
89 $labelb .= "(" . $b->display_label . ")" if $b->display_label;
91 ## Get and print the alignment
92 my $simple_align = $this_homology->get_SimpleAlign( -seq_type => 'cds' );
93 print $alignIO $simple_align;
94 #ddx $this_homology;
96 my $dn; my $ds;
97 my $lnl = $this_homology->lnl;
98 if ($lnl) {
99 $dn = $this_homology->dn;
100 $ds = $this_homology->ds;
101 } else {
102 # This bit calculates dnds values using the counting method in bioperl-run
103 my $aln = $simple_align;
104 my $stats;
105 eval { $stats = new Bio::Align::DNAStatistics;};
106 if($stats->can('calc_KaKs_pair')) {
107 my ($seq1id,$seq2id) = map { $_->display_id } $aln->each_seq;
108 my $results;
109 print ">";
110 eval { $results = $stats->calc_KaKs_pair($aln, $seq1id, $seq2id);};
111 unless ($@) {
112 my $counting_method_dn = $results->[0]{D_n};
113 my $counting_method_ds = $results->[0]{D_s};
114 $dn = $counting_method_dn;
115 $ds = $counting_method_ds;
120 $dn = 'na' if (!defined($dn));
121 print "$spa,$labela,$spb,$labelb,$dn,$ds,$description\n";
123 print "\n";
126 __END__
128 les q2.txt |grep -P 'ENSMMUG00000029355|ENSMMUG00000030190|ENSPPYG00000025870|ENSCJAG00000022855|ENSPPYG00000016721|ENSECAG00000024079'
131 USE ensembl_compara_80;
133 SELECT DISTINCT ss.species_set_id, ss1.genome_db_id, gdb1.taxon_id, gdb1.name, ss.genome_db_id, gdb.taxon_id, gdb.name, gdb.assembly FROM species_set ss
134 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
135 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
136 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
137 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
138 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
139 JOIN genome_db gdb1 ON gdb1.genome_db_id=ss1.genome_db_id
140 JOIN genome_db gdb ON gdb.genome_db_id=ss.genome_db_id
141 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id;
142 =>5046
144 SELECT DISTINCT ss.species_set_id FROM species_set ss
145 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
146 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
147 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
148 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
149 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
150 JOIN genome_db gdb1 ON gdb1.genome_db_id=ss1.genome_db_id
151 JOIN genome_db gdb ON gdb.genome_db_id=ss.genome_db_id
152 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id;
153 =>746
155 EXPLAIN SELECT DISTINCT method_link_species_set_id, method_link_species_set.name, ss.species_set_id FROM method_link_species_set
156 JOIN method_link USING (method_link_id)
157 JOIN species_set ss ON ss.species_set_id=method_link_species_set.species_set_id
158 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
159 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
160 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
161 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
162 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
163 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES';
164 +------+-------------+-------------------------+-------+-----------------------------+----------------+---------+--------------------------------------------------------------------------+------+-----------------------------------------------------------+
165 | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
166 +------+-------------+-------------------------+-------+-----------------------------+----------------+---------+--------------------------------------------------------------------------+------+-----------------------------------------------------------+
167 | 1 | SIMPLE | method_link | const | PRIMARY,type | type | 52 | const | 1 | Using temporary |
168 | 1 | SIMPLE | sst0 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where |
169 | 1 | SIMPLE | ss0 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst0.species_set_id | 50 | Using where; Using index |
170 | 1 | SIMPLE | ss | ref | species_set_id,genome_db_id | genome_db_id | 5 | ensembl_compara_80.ss0.genome_db_id | 72 | |
171 | 1 | SIMPLE | method_link_species_set | ref | method_link_id | method_link_id | 9 | const,ensembl_compara_80.ss.species_set_id | 11 | |
172 | 1 | SIMPLE | sst2 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where; Distinct; Using join buffer (flat, BNL join) |
173 | 1 | SIMPLE | ss2 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst2.species_set_id | 50 | Using where; Using index; Distinct |
174 | 1 | SIMPLE | ss1 | ref | species_set_id,genome_db_id | species_set_id | 9 | ensembl_compara_80.ss.species_set_id,ensembl_compara_80.ss2.genome_db_id | 11 | Using index; Distinct |
175 +------+-------------+-------------------------+-------+-----------------------------+----------------+---------+--------------------------------------------------------------------------+------+-----------------------------------------------------------+
176 8 rows in set (0.01 sec)
178 EXPLAIN SELECT DISTINCT method_link_species_set_id, method_link_species_set.name, method_link_species_set.species_set_id FROM method_link_species_set
179 JOIN method_link USING (method_link_id)
180 WHERE method_link.type='ENSEMBL_ORTHOLOGUES' AND method_link_species_set.species_set_id IN (
181 SELECT DISTINCT ss.species_set_id FROM species_set ss
182 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
183 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
184 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
185 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
186 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
187 JOIN genome_db gdb1 ON gdb1.genome_db_id=ss1.genome_db_id
188 JOIN genome_db gdb ON gdb.genome_db_id=ss.genome_db_id
189 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id
191 +------+--------------+-------------------------+--------+-----------------------------+----------------+---------+--------------------------------------------------------------------------+------+-----------------------------------------------------------+
192 | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
193 +------+--------------+-------------------------+--------+-----------------------------+----------------+---------+--------------------------------------------------------------------------+------+-----------------------------------------------------------+
194 | 1 | PRIMARY | method_link | const | PRIMARY,type | type | 52 | const | 1 | Using temporary |
195 | 1 | PRIMARY | method_link_species_set | ALL | method_link_id | NULL | NULL | NULL | 2633 | Using where |
196 | 1 | PRIMARY | <subquery2> | eq_ref | distinct_key | distinct_key | 4 | func | 1 | Distinct |
197 | 2 | MATERIALIZED | sst0 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where; Distinct |
198 | 2 | MATERIALIZED | ss0 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst0.species_set_id | 50 | Using where; Using index; Distinct |
199 | 2 | MATERIALIZED | gdb | eq_ref | PRIMARY | PRIMARY | 4 | ensembl_compara_80.ss0.genome_db_id | 1 | Using index; Distinct |
200 | 2 | MATERIALIZED | ss | ref | species_set_id,genome_db_id | genome_db_id | 5 | ensembl_compara_80.ss0.genome_db_id | 72 | Distinct |
201 | 2 | MATERIALIZED | sst2 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where; Distinct; Using join buffer (flat, BNL join) |
202 | 2 | MATERIALIZED | ss2 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst2.species_set_id | 50 | Using where; Using index; Distinct |
203 | 2 | MATERIALIZED | ss1 | ref | species_set_id,genome_db_id | species_set_id | 9 | ensembl_compara_80.ss.species_set_id,ensembl_compara_80.ss2.genome_db_id | 11 | Using index; Distinct |
204 | 2 | MATERIALIZED | gdb1 | eq_ref | PRIMARY | PRIMARY | 4 | ensembl_compara_80.ss2.genome_db_id | 1 | Using index; Distinct |
205 +------+--------------+-------------------------+--------+-----------------------------+----------------+---------+--------------------------------------------------------------------------+------+-----------------------------------------------------------+
206 11 rows in set (0.02 sec)
208 SELECT DISTINCT method_link_species_set_id, method_link_species_set.name, ss.species_set_id FROM method_link_species_set
209 JOIN method_link USING (method_link_id)
210 JOIN species_set ss ON ss.species_set_id=method_link_species_set.species_set_id
211 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
212 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
213 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
214 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
215 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
216 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES';
217 =>741 rows in set (1.02 sec)
219 EXPLAIN SELECT DISTINCT homology_id,method_link_species_set_id, method_link_species_set.name, ss.species_set_id FROM method_link_species_set
220 JOIN homology USING (method_link_species_set_id)
221 JOIN method_link USING (method_link_id)
222 JOIN species_set ss ON ss.species_set_id=method_link_species_set.species_set_id
223 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
224 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
225 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
226 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
227 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
228 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES';
229 +------+-------------+-------------------------+-------+-----------------------------+----------------------------+---------+--------------------------------------------------------------------------+-------+-----------------------------------------------------------+
230 | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
231 +------+-------------+-------------------------+-------+-----------------------------+----------------------------+---------+--------------------------------------------------------------------------+-------+-----------------------------------------------------------+
232 | 1 | SIMPLE | method_link | const | PRIMARY,type | type | 52 | const | 1 | Using temporary |
233 | 1 | SIMPLE | sst0 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where |
234 | 1 | SIMPLE | ss0 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst0.species_set_id | 50 | Using where; Using index |
235 | 1 | SIMPLE | ss | ref | species_set_id,genome_db_id | genome_db_id | 5 | ensembl_compara_80.ss0.genome_db_id | 72 | |
236 | 1 | SIMPLE | method_link_species_set | ref | PRIMARY,method_link_id | method_link_id | 9 | const,ensembl_compara_80.ss.species_set_id | 11 | |
237 | 1 | SIMPLE | homology | ref | method_link_species_set_id | method_link_species_set_id | 4 | ensembl_compara_80.method_link_species_set.method_link_species_set_id | 31685 | |
238 | 1 | SIMPLE | sst2 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where; Distinct; Using join buffer (flat, BNL join) |
239 | 1 | SIMPLE | ss2 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst2.species_set_id | 50 | Using where; Using index; Distinct |
240 | 1 | SIMPLE | ss1 | ref | species_set_id,genome_db_id | species_set_id | 9 | ensembl_compara_80.ss.species_set_id,ensembl_compara_80.ss2.genome_db_id | 11 | Using index; Distinct |
241 +------+-------------+-------------------------+-------+-----------------------------+----------------------------+---------+--------------------------------------------------------------------------+-------+-----------------------------------------------------------+
242 9 rows in set (0.01 sec)
244 EXPLAIN SELECT DISTINCT homology_id FROM homology
245 WHERE method_link_species_set_id IN (
246 SELECT DISTINCT method_link_species_set_id FROM method_link_species_set
247 INNER JOIN method_link USING (method_link_id)
248 INNER JOIN species_set ss ON ss.species_set_id=method_link_species_set.species_set_id
249 INNER JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
250 INNER JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
251 INNER JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
252 INNER JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
253 INNER JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
254 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES'
256 +------+-------------+-------------------------+-------+-----------------------------+----------------------------+---------+--------------------------------------------------------------------------+-------+-----------------------------------------------------------+
257 | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
258 +------+-------------+-------------------------+-------+-----------------------------+----------------------------+---------+--------------------------------------------------------------------------+-------+-----------------------------------------------------------+
259 | 1 | PRIMARY | method_link | const | PRIMARY,type | type | 52 | const | 1 | Using temporary |
260 | 1 | PRIMARY | sst0 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where; Start temporary |
261 | 1 | PRIMARY | ss0 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst0.species_set_id | 50 | Using where; Using index |
262 | 1 | PRIMARY | ss | ref | species_set_id,genome_db_id | genome_db_id | 5 | ensembl_compara_80.ss0.genome_db_id | 72 | |
263 | 1 | PRIMARY | method_link_species_set | ref | PRIMARY,method_link_id | method_link_id | 9 | const,ensembl_compara_80.ss.species_set_id | 11 | |
264 | 1 | PRIMARY | homology | ref | method_link_species_set_id | method_link_species_set_id | 4 | ensembl_compara_80.method_link_species_set.method_link_species_set_id | 31685 | |
265 | 1 | PRIMARY | sst2 | ALL | tag_species_set_id | NULL | NULL | NULL | 9 | Using where; Distinct; Using join buffer (flat, BNL join) |
266 | 1 | PRIMARY | ss2 | ref | species_set_id,genome_db_id | species_set_id | 4 | ensembl_compara_80.sst2.species_set_id | 50 | Using where; Using index; Distinct |
267 | 1 | PRIMARY | ss1 | ref | species_set_id,genome_db_id | species_set_id | 9 | ensembl_compara_80.ss.species_set_id,ensembl_compara_80.ss2.genome_db_id | 11 | Using index; Distinct; End temporary |
268 +------+-------------+-------------------------+-------+-----------------------------+----------------------------+---------+--------------------------------------------------------------------------+-------+-----------------------------------------------------------+
269 9 rows in set (0.01 sec)
271 SELECT DISTINCT homology_id,method_link_species_set_id, method_link_species_set.name, ss.species_set_id FROM method_link_species_set
272 JOIN homology USING (method_link_species_set_id)
273 JOIN method_link USING (method_link_id)
274 JOIN species_set ss ON ss.species_set_id=method_link_species_set.species_set_id
275 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
276 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
277 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
278 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
279 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
280 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES'
281 LIMIT 100;
282 =>100 rows in set (4.41 sec)
284 SELECT DISTINCT homology_id,h.description,mlss.name,h.dn,h.ds FROM homology h
285 JOIN method_link_species_set mlss USING (method_link_species_set_id)
286 JOIN method_link USING (method_link_id)
287 JOIN species_set ss ON ss.species_set_id=mlss.species_set_id
288 JOIN species_set ss0 ON ss.genome_db_id=ss0.genome_db_id
289 JOIN species_set ss1 ON ss.species_set_id=ss1.species_set_id
290 JOIN species_set ss2 ON ss1.genome_db_id=ss2.genome_db_id
291 JOIN species_set_tag sst0 ON sst0.species_set_id=ss0.species_set_id
292 JOIN species_set_tag sst2 ON sst2.species_set_id=ss2.species_set_id
293 WHERE sst0.value = 'mammals' AND sst2.value = 'mammals' AND ss1.genome_db_id != ss.genome_db_id AND method_link.type='ENSEMBL_ORTHOLOGUES'
294 AND h.dn IS NOT NULL
295 LIMIT 100,100;
296 =>100 rows in set (4.52 sec)