1 # -*-Perl-*- Test Harness script for Bioperl
12 -requires_modules => [qw( 5.010 DB_File DBI DBD::SQLite )]
15 use_ok('Bio::DB::Taxonomy');
16 use_ok('Bio::Tree::Tree');
19 my $temp_dir = test_output_dir();
21 # TODO: run basic tests making sure that a database is not regenerated if
22 # present or unless forced
24 ok my $db_flatfile = Bio::DB::Taxonomy->new(
26 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
27 -namesfile => test_input_file('taxdump', 'names.dmp'),
29 isa_ok $db_flatfile, 'Bio::DB::Taxonomy::sqlite';
30 isa_ok $db_flatfile, 'Bio::DB::Taxonomy';
32 ok my $db = Bio::DB::Taxonomy->new(
34 -directory => $temp_dir,
35 -nodesfile => test_input_file('taxdump', 'nodes.dmp'),
36 -namesfile => test_input_file('taxdump', 'names.dmp'),
42 # taxid data in the nodes.dmp file should be unique, we ignore repeated values
45 is $db->get_num_taxa, 188;
47 lives_ok {$id = $db->get_taxonid('Homo sapiens')};
51 ## easy test on human, try out the main Taxon methods
53 ok $n = $db->get_taxon(9606);
55 is $n->object_id, $n->id;
56 is $n->ncbi_taxid, $n->id;
57 is $n->parent_id, 9605;
58 is $n->rank, 'species';
60 is $n->node_name, 'Homo sapiens';
61 is $n->scientific_name, $n->node_name;
62 is ${$n->name('scientific')}[0], $n->node_name;
64 my %common_names = map { $_ => 1 } $n->common_names;
65 is keys %common_names, 3, ref($db).": common names";
66 ok exists $common_names{human};
67 ok exists $common_names{man};
69 is $n->division, 'Primates';
70 is $n->genetic_code, 1;
71 is $n->mitochondrial_genetic_code, 2;
73 # these are entrez-only, data not available in dmp files
74 #if ($db eq $db_entrez) {
75 # ok defined $n->pub_date;
76 # ok defined $n->create_date;
77 # ok defined $n->update_date;
80 # briefly test some Bio::Tree::NodeI methods
81 ok my $ancestor = $n->ancestor;
82 is $ancestor->scientific_name, 'Homo';
83 # unless set explicitly, Bio::Taxon doesn't return anything for
84 # each_Descendent; must ask the database directly
85 ok my @children = $ancestor->db_handle->each_Descendent($ancestor);
88 #sleep(3) if $db eq $db_entrez;
90 ## do some trickier things...
91 ok my $n2 = $db->get_Taxonomy_Node('89593');
92 is $n2->scientific_name, 'Craniata';
94 # briefly check we can use some Tree methods
95 my $tree = Bio::Tree::Tree->new();
96 is $tree->get_lca($n, $n2)->scientific_name, 'Craniata';
99 my @nodes = $tree->get_nodes;
100 is scalar(@nodes), 0;
102 @lineage_nodes = $tree->get_lineage_nodes($n->id); # read ID, only works if nodes have been added to tree
103 is scalar @lineage_nodes, 0;
104 @lineage_nodes = $tree->get_lineage_nodes($n); # node object always works
105 cmp_ok(scalar @lineage_nodes, '>', 20);
108 like($tree->get_lineage_string($n), qr/cellular organisms;Eukaryota/);
109 like($tree->get_lineage_string($n,'-'), qr/cellular organisms-Eukaryota/);
110 like($tree->get_lineage_string($n2), qr/cellular organisms;Eukaryota/);
112 # can we actually form a Tree and use other Tree methods?
113 ok $tree = Bio::Tree::Tree->new(-node => $n);
114 cmp_ok($tree->number_nodes, '>', 20);
115 cmp_ok(scalar($tree->get_nodes), '>', 20);
116 is $tree->find_node(-rank => 'genus')->scientific_name, 'Homo';
118 # check that getting the ancestor still works now we have explitly set the
119 # ancestor by making a Tree
120 is $n->ancestor->scientific_name, 'Homo';
122 ok $n = $db->get_Taxonomy_Node('1760');
123 is $n->scientific_name, 'Actinobacteria (class)';
125 # entrez isn't as good at searching as flatfile, so we have to special-case
126 my @ids = sort $db->get_taxonids('Chloroflexi');
128 is_deeply \@ids, [200795];
131 @ids = sort $db->get_taxonids('chloroflexi');
133 is_deeply \@ids, [200795];
135 # fuzzy match using SQL syntax to match any 'Chloroflexi'
136 @ids = sort $db->get_taxonids('Chloroflexi%');
138 is_deeply \@ids, [200795, 32061];
140 $id = $db->get_taxonids('Chloroflexi (class)');
143 @ids = $db->get_taxonids('Rhodotorula');
145 @ids = $db->get_taxonids('Rhodotorula <Microbotryomycetidae>');
149 # get_lca should work on nodes from different databases
151 test_skip(-tests => 9, -requires_networking => 1);
153 # check that the result is the same as if we are retrieving from the same DB
155 my $h_flat = $db_flatfile->get_taxon(-name => 'Homo');
156 my $h_flat2 = $db_flatfile->get_taxon(-name => 'Homo sapiens');
157 ok my $tree_functions = Bio::Tree::Tree->new();
158 is $tree_functions->get_lca($h_flat, $h_flat2)->scientific_name, 'Homo', 'get_lca() within flatfile db';
162 #eval { $h_entrez = $db_entrez->get_taxon(-name => 'Homo sapiens');};
163 #skip "Unable to connect to entrez database; no network or server busy?", 7 if $@;
165 #eval { $h_entrez2 = $db_entrez->get_taxon(-name => 'Homo');};
166 #skip "Unable to connect to entrez database; no network or server busy?", 7 if $@;
167 #ok $tree_functions = Bio::Tree::Tree->new();
168 #is $tree_functions->get_lca($h_entrez, $h_entrez2)->scientific_name, 'Homo', 'get_lca() within entrez db';
170 #ok $tree_functions = Bio::Tree::Tree->new();
171 # mixing entrez and flatfile
173 # local $TODO = 'Mixing databases for get_lca() not working, see bug #3416';
174 # is $tree_functions->get_lca($h_flat, $h_entrez)->scientific_name, 'Homo', 'get_lca() mixing flatfile and remote db';
176 # even though the species taxa for Homo sapiens from list and flat databases
177 # have the same internal id, get_lca won't work because they have different
178 # roots and descendents
179 #$h_list = $db_list->get_taxon(-name => 'Homo sapiens');
180 #is $h_list->ancestor->internal_id, $h_flat->internal_id;
181 #ok ! $tree_functions->get_lca($h_flat, $h_list);
183 # but we can form a tree with the flat node then remove all the ranks we're
184 # not interested in and try again
185 #$tree = Bio::Tree::Tree->new(-node => $h_flat);
186 #$tree->splice(-keep_rank => \@ranks);
187 #is $tree->get_lca($h_flat, $h_list)->scientific_name, 'Homo';
190 # Some tests carried over from flatfile and others that would be nice to pass
192 # ideas from taxonomy2tree.PLS that let us make nice tree, using
193 # Bio::Tree::TreeFunctionsI methods; this is a weird and trivial example just
194 # because our test flatfile database only has the full lineage of one species
196 for my $name ('Human', 'Hominidae') {
197 my $ncbi_id = $db_flatfile->get_taxonid($name);
199 my $node = $db_flatfile->get_taxon(-taxonid => $ncbi_id);
202 ok $tree->merge_lineage($node);
205 ok $tree = Bio::Tree::Tree->new(-node => $node);
209 is $tree->get_nodes, 30;
210 $tree->contract_linear_paths;
211 my $ids = join(",", map { $_->id } $tree->get_nodes);
212 is $ids, '131567,9606';
215 unlink 'taxonomy.sqlite' if (-e 'taxonomy.sqlite');