bp_taxonomy2tree - Build a taxonomic tree from the full lineages of a set of species names
This script looks up the provided species names in the NCBI Taxonomy database,
retrieves their full lineages and merges them into a single taxonomic tree,
which is printed in Newick format to standard output.
  bp_taxonomy2tree.pl -s Orangutan -s Gorilla -s Chimpanzee -s Human
  bp_taxonomy2tree.pl -s Orangutan -s Gorilla -s Chimpanzee -s "Homo sapiens"
You can also provide -d to specify the directory in which to store the index
files, -o to specify the location of your NCBI nodes file, and -a for the NCBI
names file. Alternatively, use the option -e to query the web-based Entrez
taxonomy database if you do not have the NCBI flat files installed.
This script requires that the bioperl-run package also be installed.
Providing the nodes.dmp and names.dmp files from the NCBI Taxonomy
dump (see Bio::DB::Taxonomy::flatfile for more info) is only necessary
the first time the script is run. This will create the local indexes and may
take quite a long time. However, once created, these indexes will
allow fast lookups from species name to taxon id and from taxon id to
species name.
=head1 AUTHOR - Gabriel Valiente, reimplemented by Sendu Bala

Email valiente@lsi.upc.edu

=cut
use strict;
use warnings;

use Bio::DB::Taxonomy;
use Bio::Tree::Compatible;
use Bio::Tree::Tree;
use Bio::TreeIO;
use Getopt::Long;

# Command-line settings and their defaults.
my @species;                      # -s: species names (option may be repeated)
my $index_dir  = "./db/";         # -d: directory holding the taxonomy index files
my $nodesfile  = "nodes.dmp";     # -o: NCBI nodes file
my $namesfile  = "names.dmp";     # -a: NCBI names file
my $use_entrez = 0;               # -e: use the 'entrez' source instead of 'flatfile'

# the input to the script is an array of species names
GetOptions(
    's|species=s'   => \@species,
    'd|dir:s'       => \$index_dir,
    'o|nodesfile:s' => \$nodesfile,
    'a|namesfile:s' => \$namesfile,
    'e|entrez'      => \$use_entrez,
    'h|help'        => sub { system('perldoc', $0); exit },
);

# Taxonomy database handle; the source is chosen by the -e switch.
my $db = Bio::DB::Taxonomy->new(
    -source    => $use_entrez ? 'entrez' : 'flatfile',
    -directory => $index_dir,
    -nodesfile => $nodesfile,
    -namesfile => $namesfile,
);

# the full lineages of the species are merged into a single tree
my $tree;
for my $name (@species) {
    my $node = $db->get_taxon(-name => $name);

    if ($node) {
        if ($tree) {
            # A tree already exists: graft this taxon's lineage onto it.
            $tree->merge_lineage($node);
        }
        else {
            # First taxon found: it seeds the tree.
            $tree = Bio::Tree::Tree->new(-node => $node);
        }
    }
    else {
        warn "no NCBI Taxonomy node for species ",$name,"\n";
    }
}

# Without this guard, an empty -s list or a list in which no name resolves
# would crash below with "Can't call method ... on an undefined value".
die "no taxonomic tree could be built: none of the given species names",
    " was found (use -s NAME, or -h for help)\n"
    unless $tree;

# simple paths are contracted by removing degree one nodes
$tree->contract_linear_paths;

# convert tree ids to their names for nice output with TreeIO
foreach my $node ($tree->get_nodes) {
    $node->id($node->node_name);
}

# the tree is output in Newick format
my $output = Bio::TreeIO->new(-format => 'newick');
$output->write_tree($tree);