v1.7.3
[bioperl-live.git] / bin / bp_taxid4species
blob1abe992336a7b5e15eadbbd020044ed7bae141c0
1 #!/usr/bin/perl
2 # Author: Jason Stajich <jason@bioperl.org>
3 # Purpose: Retrieve the NCBI Taxa ID for organism(s)
5 # TODO: add rest of POD
8 use LWP::UserAgent;
9 use XML::Twig;
10 use strict;
11 use warnings;
12 use Getopt::Long;
13 use Data::Dumper;
# ---------------------------------------------------------------------------
# Script body: look up the NCBI taxonomy ID for each organism name given on
# the command line, using the Entrez E-utilities in two steps:
#   1. esearch  - run the query and keep the result set on the history server
#   2. esummary - fetch the document summaries for that stored result set
# Output is one line per requested organism, in command-line order.
# ---------------------------------------------------------------------------

# Command-line flags.
my $verbose = 0;    # -v: echo the raw content of each E-utilities response
my $plain   = 0;    # -p: print only the taxonomy ID, not "'name', id"
my $help    = 0;    # -h: print the usage line and exit
my $USAGE   = "bp_taxid4species [-v] [-p] [-h] \"Genus1 species1\" \"Genus2 species2\"";

# Reject unknown options instead of silently treating them as accepted.
GetOptions(
    'v|verbose' => \$verbose,
    'p|plain'   => \$plain,
    'h|help'    => \$help,
) or die("$USAGE\n");
die("$USAGE\n") if $help;

my $ua = LWP::UserAgent->new();

my $urlbase  = 'https://www.ncbi.nlm.nih.gov/entrez/eutils/';
my $esearch  = 'esearch.fcgi?db=taxonomy&usehistory=y&term=';
my $esummary = 'esummary.fcgi?db=taxonomy&query_key=QUERYKEY&WebEnv=WEBENV';

my (@organisms) = @ARGV;
die("must provide valid organism\n$USAGE\n") unless @organisms;

# Build a single "name1 OR name2 ..." search term and make it URL-safe.
my $organismstr = join(" OR ", @organisms);
# Work on bytes so every escaped character maps to a two-digit %XX.
utf8::encode($organismstr) if utf8::is_utf8($organismstr);
# Percent-encode everything except URI "unreserved" characters and
# whitespace, so characters such as & # + % " cannot break the query string.
$organismstr =~ s/([^A-Za-z0-9\-._~\s])/sprintf('%%%02X', ord($1))/ge;
# Whitespace becomes '+', as in the original query format.
$organismstr =~ s/\s/+/g;

# Fetch $url, optionally echo the raw reply (-v), and return the root element
# of the parsed XML. Dies if the HTTP request fails; XML::Twig's parse() dies
# on malformed XML.
sub _fetch_xml_root {
    my ($url) = @_;

    my $response = $ua->get($url);
    print $response->content, "\n" if ($verbose);
    die("Request to $url failed: " . $response->status_line . "\n")
        unless $response->is_success;

    my $twig = XML::Twig->new();
    $twig->parse($response->content);
    return $twig->root;
}

# Esearch
my $root = _fetch_xml_root($urlbase . $esearch . $organismstr);

# Both values are needed to address the stored result set in the esummary
# call; fail with a readable message rather than calling ->text on undef.
my $querykey_elt = $root->first_child('QueryKey');
my $webenv_elt   = $root->first_child('WebEnv');
unless ($querykey_elt && $webenv_elt) {
    # NOTE(review): assumes the service reports problems in an <ERROR> child
    # of the root element - confirm against the E-utilities documentation.
    my $error = $root->first_child_text('ERROR');
    die("esearch reply contained no QueryKey/WebEnv"
        . (length $error ? ": $error" : '') . "\n");
}
my $querykey = $querykey_elt->text;
my $webenv   = $webenv_elt->text;

# Esummary
$esummary =~ s/QUERYKEY/$querykey/;
$esummary =~ s/WEBENV/$webenv/;
$root = _fetch_xml_root($urlbase . $esummary);

# Parse XML: index each summary by its lower-cased scientific name so the
# lookup below is case-insensitive.
my %taxinfo;
foreach my $docsum ($root->children) {
    foreach my $item ($docsum->children('Item')) {
        my $name = $item->att('Name');
        next unless defined $name && $name eq 'ScientificName';

        my $sciname = $item->text;
        $taxinfo{lc $sciname}{sciname} = $sciname;
        $taxinfo{lc $sciname}{tid}     = $docsum->first_child_text('Id');
        last;    # only the first ScientificName item per summary is used
    }
}

# Output in same order as given on command line
foreach my $orgn (@organisms) {
    if (exists $taxinfo{lc $orgn}) {
        my $tid = $taxinfo{lc $orgn}{tid};

        if ($plain) { print $tid, "\n"; }
        else        { print join(", ", "'$orgn'", $tid), "\n"; }
    }
    else { print "'$orgn' not found\n"; }
}
79 =head1 NAME
81 bp_taxid4species - simple script which returns the NCBI Taxonomic id for a requested species
83 =head1 SYNOPSIS
85 bp_taxid4species [-v] [-p] [-h] "Genus1 species1" "Genus2 species2"
87 Options:
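88 -v verbose (also print the raw XML returned by each Entrez request)
89 -p plain (print only the taxonomy id, one per line)
90 -h help (print the usage line and exit)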
92 =head1 DESCRIPTION
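94 This simple script shows how to look up taxonomy ids in NCBI Entrez. It
95 prints the taxonomy id of each requested organism, in the order the names were given on the command line; names with no match are reported as not found.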
97 =head1 FEEDBACK
99 =head2 Mailing Lists
101 User feedback is an integral part of the evolution of this and other
102 Bioperl modules. Send your comments and suggestions preferably to
103 the Bioperl mailing list. Your participation is much appreciated.
105 bioperl-l@bioperl.org - General discussion
106 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
108 =head2 Reporting Bugs
110 Report bugs to the Bioperl bug tracking system to help us keep track
111 of the bugs and their resolution. Bug reports can be submitted via the
112 web:
114 https://github.com/bioperl/bioperl-live/issues
116 =head1 AUTHOR
118 Jason Stajich jason-at-bioperl-dot-org
120 =cut