7 A module using eFetch to access information about NCBI identifiers
11 #Terminal Script, useful for testing:
12 use CXGN::Tools::Entrez;
13 my $eutil = CXGN::Tools::Entrez->new();
14 $eutil->run_terminal();
17 # Fetch stuff within a script!
18 use CXGN::Tools::Entrez;
20 my $eutil = CXGN::Tools::Entrez->new({
27 my $first_abstract = $eutil->next();
31 my $eutil = CXGN::Tools::Entrez->new();
32 my $seq = $eutil->get_sequence("NP_188752", "Protein");
37 C. Carpita <ccarpita@gmail.com>
# Namespace for the NCBI Entrez client; everything below is compiled into
# the CXGN::Tools::Entrez package.
41 package CXGN
::Tools
::Entrez
;
43 use Class
::MethodMaker
45 scalar => [qw
/ db query format ret_mode
47 fetch_start fetch_size fetch_max count
# DEBUG is a constant whose value is read from the ENTREZ_DEBUG environment
# variable when this `use` statement is compiled; set that variable to a true
# value before loading the module to enable the diagnostic output below.
55 use constant DEBUG
=> $ENV{ENTREZ_DEBUG
};
# Announce on STDERR, once when the module file is executed, that debug
# output is enabled.
58 print STDERR
"\nDEBUG MODE\n" if DEBUG
;
64 my $self = bless {}, $class;
66 if($args && (ref($args) ne "HASH")){
67 die "\nArguments must be sent as a hash reference: ...->new( { db => 'Pubmed', ... } )\n";
69 $self->url_base("http://www.ncbi.nlm.nih.gov/entrez/eutils");
70 $self->fetch_start(0);
71 $self->fetch_size(10);
72 $self->fetch_max(100);
73 $self->ret_mode("text");
75 #Default (usually JSON string)
76 #This should be set in subclass or by user
79 while(my($k, $v) = each %$args){
80 unless (__PACKAGE__
->can($k)){
81 die "\nSetting '$k' not recognized\n";
93 $self->esearch($self->url_base() . "/esearch.fcgi?" .
95 "&retmax=" . $self->fetch_max() .
97 "&term=" . $self->query());
99 print "\nEsearch: " . $self->esearch() if DEBUG
;
100 my $result = LWP
::Simple
::get
($self->esearch);
102 my ($count, $query_key, $web_env) = $result =~
103 /<Count>(\d+)<\/Count
>.*<QueryKey
>(\d
+)<\
/QueryKey>.*<WebEnv>(\S+)<\/WebEnv
>/s
;
105 print STDERR
"\nQuery result size: $count" if DEBUG
;
106 print STDERR
"\n$result\n\n" if DEBUG
;
107 $self->count($count);
108 $self->query_key($query_key);
109 $self->web_env($web_env);
110 $self->{queried
} = 1;
115 my $fetch_size = shift;
116 $fetch_size ||= $self->fetch_size();
118 my $fetch_start = shift;
119 my $no_increment = 0;
120 (defined $fetch_start)?
($no_increment = 1):($fetch_start = $self->fetch_start());
122 if($fetch_start >= $self->count()){
123 print STDERR
"\nProvided fetch start exceeds result size";
127 my $efetch = $self->url_base() . "/efetch.fcgi?" .
128 "rettype=" . $self->format() .
130 "&retstart=" . $fetch_start .
131 "&retmax=" . $fetch_size .
132 "&db=" . $self->db() .
133 "&query_key=" . $self->query_key() .
134 "&WebEnv=" . $self->web_env();
136 my $result = LWP
::Simple
::get
($efetch);
137 print STDERR
"\nNo result from fetch" unless($result);
139 #Increment internal counter unless starting point was specified
140 $self->fetch_start($self->fetch_start() + $fetch_size) unless $no_increment;
148 return unless $query;
149 $self->query($query);
152 my $fetch_size = shift;
153 $fetch_size ||= $self->fetch_max();
155 return $self->next($fetch_size);
163 die "\nDatabase (2nd arg) not specified" unless $db;
164 if($db && !($db =~ /(protein)|(nucleotide)/i)){
165 die "\nSecond argument (database) must be 'protein' or 'nucleotide'";
168 $self->format("fasta");
169 my $result = $self->fetch($id, 1);
170 $result =~ s/>.*?\n//s;
176 $self->ask_for_input();
177 print "\nRunning query...\n";
179 $self->terminal_fetch();
184 $self->db(ask_user
("Database", "Protein"));
185 $self->query(ask_user
("Query", "Cytochrome P450"));
186 $self->format(ask_user
("Format", "Fasta"));
191 while(my $result = $self->next()){
193 my $first = $self->fetch_start() - $self->fetch_size() + 1;
194 my $last = $first + $self->fetch_size() - 1;
195 $last = $self->count() if $last > $self->count();
196 print "\nResults $first - $last out of " . $self->count() . "\n";
197 my $press_msg = "Press <return> to fetch next " . $self->fetch_size() . " results...";
198 print "\n" . ("=" x
(length($press_msg))) . "\n";
199 print "$press_msg\n";
201 print "\nFetching...\n";
207 print "$_[0] [$_[1]]: ";
210 if($rc eq "") { $rc = $_[1]; }