Merge pull request #42 from solgenomics/topic/duplicate_image_warning
[cxgn-corelibs.git] / lib / CXGN / Tools / Pubmed.pm
blobb6bdc341ba83cfcb1ed8b7191e008fe898affa11
1 package CXGN::Tools::Pubmed;
2 use strict;
3 use warnings;
4 use XML::Twig;
5 use CXGN::Chado::Publication;
6 use CXGN::Chado::Pubauthor;
8 =head1 CXGN::Tools::Pubmed
Gets data from PubMed and parses the fields that should be loaded into the Chado schema.
13 =head2
16 =head1 Author
18 Naama Menda
20 =cut
=head2 new

 Usage:        my $pubmed = CXGN::Tools::Pubmed->new($publication_obj);
 Desc:         Wraps a publication object and, when that object already has
               an accession, immediately fetches the record via fetch().
 Ret:          the new CXGN::Tools::Pubmed object
 Args:         $publication_object - must respond to get_accession()
               (presumably a CXGN::Chado::Publication; confirm with callers)
 Side Effects: overwrites the package variable $pub_object, which the twig
               handler subs in this file write to; calls fetch() when an
               accession is present
 Example:

=cut

# Shared with the XML::Twig handler subs below, which are plain functions
# and therefore cannot reach the instance through $self.
our $pub_object = undef;

sub new {
    my ( $class, $publication ) = @_;

    # The handlers read this global, so it must be set before fetch() runs.
    $pub_object = $publication;

    my $self = bless {}, $class;
    $self->set_pub_object($pub_object);

    my $accession = $pub_object->get_accession();
    $self->fetch($accession) if $accession;

    return $self;
}
=head2 fetch

 Usage:        $pubmed->fetch($accession);
 Desc:         Downloads the PubMed XML record for $accession with wget and
               parses it with XML::Twig. The twig handlers (title, volume,
               issue, pyear, pages, journal_name, pub_type, abstract, author,
               e_id) copy the parsed fields into the publication object.
               Afterwards a default uniquename ("<accession>:<title>") and a
               default cvterm name ('journal') are filled in when missing.
 Ret:          undef on success; on failure, whatever set_message() returns
               on the publication object
 Args:         $accession - the PubMed id to fetch
 Side Effects: runs the external program wget (network access); modifies the
               publication object; on failure stores an error message on it
               via set_message()
 Example:

=cut

sub fetch {
    my ( $self, $accession ) = @_;

    # The twig handlers are plain functions that write to the package-level
    # $pub_object. Point it at *this* instance's publication for the duration
    # of the call so that a fetch on one instance can never write into the
    # publication object of another, more recently constructed, instance.
    local $pub_object = $self->get_pub_object() || $pub_object;

    my $url = "eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$accession&rettype=xml&retmode=text";

    eval {
        # List-form pipe open: wget is executed directly, without a shell, so
        # shell metacharacters in $accession cannot inject commands.
        open( my $wget, '-|', 'wget', $url, '-O', '-' )
            or die "Could not run wget: $!\n";
        my $pub_xml = do { local $/; <$wget> };
        # A failed download yields empty or invalid XML, which makes parse()
        # die below, so the exit status is deliberately not checked here.
        close $wget;
        $pub_xml = '' unless defined $pub_xml;

        my $twig = XML::Twig->new(
            twig_roots => {
                'Article/ArticleTitle'                => \&title,
                'JournalIssue/Volume'                 => \&volume,
                'JournalIssue/Issue'                  => \&issue,
                'DateCompleted/Year'                  => \&pyear,
                'PubDate/Year'                        => \&pyear,
                'Pagination/MedlinePgn'               => \&pages,
                'Journal/Title'                       => \&journal_name,
                'PublicationTypeList/PublicationType' => \&pub_type,
                'Abstract/AbstractText'               => \&abstract,
                Author                                => \&author,
                ELocationID                           => \&e_id,
            },
            twig_handlers => {
                #	AbstractText      => \&abstract,
            },
            pretty_print => 'indented',    # output will be nicely formatted
        );

        $twig->parse($pub_xml);            # build it
    };

    # Capture the error right away: $@ is a global and any eval executed
    # inside the method calls below would reset it.
    my $error = $@;

    my $title = $pub_object->get_title();
    my $uniquename = $accession . ":" . ( defined $title ? $title : '' );
    if ( !$pub_object->get_uniquename() ) {
        $pub_object->set_uniquename($uniquename);
    }
    #$pub_object->set_db_name('PMID');
    if ( !$pub_object->get_cvterm_name ) {
        $pub_object->set_cvterm_name('journal');
        $pub_object->d("!No cvtermname found for publication $accession. Setting to default 'journal'! ");
    }

    if ($error) {
        my $message = "Error in transaction or NCBI server seems to be down. Please check your input for accession $accession or try again later.\n $error";
        return $pub_object->set_message($message);
    }

    # Kept as an explicit undef (not a bare return) so that callers in list
    # context see exactly what they always have.
    return undef;
}
=head2 get_pub_object

 Usage:        my $publication = $pubmed->get_pub_object();
 Desc:         Accessor for the publication object stored by
               set_pub_object().
 Ret:          the value held under the 'pub_object' key (undef if unset)
 Args:         none
 Side Effects: none
 Example:

=cut

sub get_pub_object {
    my ($self) = @_;
    return $self->{pub_object};
}
=head2 set_pub_object

 Usage:        $pubmed->set_pub_object($publication);
 Desc:         Stores the publication object on this instance.
 Ret:          the value that was stored
 Args:         $publication - the object to keep under the 'pub_object' key
 Side Effects: replaces any previously stored publication object
 Example:

=cut

sub set_pub_object {
    my ( $self, $publication ) = @_;
    return $self->{pub_object} = $publication;
}
=head2 get_title

 Usage:        my $title = $pubmed->get_title();
 Desc:         get the title of the article, as held under the
               'articleTitle' key of this object
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): nothing in the code visible here assigns 'articleTitle';
# the title() handler stores the parsed title on the publication object
# instead. Confirm whether callers should be reading it from there.
sub get_title {
    my ($self) = @_;
    return $self->{articleTitle};
}
=head2 title

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         set the title of the article: copies the text of the
               'Article/ArticleTitle' element into the publication object
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: calls set_title() on the package-level $pub_object and
               purges the twig
 Example:

=cut

sub title {
    my $twig = shift;
    my $elt  = shift;

    $pub_object->set_title( $elt->text );

    # Discard what has been parsed so far; the value has been copied out.
    return $twig->purge;
}
=head2 volume

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Copies the text of the 'JournalIssue/Volume' element into the
               publication object.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: calls set_volume() on the package-level $pub_object and
               purges the twig
 Example:

=cut

sub volume {
    my $twig = shift;
    my $elt  = shift;

    $pub_object->set_volume( $elt->text );

    # Discard what has been parsed so far; the value has been copied out.
    return $twig->purge;
}
=head2 get_issue

 Usage:        my $issue = $pubmed->get_issue();
 Desc:         Accessor for the 'issue' key of this object.
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): no code visible here assigns the 'issue' key; the issue()
# handler writes to the publication object instead. Confirm intended use.
sub get_issue {
    my ($self) = @_;
    return $self->{issue};
}
=head2 issue

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Copies the text of the 'JournalIssue/Issue' element into the
               publication object.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: calls set_issue() on the package-level $pub_object and
               purges the twig
 Example:

=cut

sub issue {
    my $twig = shift;
    my $elt  = shift;

    $pub_object->set_issue( $elt->text );

    # Discard what has been parsed so far; the value has been copied out.
    return $twig->purge;
}
=head2 get_pyear

 Usage:        my $year = $pubmed->get_pyear();
 Desc:         Accessor for the 'pyear' key of this object.
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): no code visible here assigns the 'pyear' key; the pyear()
# handler writes to the publication object instead. Confirm intended use.
sub get_pyear {
    my ($self) = @_;
    return $self->{pyear};
}
=head2 pyear

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Stores the year found in a 'DateCompleted/Year' or
               'PubDate/Year' element on the publication object, but only
               if the publication object has no year yet.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: may call set_pyear() on the package-level $pub_object;
               purges the twig
 Example:

=cut

sub pyear {
    my ( $twig, $elt ) = @_;
    my $pyear = $elt->text;

    # First year seen wins: this handler is registered for two different
    # elements, and a year already present on the object is never replaced.
    # NOTE(review): which of the two elements comes first depends on the
    # document order of the PubMed XML - confirm the intended precedence.
    unless ( $pub_object->get_pyear() ) {
        $pub_object->set_pyear($pyear);
    }
    #$pub_object->d("DEBUG:pyear is '$pyear'!!!!");

    return $twig->purge;
}
=head2 get_pages

 Usage:        my $pages = $pubmed->get_pages();
 Desc:         Accessor for the 'pages' key of this object.
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): no code visible here assigns the 'pages' key; the pages()
# handler writes to the publication object instead. Confirm intended use.
sub get_pages {
    my ($self) = @_;
    return $self->{pages};
}
=head2 pages

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Copies the text of the 'Pagination/MedlinePgn' element into
               the publication object.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: calls set_pages() on the package-level $pub_object and
               purges the twig
 Example:

=cut

sub pages {
    my $twig = shift;
    my $elt  = shift;

    $pub_object->set_pages( $elt->text );

    # Discard what has been parsed so far; the value has been copied out.
    return $twig->purge;
}
=head2 get_journal_name

 Usage:        my $journal = $pubmed->get_journal_name();
 Desc:         Accessor for the 'journal_name' key of this object.
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): no code visible here assigns the 'journal_name' key; the
# journal_name() handler stores the journal title on the publication object
# via set_series_name() instead. Confirm intended use.
sub get_journal_name {
    my ($self) = @_;
    return $self->{journal_name};
}
=head2 journal_name

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Copies the text of the 'Journal/Title' element into the
               publication object as its series name.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: calls set_series_name() on the package-level $pub_object
               and purges the twig
 Example:

=cut

sub journal_name {
    my $twig = shift;
    my $elt  = shift;

    # The journal title is stored under the publication's "series name".
    $pub_object->set_series_name( $elt->text );

    return $twig->purge;
}
=head2 get_pub_type

 Usage:        my $type = $pubmed->get_pub_type();
 Desc:         Accessor for the 'pub_type' key of this object.
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): no code visible here assigns the 'pub_type' key; the
# pub_type() handler sets the cvterm name on the publication object instead.
# Confirm intended use.
sub get_pub_type {
    my ($self) = @_;
    return $self->{pub_type};
}
=head2 pub_type

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Looks at the text of a
               'PublicationTypeList/PublicationType' element and, when it
               contains "Journal" (case-insensitive), sets the cvterm name
               of the publication object to 'journal'. Any other
               publication type is ignored.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: may call set_cvterm_name() and d() on the package-level
               $pub_object; purges the twig
 Example:

=cut

sub pub_type {
    my $twig = shift;
    my $elt  = shift;

    my $pub_type = $elt->text;

    if ( $pub_type =~ m/Journal/i ) {
        # Normalise every journal-like type to the single term 'journal'.
        $pub_type = 'journal';
        $pub_object->set_cvterm_name($pub_type);
        $pub_object->d("DEBUG:found pub_type '$pub_type' . Setting cvterm_name to 'journal' !!\n");
    }
    # Non-journal types are deliberately left alone:
    #$pub_object->d("DEBUG:pub_type $pub_type is not a recognizeable cvterm!!\n");

    return $twig->purge;
}
=head2 get_abstract

 Usage:        my $abstract = $pubmed->get_abstract();
 Desc:         Accessor for the 'abstract' key of this object.
 Ret:          the stored value, or undef if the key was never set
 Args:         none
 Side Effects: none
 Example:

=cut

# NOTE(review): no code visible here assigns the 'abstract' key; the
# abstract() handler writes to the publication object instead. Confirm
# intended use.
sub get_abstract {
    my ($self) = @_;
    return $self->{abstract};
}
=head2 abstract

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Copies the text of an 'Abstract/AbstractText' element into
               the publication object.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the element that triggered the handler
 Side Effects: calls set_abstract() on the package-level $pub_object and
               purges the twig
 Example:

=cut

sub abstract {
    my $twig = shift;
    my $elt  = shift;

    # NOTE(review): each call replaces whatever set_abstract() received
    # before - confirm this is intended if a record can contain more than
    # one AbstractText element.
    $pub_object->set_abstract( $elt->text );

    return $twig->purge;
}
=head2 author

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Builds a "LastName|Initials" string from an Author element
               and adds it to the publication object. When the element has
               no Initials, the ForeName (or, failing that, the FirstName)
               is used in their place.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the Author element that triggered the handler
 Side Effects: calls add_author() on the package-level $pub_object and
               purges the twig
 Example:

=cut

sub author {
    my $twig = shift;
    my $elt  = shift;

    my $lastname = $elt->children_text('LastName');
    my $initials = $elt->children_text('Initials');

    # Sometimes there are no initials, only a full first name.
    unless ($initials) {
        $initials = $elt->children_text('ForeName') || $elt->children_text('FirstName');
    }

    my $author_data = "${lastname}|${initials}";
    $pub_object->add_author($author_data);
    #$rank ++;

    return $twig->purge;
}
=head2 e_id

 Usage:        used as an XML::Twig handler; not meant to be called directly
 Desc:         Store DOI: copies the text of the ELocationID element into
               the publication object as its eid.
 Ret:          the return value of $twig->purge
 Args:         $twig - the XML::Twig object
               $elt  - the ELocationID element that triggered the handler
 Side Effects: sets eid for the publication object; purges the twig
 Example:

=cut

sub e_id {
    my $twig = shift;
    my $elt  = shift;

    # NOTE(review): the text is stored as-is; nothing here checks that the
    # ELocationID actually holds a DOI - confirm against the PubMed format.
    $pub_object->set_eid( $elt->text );

    return $twig->purge;
}