Merge pull request #4106 from solgenomics/topic/wishlist
[sgn.git] / bin / load_pub_doi.pl
blob c7a41641312a6340c239f1eda28efa7bcf4c2255
1 #!/usr/bin/perl
3 use Getopt::Std;
4 use CXGN::DB::InsertDBH;
5 use Bio::Chado::Schema;
6 use Data::Dumper;
7 use CXGN::Chado::Publication;
8 use CXGN::Tools::Pubmed;
9 use strict;
10 use warnings;
12 use vars qw | $opt_H $opt_D |;
# Command line: -H <dbhost> -D <dbname>.
# getopts() returns false when it meets a switch it does not know; stop there
# rather than continuing to connect with a half-understood command line.
getopts('H:D:')
    or die "Usage: $0 -H <dbhost> -D <dbname>\n";

# Database handle built from the two command-line options.
my $dbh = CXGN::DB::InsertDBH->new(
    {
        dbhost => $opt_H,
        dbname => $opt_D,
    }
);

# Schema object that reuses the handle above (the coderef hands the schema
# whatever get_actual_dbh() returns) and sets the search path on connect.
# NOTE(review): the attribute key is lowercase 'autocommit'; DBI's own
# attribute is spelled 'AutoCommit', so this key may have no effect - confirm.
my $schema = Bio::Chado::Schema->connect(
    sub { $dbh->get_actual_dbh() },
    { on_connect_do => ['SET search_path TO public;'], autocommit => 1 },
);
print STDERR "finding dbxrefs for all publications\n";

# All dbxrefs whose db is named 'PMID'.  The resultset is joined to db and
# pub_dbxrefs, and pub_dbxrefs.pub_id is added to each row as the extra
# column "pub_id", which is read back later with get_column("pub_id").
my $dbxref_rs = $schema->resultset("General::Dbxref")->search(
    {
        'db.name' => 'PMID',
    },
    {
        join      => [ 'db', 'pub_dbxrefs' ],
        "+select" => [ "pub_dbxrefs.pub_id" ],
        "+as"     => [ "pub_id" ],
    }
);
# Walk every PMID dbxref, look up the publication it points at and, when that
# publication reports an eid (printed below as its DOI), store the eid as a
# dbxref of the 'DOI' db and link it to the publication.
my $pub_count = 0;    # start at 0 so the summary never interpolates undef
my $doi_count = 0;
my $doi_db;           # 'DOI' db row; resolved on first use, then reused

while ( my $dbxref = $dbxref_rs->next ) {
    $pub_count++;

    my $accession = $dbxref->accession;
    my $pub_id    = $dbxref->get_column("pub_id");
    if ( !$pub_id ) {
        warn "No pub_id exists for accession $accession! Skipping ! \n\n";
        next;
    }

    my $pub = CXGN::Chado::Publication->new( $dbh, $pub_id );
    $pub->set_accession($accession);

    # The returned object is never read; the constructor is kept for whatever
    # it does to $pub.
    # NOTE(review): presumably it fetches the PubMed record for the accession
    # and fills in $pub (including the eid read below) - confirm.
    my $pubmed = CXGN::Tools::Pubmed->new($pub);

    my $eid = $pub->get_eid;
    if ($eid) {
        $doi_count++;
        print STDERR "Found DOI $eid\n";

        # The db row is the same for every publication, so find or create it
        # once instead of once per DOI.
        $doi_db ||= $schema->resultset("General::Db")->find_or_create(
            {
                name      => 'DOI',
                urlprefix => 'http://',
                url       => 'doi.org',
            }
        );

        my $e_dbxref = $doi_db->find_or_create_related( "dbxrefs", { accession => $eid } );
        $e_dbxref->find_or_create_related( "pub_dbxrefs", { pub_id => $pub_id } );

        print STDERR "Loaded DOI $eid for publication $pub_id (accession $accession) dbxref id = " . $e_dbxref->get_column('dbxref_id') . " **\n";
    }
    else {
        print STDERR "no DOI for pub_id $pub_id , pubmed accession = $accession\n";
    }
}

# NOTE(review): no txn_begin is visible in this script; presumably the handle
# runs with AutoCommit off so this call makes the inserts permanent - confirm.
$schema->txn_commit;
print STDERR "\nDONE. Found $pub_count publications, $doi_count DOIs \n\n";