6 use JSON
qw( decode_json );
# Open the SQLite database that receives one row per publication.
# RaiseError is off, so every DBI call below is checked by hand (PrintError
# still makes DBI warn). AutoCommit is off, so the inserts need an explicit
# commit somewhere later in the script (not visible in this part).
my $dbh = DBI->connect(
    "dbi:SQLite:dbname=giga.authors.sqlite", "", "",
    { RaiseError => 0, PrintError => 1, AutoCommit => 0 },
) or die $DBI::errstr;

# Plain CREATE TABLE: the run aborts if PubDat already exists.
# Author's note: CREATE TABLE IF NOT EXISTS was considered, but the script
# does not yet check for entries fetched by a previous run.
$dbh->do("CREATE TABLE PubDat (DOI TEXT, Title TEXT, Type TEXT, Authors TEXT, RefList TEXT)")
    or die $dbh->errstr;

# Insert statement with one placeholder per PubDat column. With RaiseError
# off, prepare() signals failure only through its return value, so check it
# here instead of failing later on an undefined statement handle.
my $sthi = $dbh->prepare(
    "INSERT INTO PubDat ( DOI,Title,Type,Authors,RefList ) VALUES ( ?,?,?,?,? )"
) or die $dbh->errstr;

# HTTP client used for all page fetches; it sends a desktop Firefox
# User-Agent string.
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:58.0) Gecko/20100101 Firefox/58.0");
# Open $filename for reading on $infile (both declared outside this part),
# choosing a decompressor by file extension: .xz, .gz or .bz2. Any other
# name is opened directly with a plain read.
#
# The dots in the extension patterns are escaped so that only a real
# ".gz"-style suffix matches. The decompressor is started with list-form
# pipe open, so the file name is passed as one argument and is never
# interpreted by a shell (spaces or metacharacters in the name are safe).
# Dies with the system error text if the open fails.
if ($filename =~ /\.xz$/) {
    open($infile, "-|", "xz", "-dc", $filename) or die "Error opening $filename: $!\n";
}
elsif ($filename =~ /\.gz$/) {
    open($infile, "-|", "gzip", "-dc", $filename) or die "Error opening $filename: $!\n";
}
elsif ($filename =~ /\.bz2$/) {
    open($infile, "-|", "bzip2", "-dc", $filename) or die "Error opening $filename: $!\n";
}
else {
    open($infile, "<", $filename) or die "Error opening $filename: $!\n";
}
# Fragment of the page-parsing routine. The listing is incomplete: the sub
# header, the closing braces and some statements are not visible here.
# Splits the page text in $cnt into lines and scans them for three things:
#   - the ld+json <script> tag: the following line is kept as raw JSON text;
#   - the ref-list <div>: that line itself is kept as the reference list;
#   - "Issue Section:": the next line is matched against >...</a>
#     (presumably the capture is stored in $tocSections on a line that is
#     not shown; confirm).
# Returns an array ref: [$tocSections, decoded JSON structure, $reflist].
# NOTE(review): three variables are declared but only two initialisers are
# given, so $jsonData starts out undef; if no ld+json tag is found,
# decode_json() is called with undef. Confirm how that case should behave.
35 my ($tocSections,$reflist,$jsonData)=('','');
36 my @lines = split(/^/m,$cnt);
37 for (my $i=0;$i<=$#lines;$i++) {
38 if ($lines[$i] =~ /<script type="application\/ld\
+json
">/) {
39 $jsonData = $lines[$i+1];
41 if ($lines[$i] =~ /<div class="ref-list
">/) {
42 $reflist = $lines[$i];
45 if ($lines[$i] =~ /Issue Section:/) {
46 $lines[$i+1] =~ />([^<>]+)<\/a>/;
51 my $decoded_json = decode_json( $jsonData );
52 #print ">>>$tocSections<<<\n";
53 #print Dumper $decoded_json;
54 return [$tocSections,$decoded_json,$reflist];
# Fragment of the fetch-with-retry routine. The listing is incomplete: the
# sub header, the branch structure and the end of the loop are not visible.
# Builds a GET request for $URL and issues it through $ua inside a loop that
# counts attempts from 1 to $times. A successful response has its content
# passed to getthings() and the result stored in $ret. The later lines
# (presumably the failure branch; confirm) print the HTTP status line with
# the attempt counter and set $ret to ["\n", status line].
59 my $req = HTTP::Request->new(GET => $URL);
61 for my $i (1 .. $times) {
62 my $res = $ua->request($req);
63 if ($res->is_success) {
64 $ret = getthings($res->content);
67 print $res->status_line, " <<<--- $i of
$times\n";
68 $ret = ["\n",$res->status_line];
# Input: the publication table, opened through openfile().
my $fh = openfile('giga.tsv.bz2');

# Output: the INI-style report, written through the bareword handle O that
# the rest of the script prints to. The low-precedence `or` matters: with
# `||` the die() was attached to the file name string (always true), so a
# failed open was never reported.
open O, '>', 'giga.authors.ini' or die("[x]Cannot Open Output File.");
# Fragment of the main per-record loop. The listing is incomplete: the loop
# header, the inner author loop header and the closing braces are not
# visible. @dat holds the fields of one input record; $dat[0] is written
# out as the title and $dat[16] is used as the DOI.
# For each record the code:
#   - echoes title and DOI to STDOUT;
#   - writes a "Missing or Misformatted DOI" entry when the DOI has no "/";
#   - otherwise looks the DOI up through fetchURL() with a count of 5 and
#     writes one INI-style section to O depending on $ret->[0]:
#       ''    -> Type "Wrong DOI"
#       "\n"  -> Type "Error: <$ret->[1]>"
#       other -> Type, an Authors={...} list and a RefList={...} block,
#                followed by an insert through $sthi.
# NOTE(review): join('"', ...) uses the double quote as the separator; that
# is what wraps each name and affiliation in quotes in the output.
83 print join(" | ",@dat[0,16]),"\n";
84 unless ($dat[16] =~ /\//) {
85 print O "[$dat[16]]\nTitle
=\"$dat[0]\"\nType
=\"Missing
or Misformatted DOI
!\"\n\n";
88 my $url = 'https://academic.oup.com/gigascience/article-lookup/doi/' . $dat[16];
89 my $ret=fetchURL($url,5);
91 if ($ret->[0] eq '') {
92 print O "[$dat[16]]\nTitle
=\"$dat[0]\"\nType
=\"Wrong DOI
!\"\n\n";
94 } elsif ($ret->[0] eq "\n") {
95 print O "[$dat[16]]\nTitle
=\"$dat[0]\"\nType
=\"Error
: $ret->[1]\"\n\n";
98 print O "[$dat[16]]\nTitle
=\"$dat[0]\"\nType
=\"$ret->[0]\"\nAuthors
={\n";
100 my $authors = ${$ret->[1]}{'author'};
104 print O join('"',"\t",$_->{'name
'},"=",$_->{'affiliation
'},"\n");
105 $AuthorStr .= join('"','',$_->{'name'},"=",$_->{'affiliation'},"\n");
107 print O "}\nRefList
=\
{$ret->[2]\
}\n\n";
109 $sthi->execute($dat[16],$dat[0],$ret->[0],$AuthorStr,$ret->[2]) or die $sthi->errstr;
119 [10.1186/s13742-015-0066-5]
120 Title="The ocean sampling day consortium
"
123 "Kopf
, Anna
"="1 Max Planck Institute
for Marine Microbiology
, Celsiusstrasse
1, D
-28359Bremen
, Germany
2 Jacobs University Bremen gGmbH
, Campus Ring
1, D
-28759 Bremen
, Germany
"
124 "Bicak
, Mesude
"="3 University of Oxford
, 7 Keble Road
, OX1
3QG Oxford
, Oxfordshire
, UK
"
125 "Kottmann
, Renzo
"="1 Max Planck Institute
for Marine Microbiology
, Celsiusstrasse
1, D
-28359Bremen
, Germany
"
126 "Schnetzer
, Julia
"="1 Max Planck Institute
for Marine Microbiology
, Celsiusstrasse
1, D
-28359Bremen
, Germany
2 Jacobs University Bremen gGmbH
, Campus Ring
1, D
-28759 Bremen
, Germany
"
127 "Øvreås
, Lise
"="26 Department of Biology
, University of Bergen
, Thormøhlensgate
53 B
, 5020 Bergen
, Norway
"
128 "Glöckner
, Frank Oliver
"="1 Max Planck Institute
for Marine Microbiology
, Celsiusstrasse
1, D
-28359Bremen
, Germany
2 Jacobs University Bremen gGmbH
, Campus Ring
1, D
-28759 Bremen
, Germany
"