make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / find_and_load_longitude_latitude.pl
blob019adf4fa202c66a69838ba085e0fec3abaa41e8
1 #!/usr/bin/perl
3 =head1
5 find_and_load_longitude_latitude.pl - find long/lat, altitude, country_name, country_code, and program for locations in cxgn databases. optionally can directly update or create the found props in the database
7 =head1 SYNOPSIS
9 find_and_load_longitude_latitude.pl -H localhost -D cxgn -O outfile.tsv -s
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H localhost
14 -D database
15 -O outfile.csv
16 -s save found props that don't have existing values in the database
18 =head1 DESCRIPTION
21 =head1 AUTHOR
23 Nicolas Morales nm529@cornell.edu
25 =cut
27 use strict;
29 use Getopt::Std;
30 use Data::Dumper;
31 use Carp qw /croak/ ;
32 use Pod::Usage;
33 use Spreadsheet::ParseExcel;
34 use Bio::Chado::Schema;
35 use CXGN::DB::InsertDBH;
36 use CXGN::BreedersToolbox::Projects;
37 use CXGN::Location;
38 use LWP::UserAgent;
39 use Encode qw( encode );
40 use JSON;
41 use SGN::Model::Cvterm;
43 our ($opt_H, $opt_D, $opt_O, $opt_s);
45 getopts('H:D:O:s');
47 if (!$opt_H || !$opt_D || !$opt_O) {
48 pod2usage(-verbose => 2, -message => "Must provide options -H, -D, and -O \n");
51 my $dbhost = $opt_H;
52 my $dbname = $opt_D;
54 my $dbh = CXGN::DB::InsertDBH->new({
55 dbhost=>$dbhost,
56 dbname=>$dbname,
57 dbargs => {AutoCommit => 1, RaiseError => 1}
58 });
60 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
61 $dbh->do('SET search_path TO public,sgn');
63 open(my $F, ">", $opt_O) || die "Can't open file ".$opt_O;
65 # print STDERR "LocationName\tLongitudeInDB\tLatitudeInDB\tAltitudeInDB\tCountry\tCountryCode\tProgram\tFoundLongitude\tFoundLatitude\tFoundAltitude\tFoundCountry\tFoundCountryCode\tFoundProgram\n";
67 print $F "LocationName\tLongitudeInDB\tLatitudeInDB\tAltitudeInDB\tCountry\tCountryCode\tProgram\tFoundLongitude\tFoundLatitude\tFoundAltitude\tFoundCountry\tFoundCountryCode\tFoundProgram\n";
69 my $program_trial_relationship_id = SGN::Model::Cvterm->get_cvterm_row($schema, "breeding_program_trial_relationship", "project_relationship")->cvterm_id();
70 my $project_location_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, "project location", "project_property")->cvterm_id();
72 my $project_object = CXGN::BreedersToolbox::Projects->new( { schema => $schema });
73 my $all_locations = $project_object->get_location_geojson_data();
75 foreach my $location_hash (@$all_locations) {
77 my $location = $location_hash->{'properties'};
78 my $name = $location->{'Name'};
79 my $id = $location->{'Id'};
80 my ($longitude, $latitude, $altitude, $country_name, $country_code, $program);
81 my ($found_latitude, $found_longitude, $found_altitude, $found_country_name, $found_country_code, $found_program);
83 #retrieve coordinates from name
84 $name =~ s/\s/+/g;
85 my $server_endpoint1 = "http://maps.googleapis.com/maps/api/geocode/json?address=$name";
86 my $req = HTTP::Request->new(GET => $server_endpoint1);
87 $req->header('content-type' => 'application/json');
88 my $ua = LWP::UserAgent->new;
89 my $resp = $ua->request($req);
90 if ($resp->is_success) {
91 my $message = $resp->decoded_content;
92 my $json_utf8 = encode('UTF-8', $message);
93 my $message_hash = decode_json $json_utf8;
94 my $location = $message_hash->{'results'}->[0]->{'geometry'}->{'location'};
95 $found_latitude = $location->{'lat'};
96 $found_longitude = $location->{'lng'};
97 $latitude = $location_hash->{'properties'}->{'Latitude'} || $found_latitude;
98 $longitude = $location_hash->{'properties'}->{'Longitude'} || $found_longitude;
101 #retrieve altitude from coordinates
102 my $server_endpoint2 = "http://www.datasciencetoolkit.org/coordinates2statistics/$latitude%2c$longitude?statistics=elevation";
103 $req = HTTP::Request->new(GET => $server_endpoint2);
104 $req->header('content-type' => 'application/json');
105 $ua = LWP::UserAgent->new;
106 $resp = $ua->request($req);
107 if ($resp->is_success) {
108 my $message = $resp->decoded_content;
109 my $json_utf8 = encode('UTF-8', $message);
110 my $message_hash = decode_json $json_utf8;
111 $found_altitude = $message_hash->[0]->{'statistics'}->{'elevation'}->{'value'};
112 $altitude = $location->{'Altitude'} || $found_altitude;
115 #retrieve country code and name from coordinates
116 my $server_endpoint3 = "http://www.datasciencetoolkit.org/coordinates2politics/$latitude%2c$longitude?";
117 $req = HTTP::Request->new(GET => $server_endpoint3);
118 $req->header('content-type' => 'application/json');
119 $ua = LWP::UserAgent->new;
120 $resp = $ua->request($req);
121 if ($resp->is_success) {
122 my $message = $resp->decoded_content;
123 my $json_utf8 = encode('UTF-8', $message);
124 my $message_hash = decode_json $json_utf8;
125 $found_country_name = $message_hash->[0]->{'politics'}->[0]->{'name'};
126 $found_country_code = uc($message_hash->[0]->{'politics'}->[0]->{'code'});
127 $country_name = $location->{'Country'} || $found_country_name;
128 $country_code = $location->{'Code'} || $found_country_code;
132 #retrieve breeding program from associated trials
133 my $program_query = "SELECT geo.nd_geolocation_id,
134 breeding_program.name,
135 count(distinct(projectprop.project_id))
136 FROM nd_geolocation AS geo
137 LEFT JOIN projectprop ON (projectprop.value::INT = geo.nd_geolocation_id AND projectprop.type_id=?)
138 LEFT JOIN project AS trial ON (trial.project_id=projectprop.project_id)
139 LEFT JOIN project_relationship ON (subject_project_id=trial.project_id AND project_relationship.type_id =?)
140 LEFT JOIN project breeding_program ON (breeding_program.project_id=object_project_id)
141 WHERE nd_geolocation_id =?
142 GROUP BY 1,2
143 ORDER BY 3
144 LIMIT 1";
145 my $prepared_query=$dbh->prepare($program_query);
146 $prepared_query->execute($project_location_cvterm_id, $program_trial_relationship_id, $id);
147 my ($geo_id, $found_program, $count) = $prepared_query->fetchrow_array();
148 $program = $location->{'Program'} || $found_program;
151 # print STDERR "$name saved props:\t".$location->{'Longitude'}."\t".$location->{'Latitude'}."\t".$location->{'Altitude'}."\t".$location->{'Country'}."\t".$location->{'Code'}."\t".$location->{'Program'}."\n$name found props:\t".$found_longitude."\t".$found_latitude."\t".$found_altitude."\t".$found_country_name."\t".$found_country_code."\t".$found_program."\n";
153 print $F $name."\t".$location->{'Longitude'}."\t".$location->{'Latitude'}."\t".$location->{'Altitude'}."\t".$location->{'Country'}."\t".$location->{'Code'}."\t".$location->{'Program'}."\t".$found_longitude."\t".$found_latitude."\t".$found_altitude."\t".$found_country_name."\t".$found_country_code."\t".$found_program."\n";
155 if ($opt_s){
157 print STDERR "Updating $name with properties:
158 country_name => $country_name,
159 country_code => $country_code,
160 breeding_program => $program,
161 latitude => $latitude,
162 longitude => $longitude,
163 altitude => $altitude \n";
165 my $updated_location = CXGN::Location->new( {
166 bcs_schema => $schema,
167 nd_geolocation_id => $id,
168 country_name => $country_name,
169 country_code => $country_code,
170 breeding_program => $program,
171 latitude => $latitude,
172 longitude => $longitude,
173 altitude => $altitude,
176 my $store = $updated_location->store_location();
178 if ($store->{'error'}) {
179 print STDERR $store->{'error'}."\n";
185 close($F);