Merge pull request #2803 from solgenomics/topic/fix_all_tests
[sgn.git] / bin / load_locations_extended.pl
blobaf723ef653535e150be1c4a175c417dc2fcf2ec2
1 #!/usr/bin/perl
3 =head1
5 load_locations_extended.pl - loading locations into cxgn databases.
7 =head1 SYNOPSIS
9 load_locations_extended.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script loads locations data into Chado, by adding data to nd_geolocation table, and properties to nd_geolocationprop. Infile is Excel .xls format.
20 Header must have in the first 4 columns: 'Name', 'Longitude', 'Latitude', 'Altitude'.
21 Header columns from colum 5 and onwards are stored as cvterms that are part of the cv 'geolocation_property'.
24 =head1 AUTHOR
26 Nicolas Morales (nm529@cornell.edu)
28 =cut
30 use strict;
32 use Getopt::Std;
33 use Data::Dumper;
34 use Carp qw /croak/ ;
35 use Pod::Usage;
36 use Spreadsheet::ParseExcel;
37 use Bio::Chado::Schema;
38 use CXGN::DB::InsertDBH;
40 our ($opt_H, $opt_D, $opt_i);
42 getopts('H:D:i:');
44 if (!$opt_H || !$opt_D || !$opt_i) {
45 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
48 my $dbhost = $opt_H;
49 my $dbname = $opt_D;
50 my $parser = Spreadsheet::ParseExcel->new();
51 my $excel_obj = $parser->parse($opt_i);
53 my $dbh = CXGN::DB::InsertDBH->new({
54 dbhost=>$dbhost,
55 dbname=>$dbname,
56 dbargs => {AutoCommit => 1, RaiseError => 1}
57 });
59 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
60 $dbh->do('SET search_path TO public,sgn');
63 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
64 my ( $row_min, $row_max ) = $worksheet->row_range();
65 my ( $col_min, $col_max ) = $worksheet->col_range();
67 if ($col_max ne '11' || $worksheet->get_cell(0,0)->value() ne 'Full Name' || $worksheet->get_cell(0,1)->value() ne 'Longitude' || $worksheet->get_cell(0,2)->value() ne 'Latitude' || $worksheet->get_cell(0,3)->value() ne 'Altitude' || $worksheet->get_cell(0,4)->value() ne 'Uniquename' || $worksheet->get_cell(0,5)->value() ne 'Agricultural Ecological Zone'|| $worksheet->get_cell(0,6)->value() ne 'Continent' || $worksheet->get_cell(0,7)->value() ne 'Country' || $worksheet->get_cell(0,8)->value() ne 'Country Code' || $worksheet->get_cell(0,9)->value() ne 'adm1' || $worksheet->get_cell(0,10)->value() ne 'adm2' || $worksheet->get_cell(0,11)->value() ne 'adm3') {
68 pod2usage(-verbose => 2, -message => "Headers must be only in this order: Full Name, Longitude, Latitude, Altitude, Uniquename, Agricultural Ecological Zone, Continent, Country, Country Code, adm1, adm2, adm3\n");
72 for my $row ( 1 .. $row_max ) {
74 my $name = $worksheet->get_cell($row,0)->value();
75 my $longitude = $worksheet->get_cell($row,1)->value();
76 my $latitude = $worksheet->get_cell($row,2)->value();
77 my $altitude = $worksheet->get_cell($row,3)->value();
79 my $new_row;
80 $new_row = $schema->resultset('NaturalDiversity::NdGeolocation')->new({ description => $name });
81 if ($longitude) {
82 $new_row->longitude($longitude);
84 if ($latitude) {
85 $new_row->latitude($latitude);
87 if ($altitude) {
88 $new_row->altitude($altitude);
90 $new_row->insert();
92 print STDERR "Stored: ".$name." in nd_geolocation.\n";
94 for my $col ( 4 .. $col_max ) {
96 my $prop_term = $worksheet->get_cell(0, $col)->value();
97 my $prop_value = $worksheet->get_cell($row,$col)->value();
99 my $prop_cvterm = $schema->resultset("Cv::Cvterm")->create_with({
100 name => $prop_term,
101 cv => 'geolocation_property',
104 my $new_prop = $schema->resultset('NaturalDiversity::NdGeolocationprop')->create({
105 nd_geolocation_id => $new_row->nd_geolocation_id(),
106 type_id => $prop_cvterm->cvterm_id(),
107 value => $prop_value,
110 print STDERR "Stored: ".$prop_value." in nd_geolocationprop.\n";
115 print STDERR "Script Complete.\n";