make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / load_locations.pl
blob94ed21677cbb7cf2d3db22ef297fd90e18df426b
1 #!/usr/bin/perl
3 =head1
5 load_locations.pl - loading locations into cxgn databases.
7 =head1 SYNOPSIS
9 load_locations.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script loads locations data into Chado, by adding data to nd_geolocation table. Infile is Excel .xls and .xlsx format.
20 Header is in this order: 'Full Name', 'Longitude', 'Latitude', 'Altitude'
22 =head1 AUTHOR
24 Nicolas Morales (nm529@cornell.edu)
26 =cut
28 use strict;
30 use Getopt::Std;
31 use Data::Dumper;
32 use Carp qw /croak/ ;
33 use Pod::Usage;
34 use Spreadsheet::ParseExcel;
35 use Spreadsheet::ParseXLSX;
36 use Bio::Chado::Schema;
37 use CXGN::DB::InsertDBH;
39 our ($opt_H, $opt_D, $opt_i);
41 getopts('H:D:i:');
43 if (!$opt_H || !$opt_D || !$opt_i) {
44 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
47 my $dbhost = $opt_H;
48 my $dbname = $opt_D;
50 # Match a dot, extension .xls / .xlsx
51 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
52 my $parser;
54 if ($extension eq '.xlsx') {
55 $parser = Spreadsheet::ParseXLSX->new();
57 else {
58 $parser = Spreadsheet::ParseExcel->new();
61 my $excel_obj = $parser->parse($opt_i);
63 my $dbh = CXGN::DB::InsertDBH->new({
64 dbhost=>$dbhost,
65 dbname=>$dbname,
66 dbargs => {AutoCommit => 1, RaiseError => 1}
67 });
69 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
70 $dbh->do('SET search_path TO public,sgn');
73 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
74 my ( $row_min, $row_max ) = $worksheet->row_range();
75 my ( $col_min, $col_max ) = $worksheet->col_range();
77 if ($col_max ne '3' || $worksheet->get_cell(0,0)->value() ne 'Full Name' || $worksheet->get_cell(0,1)->value() ne 'Longitude' || $worksheet->get_cell(0,2)->value() ne 'Latitude' || $worksheet->get_cell(0,3)->value() ne 'Altitude') {
78 pod2usage(-verbose => 2, -message => "Headers must be only in this order: Full Name, Longitude, Latitude, Altitude\n");
82 for my $row ( 1 .. $row_max ) {
84 my $name = $worksheet->get_cell($row,0)->value();
85 my $longitude = $worksheet->get_cell($row,1)->value();
86 my $latitude = $worksheet->get_cell($row,2)->value();
87 my $altitude = $worksheet->get_cell($row,3)->value();
89 my $new_row;
90 $new_row = $schema->resultset('NaturalDiversity::NdGeolocation')->new({ description => $name });
91 if ($longitude) {
92 $new_row->longitude($longitude);
94 if ($latitude) {
95 $new_row->latitude($latitude);
97 if ($altitude) {
98 $new_row->altitude($altitude);
100 $new_row->insert();
102 print STDERR "Stored: ".$name."\n";
106 print STDERR "Script Complete.\n";