clean
[sgn.git] / bin / load_locations.pl
blob98434bd06fcf18f3dcc12f560b602c5e07b6ab06
1 #!/usr/bin/perl
3 =head1
5 load_locations.pl - loading locations into cxgn databases.
7 =head1 SYNOPSIS
9 load_locations.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script loads locations data into Chado, by adding data to nd_geolocation table. Infile is Excel .xls format.
20 Header is in this order: 'Full Name', 'Longitude', 'Latitude', 'Altitude'
22 =head1 AUTHOR
24 Nicolas Morales (nm529@cornell.edu)
26 =cut
28 use strict;
30 use Getopt::Std;
31 use Data::Dumper;
32 use Carp qw /croak/ ;
33 use Pod::Usage;
34 use Spreadsheet::ParseExcel;
35 use Bio::Chado::Schema;
36 use CXGN::DB::InsertDBH;
38 our ($opt_H, $opt_D, $opt_i);
40 getopts('H:D:i:');
42 if (!$opt_H || !$opt_D || !$opt_i) {
43 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
46 my $dbhost = $opt_H;
47 my $dbname = $opt_D;
48 my $parser = Spreadsheet::ParseExcel->new();
49 my $excel_obj = $parser->parse($opt_i);
51 my $dbh = CXGN::DB::InsertDBH->new({
52 dbhost=>$dbhost,
53 dbname=>$dbname,
54 dbargs => {AutoCommit => 1, RaiseError => 1}
55 });
57 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
58 $dbh->do('SET search_path TO public,sgn');
61 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
62 my ( $row_min, $row_max ) = $worksheet->row_range();
63 my ( $col_min, $col_max ) = $worksheet->col_range();
65 if ($col_max ne '3' || $worksheet->get_cell(0,0)->value() ne 'Full Name' || $worksheet->get_cell(0,1)->value() ne 'Longitude' || $worksheet->get_cell(0,2)->value() ne 'Latitude' || $worksheet->get_cell(0,3)->value() ne 'Altitude') {
66 pod2usage(-verbose => 2, -message => "Headers must be only in this order: Full Name, Longitude, Latitude, Altitude\n");
70 for my $row ( 1 .. $row_max ) {
72 my $name = $worksheet->get_cell($row,0)->value();
73 my $longitude = $worksheet->get_cell($row,1)->value();
74 my $latitude = $worksheet->get_cell($row,2)->value();
75 my $altitude = $worksheet->get_cell($row,3)->value();
77 my $new_row;
78 $new_row = $schema->resultset('NaturalDiversity::NdGeolocation')->new({ description => $name });
79 if ($longitude) {
80 $new_row->longitude($longitude);
82 if ($latitude) {
83 $new_row->latitude($latitude);
85 if ($altitude) {
86 $new_row->altitude($altitude);
88 $new_row->insert();
90 print STDERR "Stored: ".$name."\n";
94 print STDERR "Script Complete.\n";