5 load_locations_extended.pl - loading locations into cxgn databases.
9 load_locations_extended.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
19 This script loads locations data into Chado, by adding data to nd_geolocation table, and properties to nd_geolocationprop. Infile is Excel .xls format.
20 Header must have in the first 4 columns: 'Name', 'Longitude', 'Latitude', 'Altitude'.
21 Header columns from colum 5 and onwards are stored as cvterms that are part of the cv 'geolocation_property'.
26 Nicolas Morales (nm529@cornell.edu)
36 use Spreadsheet
::ParseExcel
;
37 use Bio
::Chado
::Schema
;
38 use CXGN
::DB
::InsertDBH
;
40 our ($opt_H, $opt_D, $opt_i);
44 if (!$opt_H || !$opt_D || !$opt_i) {
45 pod2usage
(-verbose
=> 2, -message
=> "Must provide options -H (hostname), -D (database name), -i (input file) \n");
50 my $parser = Spreadsheet
::ParseExcel
->new();
51 my $excel_obj = $parser->parse($opt_i);
53 my $dbh = CXGN
::DB
::InsertDBH
->new({
56 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
59 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
60 $dbh->do('SET search_path TO public,sgn');
63 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
64 my ( $row_min, $row_max ) = $worksheet->row_range();
65 my ( $col_min, $col_max ) = $worksheet->col_range();
67 if ($col_max ne '11' || $worksheet->get_cell(0,0)->value() ne 'Full Name' || $worksheet->get_cell(0,1)->value() ne 'Longitude' || $worksheet->get_cell(0,2)->value() ne 'Latitude' || $worksheet->get_cell(0,3)->value() ne 'Altitude' || $worksheet->get_cell(0,4)->value() ne 'Uniquename' || $worksheet->get_cell(0,5)->value() ne 'Agricultural Ecological Zone'|| $worksheet->get_cell(0,6)->value() ne 'Continent' || $worksheet->get_cell(0,7)->value() ne 'Country' || $worksheet->get_cell(0,8)->value() ne 'Country Code' || $worksheet->get_cell(0,9)->value() ne 'adm1' || $worksheet->get_cell(0,10)->value() ne 'adm2' || $worksheet->get_cell(0,11)->value() ne 'adm3') {
68 pod2usage
(-verbose
=> 2, -message
=> "Headers must be only in this order: Full Name, Longitude, Latitude, Altitude, Uniquename, Agricultural Ecological Zone, Continent, Country, Country Code, adm1, adm2, adm3\n");
72 for my $row ( 1 .. $row_max ) {
74 my $name = $worksheet->get_cell($row,0)->value();
75 my $longitude = $worksheet->get_cell($row,1)->value();
76 my $latitude = $worksheet->get_cell($row,2)->value();
77 my $altitude = $worksheet->get_cell($row,3)->value();
80 $new_row = $schema->resultset('NaturalDiversity::NdGeolocation')->new({ description
=> $name });
82 $new_row->longitude($longitude);
85 $new_row->latitude($latitude);
88 $new_row->altitude($altitude);
92 print STDERR
"Stored: ".$name." in nd_geolocation.\n";
94 for my $col ( 4 .. $col_max ) {
96 my $prop_term = $worksheet->get_cell(0, $col)->value();
97 my $prop_value = $worksheet->get_cell($row,$col)->value();
99 my $prop_cvterm = $schema->resultset("Cv::Cvterm")->create_with({
101 cv
=> 'geolocation_property',
104 my $new_prop = $schema->resultset('NaturalDiversity::NdGeolocationprop')->create({
105 nd_geolocation_id
=> $new_row->nd_geolocation_id(),
106 type_id
=> $prop_cvterm->cvterm_id(),
107 value
=> $prop_value,
110 print STDERR
"Stored: ".$prop_value." in nd_geolocationprop.\n";
115 print STDERR
"Script Complete.\n";