5 load_locations_extended.pl - loading locations into cxgn databases.
9 load_locations_extended.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
19 This script loads locations data into Chado, by adding data to nd_geolocation table, and properties to nd_geolocationprop. Infile is Excel .xls and .xlsx format.
20 Header must have in the first 4 columns: 'Name', 'Longitude', 'Latitude', 'Altitude'.
21 Header columns from colum 5 and onwards are stored as cvterms that are part of the cv 'geolocation_property'.
26 Nicolas Morales (nm529@cornell.edu)
36 use Spreadsheet
::ParseExcel
;
37 use Spreadsheet
::ParseXLSX
;
38 use Bio
::Chado
::Schema
;
39 use CXGN
::DB
::InsertDBH
;
41 our ($opt_H, $opt_D, $opt_i);
45 if (!$opt_H || !$opt_D || !$opt_i) {
46 pod2usage
(-verbose
=> 2, -message
=> "Must provide options -H (hostname), -D (database name), -i (input file) \n");
52 # Match a dot, extension .xls / .xlsx
53 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
56 if ($extension eq '.xlsx') {
57 $parser = Spreadsheet
::ParseXLSX
->new();
60 $parser = Spreadsheet
::ParseExcel
->new();
63 my $excel_obj = $parser->parse($opt_i);
65 my $dbh = CXGN
::DB
::InsertDBH
->new({
68 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
71 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
72 $dbh->do('SET search_path TO public,sgn');
75 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
76 my ( $row_min, $row_max ) = $worksheet->row_range();
77 my ( $col_min, $col_max ) = $worksheet->col_range();
79 if ($col_max ne '11' || $worksheet->get_cell(0,0)->value() ne 'Full Name' || $worksheet->get_cell(0,1)->value() ne 'Longitude' || $worksheet->get_cell(0,2)->value() ne 'Latitude' || $worksheet->get_cell(0,3)->value() ne 'Altitude' || $worksheet->get_cell(0,4)->value() ne 'Uniquename' || $worksheet->get_cell(0,5)->value() ne 'Agricultural Ecological Zone'|| $worksheet->get_cell(0,6)->value() ne 'Continent' || $worksheet->get_cell(0,7)->value() ne 'Country' || $worksheet->get_cell(0,8)->value() ne 'Country Code' || $worksheet->get_cell(0,9)->value() ne 'adm1' || $worksheet->get_cell(0,10)->value() ne 'adm2' || $worksheet->get_cell(0,11)->value() ne 'adm3') {
80 pod2usage
(-verbose
=> 2, -message
=> "Headers must be only in this order: Full Name, Longitude, Latitude, Altitude, Uniquename, Agricultural Ecological Zone, Continent, Country, Country Code, adm1, adm2, adm3\n");
84 for my $row ( 1 .. $row_max ) {
86 my $name = $worksheet->get_cell($row,0)->value();
87 my $longitude = $worksheet->get_cell($row,1)->value();
88 my $latitude = $worksheet->get_cell($row,2)->value();
89 my $altitude = $worksheet->get_cell($row,3)->value();
92 $new_row = $schema->resultset('NaturalDiversity::NdGeolocation')->new({ description
=> $name });
94 $new_row->longitude($longitude);
97 $new_row->latitude($latitude);
100 $new_row->altitude($altitude);
104 print STDERR
"Stored: ".$name." in nd_geolocation.\n";
106 for my $col ( 4 .. $col_max ) {
108 my $prop_term = $worksheet->get_cell(0, $col)->value();
109 my $prop_value = $worksheet->get_cell($row,$col)->value();
111 my $prop_cvterm = $schema->resultset("Cv::Cvterm")->create_with({
113 cv
=> 'geolocation_property',
116 my $new_prop = $schema->resultset('NaturalDiversity::NdGeolocationprop')->create({
117 nd_geolocation_id
=> $new_row->nd_geolocation_id(),
118 type_id
=> $prop_cvterm->cvterm_id(),
119 value
=> $prop_value,
122 print STDERR
"Stored: ".$prop_value." in nd_geolocationprop.\n";
127 print STDERR
"Script Complete.\n";