Merge branch 'master' into topic/phen_stderr
[sgn.git] / bin / change_accessions_to_crosses.pl
blobe62934a03b8247f14bdf94a514c46696e3bb627b
1 #!/usr/bin/perl
3 =head1
5 change_accessions_to_crosses.pl - a script for changing stocks with type accession to type cross, linking these crosses to female and male parents, nd_experiments and crossing experiments
7 =head1 SYNOPSIS
9 change_accessions_to_crosses.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
17 =head1 DESCRIPTION
18 Use case: Previously, users were not able to use cross stock type in trials because cross stock type was not allowed. As a result, some of the cross names were uploaded as accession stock type. After an improvement by allowing cross stock type in trials, users would like to change those names with accession stock type to cross stock type.
20 This script changes stocks with type accession to type cross, then links each cross to parents,a cross type, an nd_experiment and a project. The infile provided has 5 columns. The first column contains stock uniquenames stored as accession stock type. The second column contains female parent uniquenames. The third column contains male parent uniquenames. The forth column contains cross type info. The fifth column contains crossing experiment names. There is no header on the infile and the infile is .xls and .xlsx.
21 =head1 AUTHOR
23 Titima Tantikanjana <tt15@cornell.edu>
25 =cut
27 use strict;
29 use Getopt::Std;
30 use Data::Dumper;
31 use Carp qw /croak/ ;
32 use Pod::Usage;
33 use Spreadsheet::ParseExcel;
34 use Spreadsheet::ParseXLSX;
35 use Bio::Chado::Schema;
36 use CXGN::DB::InsertDBH;
37 use Try::Tiny;
38 use SGN::Model::Cvterm;
40 our ($opt_H, $opt_D, $opt_i);
42 getopts('H:D:i:');
44 if (!$opt_H || !$opt_D || !$opt_i) {
45 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file)\n");
48 my $dbhost = $opt_H;
49 my $dbname = $opt_D;
51 # Match a dot, extension .xls / .xlsx
52 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
53 my $parser;
55 if ($extension eq '.xlsx') {
56 $parser = Spreadsheet::ParseXLSX->new();
58 else {
59 $parser = Spreadsheet::ParseExcel->new();
62 my $excel_obj = $parser->parse($opt_i);
64 my $dbh = CXGN::DB::InsertDBH->new({
65 dbhost=>$dbhost,
66 dbname=>$dbname,
67 dbargs => {AutoCommit => 1, RaiseError => 1}
68 });
70 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
71 $dbh->do('SET search_path TO public,sgn');
74 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
75 my ( $row_min, $row_max ) = $worksheet->row_range();
76 my ( $col_min, $col_max ) = $worksheet->col_range();
78 my $coderef = sub {
79 my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
80 my $cross_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'cross', 'stock_type')->cvterm_id();
81 my $female_parent_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'female_parent', 'stock_relationship')->cvterm_id();
82 my $male_parent_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'male_parent', 'stock_relationship')->cvterm_id();
83 my $project_location_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'project location', 'project_property')->cvterm_id();
84 my $cross_experiment_cvterm = SGN::Model::Cvterm->get_cvterm_row($schema, 'cross_experiment', 'experiment_type');
86 for my $row ( 0 .. $row_max ) {
88 my $stock_uniquename = $worksheet->get_cell($row,0)->value();
89 $stock_uniquename =~ s/^\s+|\s+$//g;
91 my $female_parent_uniquename = $worksheet->get_cell($row,1)->value();
92 $female_parent_uniquename =~ s/^\s+|\s+$//g;
94 my $male_parent_uniquename = $worksheet->get_cell($row,2)->value();
95 $male_parent_uniquename =~ s/^\s+|\s+$//g;
97 my $cross_type = $worksheet->get_cell($row,3)->value();
98 $cross_type =~ s/^\s+|\s+$//g;
100 my $crossing_experiment_name = $worksheet->get_cell($row,4)->value();
101 $crossing_experiment_name =~ s/^\s+|\s+$//g;
103 my $stock_rs = $schema->resultset('Stock::Stock')->find({ uniquename => $stock_uniquename, type_id => $accession_cvterm_id });
104 my $female_rs = $schema->resultset('Stock::Stock')->find({ uniquename => $female_parent_uniquename, type_id => $accession_cvterm_id });
105 my $male_rs = $schema->resultset('Stock::Stock')->find({ uniquename => $male_parent_uniquename, type_id => $accession_cvterm_id });
106 my $crossing_experiment_rs = $schema->resultset("Project::Project")->find( { name => $crossing_experiment_name });
108 if (!$stock_rs) {
109 print STDERR "Error! Stock with uniquename $stock_uniquename was not found in the database.\n";
110 next();
112 if (!$female_rs) {
113 print STDERR "Error! Female parent with uniquename $female_parent_uniquename was not found in the database.\n";
114 next();
116 if (!$male_rs) {
117 print STDERR "Error! Male parent with uniquename $male_parent_uniquename was not found in the database.\n";
118 next();
120 if (!$crossing_experiment_rs) {
121 print STDERR "Error! Crossing experiment: $crossing_experiment_name was not found in the database.\n";
122 next();
125 my $crossing_experiment_id = $crossing_experiment_rs->project_id();
126 my $geolocation_rs = $schema->resultset("Project::Projectprop")->find({project_id => $crossing_experiment_id, type_id => $project_location_cvterm_id});
128 #update stock type
129 my $cross_stock_rs = $stock_rs->update({ type_id => $cross_cvterm_id});
131 #link cross to female and male parents
132 my $stock_female_rs = $schema->resultset("Stock::StockRelationship")->search({ object_id => $cross_stock_rs->stock_id(), type_id => $female_parent_cvterm_id });
133 my $previous_female = $stock_female_rs->count();
134 # print STDERR "FEMALE COUNT =".Dumper($previous_female)."\n";
135 if ($previous_female > 0) {
136 print STDERR "Stock: $stock_uniquename already has female parent in the database.\n";
137 next();
138 } else {
139 $cross_stock_rs->find_or_create_related('stock_relationship_objects', {
140 type_id => $female_parent_cvterm_id,
141 object_id => $cross_stock_rs->stock_id(),
142 subject_id => $female_rs->stock_id(),
143 value => $cross_type,
147 my $stock_male_rs = $schema->resultset("Stock::StockRelationship")->search({ object_id => $cross_stock_rs->stock_id(), type_id => $male_parent_cvterm_id });
148 my $previous_male = $stock_male_rs->count();
149 if ($previous_male > 0) {
150 print STDERR "Stock: $stock_uniquename already has male parent in the database.\n";
151 next();
152 } else {
153 $cross_stock_rs->find_or_create_related('stock_relationship_objects', {
154 type_id => $male_parent_cvterm_id,
155 object_id => $cross_stock_rs->stock_id(),
156 subject_id => $male_rs->stock_id(),
160 #create experiment
161 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create({
162 nd_geolocation_id => $geolocation_rs->value,
163 type_id => $cross_experiment_cvterm->cvterm_id(),
166 #link cross unique id to the experiment
167 $experiment->find_or_create_related('nd_experiment_stocks' , {
168 stock_id => $cross_stock_rs->stock_id(),
169 type_id => $cross_experiment_cvterm->cvterm_id(),
172 #link experiment to the project
173 $experiment->find_or_create_related('nd_experiment_projects', {
174 project_id => $crossing_experiment_id,
179 my $transaction_error;
180 try {
181 $schema->txn_do($coderef);
182 } catch {
183 $transaction_error = $_;
186 if ($transaction_error) {
187 print STDERR "Transaction error storing terms: $transaction_error\n";
188 } else {
189 print STDERR "Script Complete.\n";