Merge branch 'master' into topic/row_column_designs
[sgn.git] / db / 00057 / FixTrialTypes.pm
bloba6395bedf32dbf49e1d6abcf90902f061274e2f8
1 #!/usr/bin/env perl
3 =head1 NAME
5 FixTrialTypes.pm
7 =head1 SYNOPSIS
9 mx-run FixTrialTypes [options] -H hostname -D dbname -u username [-F]
11 this is a subclass of L<CXGN::Metadata::Dbpatch>
12 see the perldoc of parent class for more details.
14 =head1 DESCRIPTION
16 This patch updates the list of possible trial types by removing duplicates, standardizing names, and assigning types to trials where it can be deduced from the trial name.
18 =head1 AUTHOR
20 Bryan Ellerbrock<bje24@cornell.edu>
22 =head1 COPYRIGHT & LICENSE
24 Copyright 2010 Boyce Thompson Institute for Plant Research
26 This program is free software; you can redistribute it and/or modify
27 it under the same terms as Perl itself.
29 =cut
32 package FixTrialTypes;
34 use Moose;
35 use Bio::Chado::Schema;
36 use Try::Tiny;
37 extends 'CXGN::Metadata::Dbpatch';
40 has '+description' => ( default => <<'' );
41 This patch updates the list of possible trial types by removing duplicates, standardizing names, and assigning types to trials where it can be deduced from the trial name.
44 sub patch {
45 my $self=shift;
47 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
49 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
51 print STDOUT "\nExecuting the SQL commands.\n";
53 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
55 my $coderef = sub {
56 #get resultsets and cv_id for project types
57 my $cv_rs = $schema->resultset("Cv::Cv");
58 my $cvterm_rs = $schema->resultset("Cv::Cvterm");
59 my $projectprop_rs = $schema->resultset("Project::Projectprop");
60 my $db_rs = $schema->resultset("General::Db");
61 my $cv_id = $cv_rs->search({name => 'project_type'})->first()->cv_id();
62 my $db_id = $db_rs->search({name => 'local'})->first()->db_id();
63 # my $cv_id = $cv_row->cv_id;
64 print STDERR "Project types cv id = $cv_id \n";
65 print STDERR "Local db id = $db_id \n";
67 #update the list of possible trial types by adding/updating to standardized names and removing duplicates
68 my $SN_id = &update_or_create_type('%seedling%', 'Seedling Nursery', $cvterm_rs, $cv_id, $db_id);
69 my $VR_id = &update_or_create_type('%variety%', 'Variety Release Trial', $cvterm_rs, $cv_id, $db_id);
71 my $CE_id = &update_or_create_type('%Clonal Evaluation%', 'Clonal Evaluation', $cvterm_rs, $cv_id, $db_id);
72 $CE_id = &find_or_update_type ('clonal', 'Clonal Evaluation', $cvterm_rs, $cv_id);
73 &link_to_new_type('clonal', $CE_id, $cv_id, $cvterm_rs, $projectprop_rs);
74 &delete_old_type('clonal', $cvterm_rs, $cv_id);
76 my $PYT_id = &find_or_update_type ('PYT', 'Preliminary Yield Trial', $cvterm_rs, $cv_id);
77 &link_to_new_type('PYT', $PYT_id, $cv_id, $cvterm_rs, $projectprop_rs);
78 &link_to_new_type('Preliminary Yield Trials', $PYT_id, $cv_id, $cvterm_rs, $projectprop_rs);
79 &delete_old_type('PYT', $cvterm_rs, $cv_id);
80 &delete_old_type('Preliminary Yield Trials', $cvterm_rs, $cv_id);
82 my $AYT_id = &update_or_create_type('%Advance%', 'Advanced Yield Trial', $cvterm_rs, $cv_id, $db_id);
83 $AYT_id = &find_or_update_type ('AYT', 'Advanced Yield Trial', $cvterm_rs, $cv_id);
84 &link_to_new_type('AYT', $AYT_id, $cv_id, $cvterm_rs, $projectprop_rs);
85 &link_to_new_type('Advanced Yield Trials', $AYT_id, $cv_id, $cvterm_rs, $projectprop_rs);
86 &delete_old_type('AYT', $cvterm_rs, $cv_id);
87 &delete_old_type('Advanced Yield Trials', $cvterm_rs, $cv_id);
89 my $UYT_id = &find_or_update_type ('UYT', 'Uniform Yield Trial', $cvterm_rs, $cv_id);
90 &link_to_new_type('UYT', $UYT_id, $cv_id, $cvterm_rs, $projectprop_rs);
91 &link_to_new_type('Uniform Yield Trials', $UYT_id, $cv_id, $cvterm_rs, $projectprop_rs);
92 &delete_old_type('UYT', $cvterm_rs, $cv_id);
93 &delete_old_type('Uniform Yield Trials', $cvterm_rs, $cv_id);
95 #delete any types not among the standard 6
96 my $all_ids = [$SN_id, $CE_id, $VR_id, $PYT_id, $AYT_id, $UYT_id ];
97 my $obsolete_types = $cvterm_rs->search(
99 cv_id => $cv_id,
100 cvterm_id => { 'not in' => $all_ids}
102 my $num_to_delete = $obsolete_types->count;
103 print STDERR "Deleting $num_to_delete additional obsolete trial types . . .\n";
104 $obsolete_types->delete();
105 print STDERR $schema->resultset("Cv::Cvterm")->search({ cv_id => $cv_id })->count() . " standard trial types remaining.\n";
107 # get all projects
108 my $all_trial_rs = $schema->resultset('Project::Project')->search;
110 #loop through all projects and use regex on trial name. if matches type abbrevation, set type.
111 while (my $trial = $all_trial_rs->next) {
112 my $trial_name = $trial->name;
113 for ($trial_name) {
114 if (/seedling/i) {
115 my $action = &update_or_create_prop($schema, $trial->project_id, $all_ids, $SN_id, 'Seedling Nursery');
116 print STDERR "trial $trial_name matched '/seedling/i', $action\n";
117 } elsif (/clonal/i) {
118 my $action = &update_or_create_prop($schema, $trial->project_id, $all_ids, $CE_id, 'Clonal Evaluation');
119 print STDERR "trial $trial_name matched '/clonal/i', $action\n";
120 } elsif (/pyt/i) {
121 my $action = &update_or_create_prop($schema, $trial->project_id, $all_ids, $PYT_id, 'Preliminary Yield Trial');
122 print STDERR "trial $trial_name matched '/pyt/i', $action\n";
123 } elsif (/ayt/i) {
124 my $action = &update_or_create_prop($schema, $trial->project_id, $all_ids, $AYT_id, 'Advanced Yield Trial');
125 print STDERR "trial $trial_name matched '/ayt/i', $action\n";
126 } elsif (/uyt/i) {
127 my $action = &update_or_create_prop($schema, $trial->project_id, $all_ids, $UYT_id, 'Uniform Yield Trial');
128 print STDERR "trial $trial_name matched '/uyt/i', $action\n";
129 } elsif (/variety/i) {
130 my $action = &update_or_create_prop($schema, $trial->project_id, $all_ids, $VR_id, 'Variety Release Trial');
131 print STDERR "trial $trial_name matched '/variety/i', $action\n";
132 } else {
133 print STDERR "no indication of type found in name of trial $trial_name\n";
138 sub update_or_create_type () {
139 my ($duplicate_type_name, $new_type_name, $cvterm_rs, $cv_id, $db_id) = @_;
140 my ($cvterm_id, $cvterm_name);
141 my $duplicate_rs = $cvterm_rs->search(
143 cv_id => $cv_id,
144 name => { -like => $duplicate_type_name }
146 if ($duplicate_rs->first) {
147 $cvterm_id = $duplicate_rs->first->cvterm_id;
148 $cvterm_name = $duplicate_rs->first->name;
149 print STDERR "Updating cvterm with name $cvterm_name and id $cvterm_id to $new_type_name \n";
150 $duplicate_rs->first->update( { name => $new_type_name }, );
151 } else {
152 print STDERR "Adding cvterm with name $new_type_name \n";
153 # looks like we may need to ge the local db_id and include it in the creation here while joining dbxref table
154 my $dbxref_rs = $schema->resultset("General::Dbxref")->create(
156 cvterm => { cv_id => $cv_id, name => $new_type_name, },
157 db_id => $db_id,
158 accession => $new_type_name,
161 $cvterm_id = $cvterm_rs->search( {name => $new_type_name }, )->first->cvterm_id;
163 print STDERR "$new_type_name has been created with id $cvterm_id \n";
164 return $cvterm_id;
167 sub find_or_update_type () {
168 my ($duplicate_type_name, $type_name, $cvterm_rs, $cv_id) = @_;
169 my $type_rs = $cvterm_rs->search(
171 cv_id => $cv_id,
172 name => $type_name
174 if (!$type_rs->first) {
175 $type_rs = $cvterm_rs->search(
177 cv_id => $cv_id,
178 name => $duplicate_type_name
180 print STDERR "Updating cvterm with name $duplicate_type_name to $type_name \n";
181 $type_rs->first->update( { name => $type_name }, );
182 } else {
183 print STDERR "Found exisiting cvterm with name $type_name and id ". $type_rs->first->cvterm_id ."\n";
185 return $type_rs->first->cvterm_id;
188 sub link_to_new_type () {
189 my ($old_type_name, $new_type_id, $cv_id, $cvterm_rs, $projectprop_rs) = @_;
190 my $duplicate_rs = $cvterm_rs->search(
192 cv_id => $cv_id,
193 name => $old_type_name
195 if ($duplicate_rs->first) {
196 my $duplicate_cvterm_id = $duplicate_rs->first->cvterm_id;
197 print STDERR "updating prop rows linked to type ". $duplicate_rs->first->name ." to standardized type with id $new_type_id \n";
198 my $trials_to_update_rs = $projectprop_rs->search(
200 type_id => $duplicate_cvterm_id
202 if ($trials_to_update_rs->first) {
203 foreach my $row ($trials_to_update_rs) {
204 my $trial_id = $row->first->project_id;
205 print STDERR "Updating trial with id $trial_id from old type $old_type_name to new type $new_type_id \n";
206 $row->update( { type_id => $new_type_id } );
212 sub delete_old_type () {
213 my ($old_type_name, $cvterm_rs, $cv_id) = @_;
214 my $old_type_row = $cvterm_rs->search(
216 cv_id => $cv_id,
217 name => $old_type_name
219 if ($old_type_row->first) {
220 print STDERR "Deleting obsolete type $old_type_name\n";
221 $old_type_row->delete;
225 sub update_or_create_prop () {
226 my ($schema, $project_id, $all_ids, $type_id, $value) = @_;
227 my $exisiting_type = $schema->resultset('Project::Projectprop')->search(
229 project_id => $project_id,
230 type_id => { -in => $all_ids}
232 if ($exisiting_type->first) {
233 if ($exisiting_type->first->type_id eq $type_id) {
234 return "and type is already set to $value";
235 } else {
236 my $old_type = $schema->resultset("Cv::Cvterm")->search({ cvterm_id => $exisiting_type->first->type_id() })->first()->name();
237 $exisiting_type->first->update( { type_id => $type_id, value => $value }, );
238 return "type updated from $old_type to $value";
240 } else {
241 my $new_type = $schema->resultset('Project::Projectprop')->create(
243 project_id => $project_id,
244 type_id => $type_id,
245 value => $value
247 return "type set to $value";
252 try {
253 $schema->txn_do($coderef);
254 } catch {
255 die "FixTrialTypes patch failed! " . $_ . "\n" ;
258 print "You're done!\n";
262 ####
263 1; #
264 ####