Merge pull request #5163 from solgenomics/audit-error-checking
[sgn.git] / db / 00004 / AddNewMapParents.pm
blob29a4d52c7669a345ad273f1cb497ade6878d53a5
1 #!/usr/bin/env perl
4 =head1 NAME
6 AddNewMapParents.pm
8 =head1 SYNOPSIS
10 mx-run ThisPackageName [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
17 This is a patch for updating parents for the new Nicotiana maps (Wu et al 2010) and the potato map (from Dan Bolser)
19 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
21 =head1 AUTHOR
23 Naama Menda<nm249@cornell.edu>
25 =head1 COPYRIGHT & LICENSE
27 Copyright 2010 Boyce Thompson Institute for Plant Research
29 This program is free software; you can redistribute it and/or modify
30 it under the same terms as Perl itself.
32 =cut
35 package AddNewMapParents;
37 use Moose;
38 extends 'CXGN::Metadata::Dbpatch';
40 use Bio::Chado::Schema;
41 use CXGN::Accession;
43 sub init_patch {
44 my $self = shift;
45 my $name = __PACKAGE__;
46 print "dbpatch name is ':" . $name . "\n\n";
47 my $description = 'loading map parents for tobacco and potato maps';
48 my @previous_requested_patches ; #ADD HERE
50 $self->name($name);
51 $self->description($description);
52 $self->prereq(\@previous_requested_patches);
56 sub patch {
57 my $self=shift;
58 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
60 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
62 print STDOUT "\nExecuting the SQL commands.\n";
63 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
65 #-- insert Nicotiana acuminata#
67 #insert into sgn.accession (chado_organism_id, common_name) values (22547, 'Nicotiana acuminata');
69 #insert into sgn.accession (chado_organism_id, common_name) values (1483, 'Nicotiana tomentosiformis');
71 #insert into sgn.accession (chado_organism_id, common_name) values (13311, 'Solanum phureja');
73 my @maps = (
75 name => "Tobacco N. tomentosiformis",
76 abstract => "population of 55 interspecific F2 plants from the cross N. tomentosiformis TA3385 × N. otophora TA3353",
77 parent1 =>'TA3385',
78 parent2 => 'TA3353',
79 species1 => 'Nicotiana tomentosiformis',
80 species2 => 'Nicotiana otophora',
81 long_name => "N. tomentosiformis TA3385 × N. otophora TA3353",
85 name => "Tobacco N. acuminata",
86 abstract => "mapping population of 51 intraspecific F2 plants from N. acuminata TA3460 × N. acuminata var. multiflora TA3461",
87 parent1 =>'TA3460',
88 parent2 => 'TA3461',
89 species1 => 'Nicotiana acuminata',
90 species2 => 'Nicotiana acuminata var. multiflora',
91 long_name => "N. acuminata TA3460 × N. acuminata var. multiflora TA3461",
95 name => "Solanum phureja diploid map 2010",
96 abstract => "Potato, the world's most important vegetable crop and a key member of the Solanaceae, is being sequenced by the multi-national Potato Genome Sequencing Consortium (PGSC, see www.potatogenome.net). Using a whole
97 genome shotgun approach the PGSC has generated a high quality draft
98 sequence of a completely homozygous ‘doubled monoploid’ clone (DM1-3
99 516R44 or CIP 801092) of S. tuberosum group Phureja complementing
100 their earlier efforts using the heterozygous RH89-039-16 clone. In
101 order to augment the genetic and physical anchoring of the sequenced
102 DM genome, a segregating backcross population between the DM clone and
103 a heterozygous diploid S. goniocalyx clone (CIP No. 703825) as the
104 recurrent parent was established. The polymorphism across 169 progeny
105 clones was assessed using a total of 4836 STS markers including 2174
106 DArT (Diversity Arrays Technology), 378 SSR (simple sequence repeat)
107 alleles and 2304 SNP (single nucleotide polymorphism) marker types.
108 SSR and SNP markers were designed directly to scaffolds, whereas
109 polymorphic DArT marker sequences were searched against the scaffolds
110 for high quality unique matches. The data from 2619 polymorphic STS
111 markers was analysed using JoinMap 4 and a DM genetic map containing
112 the expected 12 potato linkage groups was developed de novo. The
113 mapped STS markers, because of their known unique position and/or
114 sequence on the genome, were directly anchored to the DM
115 super-scaffolds. This in turn assisted in physical anchoring of DM
116 super-scaffolds on to the DM/DI//DI linkage map. In addition to this,
117 in silico approaches involving the RH genetic and physical map, as
118 well as tomato map data from SGN (http://solgenomics.net/) were also
119 exploited to further enhance the anchoring of DM genome. Overall, we
120 are able to genetically anchor 623 Mb (85.7%) of the assembled 727 Mb
121 genome arranged in 651 super-scaffolds to an approximate location onto
122 one of the twelve potato linkage groups. In the post potato sequencing
123 era, this integrated sequence and genetic reference map will form an
124 important resource for linking to all future genetic mapping efforts
125 by the potato community.",
126 parent1 =>'CIP 801092',
127 parent2 => 'CIP 703825',
128 species1 => 'Solanum phureja',
129 species2 => 'Solanum goniocalyx',
130 long_name => "DM1-3 516R44 clone (CIP 801092) backcrossed to a heterozygous diploid S. goniocalyx clone (CIP No. 703825)",
134 $schema->resultset("Organism::Organism")->find_or_create( {
135 genus => 'Solanum',
136 species => 'Solanum goniocalyx',
137 }, );
138 $schema->resultset("Organism::Organism")->find_or_create( {
139 genus => 'Nicotiana',
140 species => 'Nicotiana acuminata var. multiflora',
141 }, );
142 my $result = $schema->txn_do( sub {
143 for my $map ( @maps ) {
145 my ( $map_id ) = $self->dbh->selectrow_array( <<'', undef, $map->{name} );
146 SELECT map_id
147 FROM sgn.map
148 WHERE short_name = ?
150 $map_id or die "Map '$map->{name}' not found in database. Aborting.\n";
152 my $accession_id1 = $self->_find_accession( $schema, $map->{name}, $map->{parent1}, $map->{species1} );
153 my $accession_id2 = $self->_find_accession( $schema, $map->{name}, $map->{parent2}, $map->{species2} );
155 print <<"";
156 Updating map '$map->{name}'
157 id = $map_id
158 abstract = $map->{abstract}
159 parent1 = $accession_id1
160 parent2 = $accession_id2
162 $self->dbh->do( <<'', undef, $map->{long_name}, $map->{abstract}, $accession_id1, $accession_id2, ,'genetic' , $map_id );
163 UPDATE sgn.map
164 SET long_name= ?, abstract = ? , parent_1 = ? , parent_2 = ?, map_type = ?
165 WHERE map_id = ?
169 if ( $self->trial ) {
170 print "Trial mode! Rolling back transaction.\n\n";
171 $schema->txn_rollback;
172 return 0;
173 } else {
174 print "Committing.\n";
175 return 1;
179 print $result ? "Patch applied successfully.\n" : "Patch not applied.\n";
182 sub _find_accession {
183 my $self = shift;
184 my $schema = shift;
185 my $name = shift;
186 my $parent = shift;
187 my $species = shift;
189 my $organism = $schema->resultset("Organism::Organism")->find( { species=> $species }, );
190 die 'Organism $species not found in the database! Aborting\n' if !$organism;
191 my $sgn_q = "SELECT organism_id FROM sgn.organism WHERE chado_organism_id =? ";
192 my $o_sth = $self->dbh->prepare($sgn_q);
193 $o_sth->execute($organism->organism_id);
194 my ($sgn_organism_id) = $o_sth->fetchrow_array() || 'NULL';
196 my $accession_cvterm = $schema->resultset("Cv::Cvterm")->create_with( {
197 name => 'accession',
198 cv => 'stock type', }
200 $self->dbh->do('set search_path to sgn, public');
201 my $accession = CXGN::Accession->new($self->dbh , $parent);
202 my $accession_id;
203 if ( !$accession) {
204 my ($stock) = $schema->resultset("Stock::Stock")->find_or_create(
206 name => $parent,
207 uniquename => $parent,
208 type_id => $accession_cvterm->cvterm_id,
209 organism_id => $organism->organism_id,
210 }, );
211 print "inserting into accession_names value $parent\n";
212 $self->dbh->do("insert into sgn.accession_names (accession_name) values ('".$parent."') " );
213 my $a_name_id = $self->dbh->last_insert_id('Pg' , 'sgn','accession_names', 'accession_name_id');
214 print "inserting into accession value $a_name_id, stock_id = " . $stock->stock_id . "\n";
215 $self->dbh->do("insert into sgn.accession (accession_name_id, stock_id, organism_id, chado_organism_id) values ('".$a_name_id . "', " . $stock->stock_id . ", $sgn_organism_id , " . $organism->organism_id . ")");
216 $accession_id = $self->dbh->last_insert_id('Pg' , 'sgn','accession', 'accession_id');
217 print "Updating accession_names , setting accession_id = ". $accession_id . "\n";
218 $self->dbh->do("UPDATE sgn.accession_names SET accession_id = ". $accession_id . " WHERE accession_name_id = $a_name_id");
220 $accession = CXGN::Accession->new($self->dbh , $parent);
221 } else { $accession_id = $accession->accession_id ; }
223 return $accession_id;
229 ####
230 1; #
231 ####