a little more sanity checking in tomato genome pubs db patch
[sgn.git] / db / 0001_load_tomato_gen_pubs.pl
blob895de5043b8f53346a84f716c2da4f39449180c8
1 #!/usr/bin/env perl
4 =head1 NAME
6 0001_load_tomato_gen_pubs.pl
8 =head1 SYNOPSIS
10 0001_load_tomato_gen_pubs.pl [options]
12 Options:
14 -D <dbname> (mandatory)
15 dbname to load into
17 -H <dbhost> (mandatory)
18 dbhost to load into
20 -p <script_executor_user> (mandatory)
21 username to run the script
23 -F force this script to run even if previous
24 db_patches are missing
26 Note: The first time that you run this script there is
27 no previous dbversion row in the md_dbversion
28 table, so you need to force the execution of this script
29 using -F
31 =head1 DESCRIPTION
33 store in the database the tomato genome publications as pubprops
35 =head1 AUTHORS
37 Jean Lee <jhl233@cornell.edu>
38 Naama Menda <nm249@cornell.edu>
40 =head1 COPYRIGHT & LICENSE
42 Copyright 2010 Boyce Thompson Institute for Plant Research
44 This program is free software; you can redistribute it and/or modify
45 it under the same terms as Perl itself.
47 =cut
50 use strict;
51 use warnings;
53 use Bio::Chado::Schema;
54 use Pod::Usage;
55 use Getopt::Std;
56 use CXGN::DB::InsertDBH;
57 use CXGN::Metadata::Dbversion; ### Module to interact with the metadata.md_dbversion table
## Command-line switches. Getopt::Std stores each one in the matching
## $opt_* package variable: -H, -D and -p take a value, -F and -h are flags.
our ($opt_H, $opt_D, $opt_p, $opt_F, $opt_h);
getopts("H:D:p:Fh");

## Print the POD usage (pod2usage(1) exits with status 1) when no switch
## at all was supplied, or when -h was requested explicitly.
my $any_option_given = $opt_H || $opt_D || $opt_p || $opt_F || $opt_h;

unless ($any_option_given) {
    print STDOUT "No optionas passed. Printing help\n\n";
    pod2usage(1);
}

pod2usage(1) if $opt_h;
## Name and description of this patch. Both values are handed to the
## dbversion check further down in this script.
my $patch_name  = '0001_load_tomato_gen_pubs.pl';
my $patch_descr = 'This script stores pubprop for the tomato genome publications. It assumes these are ALREADY STORED in the database. The best way to load first the publications is by using the web interface ';

## Validate the mandatory parameters BEFORE doing anything else.
## Missing parameters used to be reported without stopping the script, so it
## went on to print the banner with an undefined $opt_p and to attempt a
## database connection without a name/host. Report every problem, then exit
## with a non-zero status.
my @parameter_errors;

if (!$opt_H || !$opt_D) {
    push @parameter_errors,
        "\nMANDATORY PARAMETER ERROR: -D <db_name> or/and -H <db_host> parameters has not been specified for $patch_name.\n";
}

if (!$opt_p) {
    push @parameter_errors,
        "\nMANDATORY PARAMETER ERROR: -p <script_executor_user> parameter has not been specified for $patch_name.\n";
}

if (@parameter_errors) {
    print STDOUT @parameter_errors;
    exit 1;
}

## Banner describing what is about to run and who runs it.
print STDOUT "\n+--------------------------------------------------------------------------------------------------+\n";
print STDOUT "Executing the patch:\n $patch_name.\n\nDescription:\n $patch_descr.\n\nExecuted by:\n $opt_p.";
print STDOUT "\n+--------------------------------------------------------------------------------------------------+\n\n";

## Patches that must already have been applied before this one.
## Empty: this patch has no prerequisites.
my @previous_requested_patches = ( ## ADD HERE
);
## Open the database handle used for the metadata schema and the commit.
## This should be replaced by CXGN::DB::DBICFactory as soon as it can use
## CXGN::DB::InsertDBH.
## NOTE(review): CXGN::Metadata::Schema and CXGN::Metadata::Metadbdata are not
## loaded explicitly by this file; presumably CXGN::Metadata::Dbversion pulls
## them in -- confirm.
my %connection_args = (
    dbname => $opt_D,
    dbhost => $opt_H,
);
my $db  = CXGN::DB::InsertDBH->new( \%connection_args );
my $dbh = $db->get_actual_dbh();

print STDOUT "\nCreating the Metadata Schema object.\n";

## The schema reuses the handle opened above instead of connecting again.
my $metadata_schema = CXGN::Metadata::Schema->connect(
    sub { $dbh },
    { on_connect_do => ['SET search_path TO metadata;'] },
);

print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";

### Check whether this patch, or the patches it requires, were already run.
### -F (force) is passed through to the check.
my %checking_args = (
    patch_name   => $patch_name,
    patch_descr  => $patch_descr,
    prepatch_req => \@previous_requested_patches,
    force        => $opt_F,
);
my $dbversion_checker = CXGN::Metadata::Dbversion->new($metadata_schema);
my $dbversion         = $dbversion_checker->complete_checking( \%checking_args );

### Create a metadata object for the executing user and store it to obtain
### a new metadata_id for this data.
my $metadata        = CXGN::Metadata::Metadbdata->new($metadata_schema, $opt_p);
my $stored_metadata = $metadata->store();
my $metadata_id     = $stored_metadata->get_metadata_id();
### Data can be inserted in different ways:
##   1- By sql queries using $dbh->do(<<EOSQL); and detailing in the tag the queries
##   2- Using objects with the store function
##   3- Using DBIx::Class first level objects
##
## This patch uses option 3: Bio::Chado::Schema result objects.

print STDERR "\nExecuting the SQL commands.\n";

## The Chado schema shares the handle held by $db, so the final commit on
## that handle covers everything created below.
my $schema = Bio::Chado::Schema->connect(
    sub { $db->get_actual_dbh() },
    { on_connect_do => ['SET SEARCH_PATH TO public, metadata;'] },
);

#---arrays of pubmed ids and titles of publications---
my @pmid = (
    18254380,
    18317508,
    17565940,
    16830097,
    16524981,
    16489216,
    16208505,
    16010005,
    10645957,
    10382301,
    10224272,
    18469880,
    8662247,
    8653264,
    8647403,
);

my @titles = (
    'A Snapshot of the Emerging Tomato Genome Sequence',
    'Estimation of nuclear DNA content of plants by flow cytometry',
);

#---add pubmed publications to pubprop---
my $pmdb = $schema->resultset('General::Db')->find({ name => 'PMID' })
    or die "no db found with name 'PMID'";

foreach my $accession ( @pmid ) {
    my $dbxref = $pmdb->find_related( 'dbxrefs', { accession => $accession } )
        or die "no dbxref found for PMID $accession";

    # Check each step of the dbxref -> pub_dbxref -> pub walk. A dbxref that
    # is not linked to any publication would otherwise abort with an
    # unhelpful "Can't call method ... on an undefined value".
    my $pub_dbxref = $dbxref->find_related( 'pub_dbxrefs', {} )
        or die "no pub_dbxref found for PMID $accession";

    my $pub = $pub_dbxref->find_related( 'pub', {} )
        or die "no publication found for PMID $accession";

    $pub->create_pubprops(
        { 'tomato genome publication' => '1' },
        { autocreate => 1 },
    );
}

#---add other publications manually---
# These publications are looked up by their exact title instead of a PMID.
foreach my $title ( @titles ) {
    my $pub = $schema->resultset('Pub::Pub')->find({ title => $title })
        or die "no publication found with title '$title'";

    $pub->create_pubprops(
        { 'tomato genome publication' => '1' },
        { autocreate => 1 },
    );
}
## Add this patch to the md_dbversion table. The dbversion object already
## carries the patch name and description, so it only needs to be stored.
$dbversion->store($metadata);

## Commit before announcing success, so that "DONE!" is never printed for a
## transaction whose commit failed.
$dbh->commit;

print STDOUT "DONE!\n";

__END__