Merge pull request #2802 from solgenomics/topic/cross_progenies
[sgn.git] / bin / load_snps.pl
blob5738c2fc9efa5f4e6f99e631f8e9a13cc677f4a0
1 #!/usr/bin/perl
3 # basic script to load snp genotypes
5 # usage: load_snps.pl -H hostname D dbname -i infile
7 # In General row headings are the accession name (or synonym) , which needs to be looked up in the stock table, and column headings are marker name, or alias.
9 # copy and edit this file as necessary
10 # common changes include the following:
13 =head1
15 NAME
17 load_snps.pl - a script to load snp genotypes into the SGN database (see sgn.snp table) .
19 =head1 DESCRIPTION
21 usage: load_snps.pl
23 Options:
25 =over 5
27 =item -H
29 The hostname of the server hosting the database.
31 =item -D
33 the name of the database
35 =item -t
37 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
40 =item -i
42 infile with the marker info
44 =item -o
46 outfile for catching errors and other messages
48 =back
50 The tab-delimited snp genotype file must have stocks and markers which already exist in the database.
51 Non-existing stocks or markers will be skipped.
54 =head1 AUTHORS
56 Naama Menda <nm249@cornell.edu>
59 =cut
61 use strict;
62 use warnings;
64 use CXGN::Tools::File::Spreadsheet;
65 use CXGN::Tools::Text;
66 use File::Slurp;
67 use Bio::Chado::Schema;
69 use CXGN::Marker;
70 use CXGN::Marker::Tools;
71 use CXGN::DB::Connection;
72 use CXGN::DB::InsertDBH;
74 use Data::Dumper;
75 use CXGN::DB::SQLWrappers;
77 use Getopt::Std;
80 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_o);
82 getopts('H:D:ti:o:');
86 my $dbh = CXGN::DB::InsertDBH->new({
87 dbname => $opt_D,
88 dbhost => $opt_H,
89 dbargs => {AutoCommit => 0,
90 RaiseError => 1}
91 });
92 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , );
93 # { on_connect_do => ['SET search_path TO public'], }, );
95 my $sql=CXGN::DB::SQLWrappers->new($dbh);
97 eval {
99 # make an object to give us the values from the spreadsheet
100 my $ss = CXGN::Tools::File::Spreadsheet->new($opt_i);
101 my @stocks = $ss->row_labels(); # row labels are the marker names
102 my @markers = $ss->column_labels(); # column labels are the headings for the data columns
104 for my $stock_name (@stocks) {
105 print "stockname = $stock_name\n";
106 my $stock_id = $schema->resultset("Cv::Cvterm")->search( {
107 name => 'solcap number' } )->
108 search_related('stockprops' , { value => $stock_name } )->
109 first->stock_id or die("No stock found for solcap number $stock_name! \n\n");
110 message( "*************Stock name = $stock_name, id = $stock_id\n" );
111 for my $marker_name (@markers) {
112 print "marker: $marker_name\n";
113 my @marker_ids = CXGN::Marker::Tools::marker_name_to_ids($dbh,$marker_name);
114 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
115 # just get the first ID in the list (if the list is longer than 1, we've already died)
116 my $marker_id = $marker_ids[0];
118 if(!$marker_id) {
119 message("Marker $marker_name does not exist! Skipping!!\n");
120 next;
122 else { message( "Marker name : $marker_name, marker_id found: $marker_id\n" ) ; }
124 my $genotype=$ss->value_at($stock_name,$marker_name)
125 or message("No genotype found for stock $stock_name and marker $marker_name!");
126 print "genotype: $genotype\n";
127 if ($genotype !~ /[a-zA-Z]/ ) {
128 message("non-snp genotype ($genotype) . Skipping!!");
129 next;
131 my $snp_genotype =$sql->insert_unless_exists('snp',{marker_id=>$marker_id, snp_nucleotide => $genotype, stock_id=> $stock_id } );
136 if ($@) {
137 print $@;
138 print "Failed; rolling back.\n";
139 $dbh->rollback();
141 else {
142 print"Succeeded.\n";
143 if ($opt_t) {
144 print"Rolling back.\n";
145 $dbh->rollback();
147 else {
148 print"Committing.\n";
149 $dbh->commit();
153 sub message {
154 my $message = shift;
155 print $message;
156 write_file( $opt_o, {append => 1 }, $message . "\n" ) if $opt_o;