3 # basic script to load snp genotypes
5 # usage: load_snps.pl -H hostname D dbname -i infile
7 # In General row headings are the accession name (or synonym) , which needs to be looked up in the stock table, and column headings are marker name, or alias.
9 # copy and edit this file as necessary
10 # common changes include the following:
17 load_snps.pl - a script to load snp genotypes into the SGN database (see sgn.snp table) .
29 The hostname of the server hosting the database.
33 the name of the database
37 (optional) test mode. Rollback after the script terminates. Database should not be affected. Good for test runs.
42 infile with the marker info
46 outfile for catching errors and other messages
50 The tab-delimited snp genotype file must have stocks and markers which already exist in the database.
51 Non-existing stocks or markers will be skipped.
56 Naama Menda <nm249@cornell.edu>
64 use CXGN
::Tools
::File
::Spreadsheet
;
65 use CXGN
::Tools
::Text
;
67 use Bio
::Chado
::Schema
;
70 use CXGN
::Marker
::Tools
;
71 use CXGN
::DB
::Connection
;
72 use CXGN
::DB
::InsertDBH
;
75 use CXGN
::DB
::SQLWrappers
;
80 our ($opt_H, $opt_D, $opt_i, $opt_t, $opt_o);
86 my $dbh = CXGN
::DB
::InsertDBH
->new({
89 dbargs
=> {AutoCommit
=> 0,
92 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , );
93 # { on_connect_do => ['SET search_path TO public'], }, );
95 my $sql=CXGN
::DB
::SQLWrappers
->new($dbh);
99 # make an object to give us the values from the spreadsheet
100 my $ss = CXGN
::Tools
::File
::Spreadsheet
->new($opt_i);
101 my @stocks = $ss->row_labels(); # row labels are the marker names
102 my @markers = $ss->column_labels(); # column labels are the headings for the data columns
104 for my $stock_name (@stocks) {
105 print "stockname = $stock_name\n";
106 my $stock_id = $schema->resultset("Cv::Cvterm")->search( {
107 name
=> 'solcap number' } )->
108 search_related
('stockprops' , { value
=> $stock_name } )->
109 first
->stock_id or die("No stock found for solcap number $stock_name! \n\n");
110 message
( "*************Stock name = $stock_name, id = $stock_id\n" );
111 for my $marker_name (@markers) {
112 print "marker: $marker_name\n";
113 my @marker_ids = CXGN
::Marker
::Tools
::marker_name_to_ids
($dbh,$marker_name);
114 if (@marker_ids>1) { die "Too many IDs found for marker '$marker_name'" }
115 # just get the first ID in the list (if the list is longer than 1, we've already died)
116 my $marker_id = $marker_ids[0];
119 message
("Marker $marker_name does not exist! Skipping!!\n");
122 else { message
( "Marker name : $marker_name, marker_id found: $marker_id\n" ) ; }
124 my $genotype=$ss->value_at($stock_name,$marker_name)
125 or message
("No genotype found for stock $stock_name and marker $marker_name!");
126 print "genotype: $genotype\n";
127 if ($genotype !~ /[a-zA-Z]/ ) {
128 message
("non-snp genotype ($genotype) . Skipping!!");
131 my $snp_genotype =$sql->insert_unless_exists('snp',{marker_id
=>$marker_id, snp_nucleotide
=> $genotype, stock_id
=> $stock_id } );
138 print "Failed; rolling back.\n";
144 print"Rolling back.\n";
148 print"Committing.\n";
156 write_file
( $opt_o, {append
=> 1 }, $message . "\n" ) if $opt_o;