5 download_genotypes.pl - downloads a genotyping file (vcf or dosage) using a file with a list of accession names and a genotyping protocol name.
9 perl bin/download_genotypes.pl -h [dbhost] -d [dbname] -i [infile] -o [outfile] -p [genotyping_protocol]
11 =head2 REQUIRED ARGUMENTS
13 -h host name e.g. "localhost"
14 -d database name e.g. "cxgn_cassava"
15 -p genotyping protocol name
17 -o path to output file
18 -f output format, "vcf" or "dosage" (optional) [default vcf]
20 =head2 OPTIONAL ARGUMENTS
23 -t cluster shared temp dir [default /tmp]
36 use Bio
::Chado
::Schema
;
40 use SGN
::Model
::Cvterm
;
41 use CXGN
::DB
::InsertDBH
;
42 use CXGN
::Dataset
::File
;
# Option parsing. The opt_* package variables declared here receive the
# switch values (presumably via the getopts of Getopt::Std; its import is not
# visible in this excerpt). Every letter in the spec string is followed by a
# colon, so every switch expects a value.
45 our ($opt_h, $opt_d, $opt_p, $opt_i, $opt_o, $opt_q, $opt_t, $opt_c, $opt_b, $opt_f);
47 getopts
("h:d:p:i:o:q:t:c:b:f:");
# Copy the options into named variables. The defaults are applied with the
# || operator, so a supplied value that is false in Perl (empty string or 0)
# is replaced by the default as well.
52 my $out_file = $opt_o;
53 my $protocol_name = $opt_p;
54 my $web_cluster_queue = $opt_q || '';
55 my $cluster_shared_tempdir = $opt_t || '/tmp';
56 my $cluster_host = $opt_c || 'localhost';
57 my $format = $opt_f || "vcf";
# NOTE(review): no use of basepath is visible in this excerpt -- confirm it
# is still needed.
58 my $basepath = $opt_b || '/home/production/cxgn/sgn';
# Connect to the database through CXGN::DB::InsertDBH, passing the host and
# a dbargs hash with AutoCommit set to 1.
# NOTE(review): the assignment of dbhost and the remaining constructor
# arguments are not visible in this excerpt.
60 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
62 dbargs
=> {AutoCommit
=> 1,
# Chado schema object. The callback given to connect returns the handle
# obtained from get_actual_dbh() on the connection opened above.
68 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() });
# Look the genotyping protocol up by its exact name. The name is bound
# through a placeholder rather than interpolated into the SQL text.
70 my $q = "SELECT nd_protocol_id, name FROM nd_protocol WHERE name = ?";
72 my $h = $dbh->prepare($q);
73 $h->execute($protocol_name);
# Report every matching row on STDERR and remember its id. If more than one
# row matches, protocol_id ends up holding the id of the last row fetched.
78 while (my ($pr_id, $pr_name) = $h->fetchrow_array()) {
79 print STDERR
"\nFound genotyping protocol: $pr_name -- id: $pr_id\n";
81 $protocol_id = $pr_id;
# Abort when the lookup found nothing.
# NOTE(review): protocol_exists is declared and set outside the visible
# lines -- presumably inside the loop above; confirm.
84 if (!$protocol_exists) {
85 die "\n\nGENOTYPING PROTOCOL $protocol_name does not exist in the database\n\n";
# Open the file given as in_file for reading, decoding it as UTF-8. The
# script dies if the file cannot be opened.
91 open(my $F, "< :encoding(UTF-8)", $in_file) || die "Can't open file $in_file\n";
# Store the current value of the default variable as an accession name.
# NOTE(review): the surrounding read loop is not visible here -- confirm
# that each line is chomped before it is stored.
95 push @accession_names, $_;
# Two schema objects over the same database handle: a Bio::Chado::Schema (s)
# and a CXGN::People::Schema (p). Both are handed to the dataset constructor
# further down.
# NOTE(review): s is connected exactly like the schema variable created
# earlier in the script.
100 my $s = Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
101 my $p = CXGN
::People
::Schema
->connect( sub { $dbh->get_actual_dbh() });
# Translate each accession name into a stock id by looking for a
# Stock::Stock row whose uniquename equals the name.
# NOTE(review): the lexical loop variable a shadows the package variable
# used by sort blocks; a more descriptive name would avoid confusion.
105 foreach my $a (@accession_names) {
106 my $row = $s->resultset('Stock::Stock')->find( { uniquename
=> $a });
# Message for a name without a matching stock row.
# NOTE(review): the condition and the skip that guard this message are not
# visible in this excerpt.
108 print STDERR
"Accession $a does not exist! Skipping!\n";
# Keep the stock id of the row that was found.
111 push @accession_ids, $row->stock_id();
# Create a CXGN::Dataset::File object from the people schema and the Chado
# schema.
115 my $ds = CXGN
::Dataset
::File
->new( { people_schema
=> $p, schema
=> $s } );
# Set the dataset accessions to the collected stock ids.
118 $ds->accessions(\
@accession_ids);
# Set the dataset genotyping protocols to a one-element list holding the
# protocol id found earlier.
121 $ds->genotyping_protocols([ $protocol_id ]);
# Dispatch on the requested format: retrieve_genotypes_vcf for vcf,
# retrieve_genotypes for dosage. Both calls receive the protocol id, the
# output file, the literal /tmp, the cluster shared temp dir, the literal
# Slurm, the cluster host and the queue. Any further arguments are not
# visible in this excerpt.
# NOTE(review): the value stored in fh is not used in the visible lines.
123 if ($format eq "vcf") {
124 my $fh = $ds->retrieve_genotypes_vcf($protocol_id, $out_file, '/tmp',
125 $cluster_shared_tempdir, 'Slurm',
126 $cluster_host, $web_cluster_queue,
129 elsif ($format eq "dosage") {
130 my $fh = $ds->retrieve_genotypes($protocol_id, $out_file, '/tmp',
131 $cluster_shared_tempdir, 'Slurm',
132 $cluster_host, $web_cluster_queue,
# Message for a format that is neither vcf nor dosage (the else that
# presumably guards it is not visible in this excerpt).
136 print STDERR
"Unknown format $format.\n";
# Final progress message on STDERR.
139 print STDERR
"Done.\n";