make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / load_sequence_metadata_protocol.pl
blob0d3f556a525b051bb7a3d91f5c81769edadcf349
1 #! /usr/bin/perl
=head1 NAME

load_sequence_metadata_protocol.pl - create a new sequence metadata protocol

=head1 SYNOPSIS

This script uses the CXGN::Genotype::SequenceMetadata package to create a new
Sequence Metadata Protocol in the nd_protocol table and store its sequence
metadata protocol props in the nd_protocolprop table.

=head1 COMMAND-LINE OPTIONS

  ARGUMENTS
    -H  host name (required) e.g. "localhost"
    -D  database name (required) e.g. "cxgn_cassava"
    -U  database username (required)
    -P  database password (optional, default=prompt user for password)
    -t  Sequence Metadata Type ID, cvterm_id of term from 'sequence_metadata_types' CV (required)
    -n  protocol name (required)
    -d  protocol description (required)
    -r  reference genome name (required)
    -s  score description (optional)
    -a  attribute names and descriptions (optional)
        Example: "ID=marker name,Locus=gene name,pvalue=p value"
    -l  external link definitions (optional)
        Example: "JBrowse=https://wheat.pw.usda.gov/jb/?data=/ggds/whe-iwgsc2018&loc={{Locus}},Knetminer=https://knetminer.rothamsted.ac.uk/wheatknet/genepage?keyword={{Trait}}&list={{Locus}}"

=head1 AUTHOR

David Waring <djw64@cornell.edu>

=cut
32 use strict;
34 use Getopt::Std;
35 use Data::Dumper;
37 use Bio::Chado::Schema;
38 use CXGN::Genotype::SequenceMetadata;
# Read CLI Options
# Getopt::Std stores the value of each switch in the package variable
# $opt_<letter>. Every switch below is followed by ':' and therefore takes
# an argument.
our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_t, $opt_n, $opt_d, $opt_r, $opt_s, $opt_a, $opt_l);

# getopts() returns false when it meets an unknown switch or a switch that is
# missing its argument (it has already printed a warning by then). Stop here
# instead of continuing with a partially parsed command line.
getopts('H:D:U:P:t:n:d:r:s:a:l:')
    or die "ERROR: Invalid command-line options!\n";

# Check for required arguments
if ( !$opt_H || !$opt_U || !$opt_D ) {
    die "ERROR: Database options -H, -D, and -U are required!\n";
}
if ( !$opt_t ) {
    die "ERROR: Sequence Metadata Type is required!\n";
}
if ( !$opt_n ) {
    die "ERROR: Sequence Metadata Protocol name is required!\n";
}
if ( !$opt_d ) {
    die "ERROR: Sequence Metadata Protocol description is required!\n";
}
if ( !$opt_r ) {
    die "ERROR: Reference genome name is required!\n";
}
# Connect to DB
# Use the password given with -P; when none was given, prompt for one.
my $pass = $opt_P;
if ( !defined $pass || $pass eq '' ) {
    print "Password for $opt_H / $opt_D: \n";

    # Read from STDIN explicitly: the bare <> operator would first try to
    # open any leftover command-line arguments as files.
    $pass = <STDIN>;

    # <STDIN> yields undef at end of input (e.g. closed or empty stdin).
    die "ERROR: No password provided!\n" if !defined $pass;
    chomp($pass);
}

print STDERR "Connecting to database...\n";

# NOTE: the PostgreSQL port is fixed at 5432; there is no CLI option for it.
my $dsn = 'dbi:Pg:database=' . $opt_D . ';host=' . $opt_H . ';port=5432';
my $schema = Bio::Chado::Schema->connect($dsn, $opt_U, $pass);
# Parse attributes (-a) and external links (-l).
# Both options use the same format: comma-separated "name=value" entries.
my %attributes = _parse_key_value_list($opt_a);
my %links      = _parse_key_value_list($opt_l);

# Split a comma-separated list of "name=value" entries into name/value pairs.
#
# Arguments:
#   $list - the raw option string; may be undef or empty
# Returns:
#   a flat (name => value, ...) list meant to be assigned to a hash;
#   the empty list when $list is undef or ''.
#
# Each entry is split on its FIRST '=' only, so a value may itself contain
# '=' (link URLs with query strings such as "?data=...&loc=..." do).
# Entries without a name (empty entries, or entries starting with '=') are
# skipped. An entry without any '=' is stored with an undef value. When a
# name occurs more than once, the last occurrence wins.
sub _parse_key_value_list {
    my ($list) = @_;
    return () if !defined $list || $list eq '';

    my %value_for;
    foreach my $entry ( split /,/, $list ) {
        my ($name, $value) = split /=/, $entry, 2;
        next if !defined $name || $name eq '';
        $value_for{$name} = $value;
    }
    return %value_for;
}
# Create Protocol
# Build a SequenceMetadata object for the requested type, pass it the
# settings collected from the command line, and dump whatever
# create_protocol() returns to STDERR.
my $smd = CXGN::Genotype::SequenceMetadata->new(
    bcs_schema => $schema,
    type_id    => $opt_t,
);

my $protocol_args = {
    protocol_name        => $opt_n,
    protocol_description => $opt_d,
    reference_genome     => $opt_r,
    score_description    => $opt_s,
    attributes           => \%attributes,
    links                => \%links,
};
my $results = $smd->create_protocol($protocol_args);

print STDERR "Results:\n", Dumper($results);