make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / load_marker_dbxref.pl
blobbb69bd2f43b76f388b9a8b8c22176e9583561eb1
2 =head1 NAME
3 load_dbxref.pl - a script to link marker_names with cvterm for external database link, this script uses the nd_protocolprop table
4 with a typdef_id
6 =head1 SYNOPSYS
8 load_locus_publications.pl -p [person_id] -H [hostname] -D [database name] -c dbxref -j protocol_id file
10 where file contains a column with marker names
11 if marker_name does not exist then do not insert
12 nd_protocolprop format { dbxref: dbxref_name, marker: {array of marker_names} }
13 To use an existing protocol (not create a new nd_protocol name entry), use -j protocol_id
15 =head1 COMMAND-LINE OPTIONS
16 ARGUMENTS
18 -H host name (required) e.g. "localhost"
19 -D database name (required) e.g. "cxgn_cassava"
20 -j protocol_id (Will associate genotype data to an existing nd_protocol_id)
22 =head1 AUTHOR
24 Clay Birkett <clb343@cornell.edu>
26 =cut
28 use strict;
29 use warnings;
31 use Getopt::Std;
32 use File::Slurp qw | slurp |;
33 use CXGN::DB::InsertDBH;
35 our %opts;
36 getopts('p:H:D:j:', \%opts);
38 my $file = shift;
40 my @lines = slurp($file);
41 chomp(@lines);
43 my $dbh = CXGN::DB::InsertDBH->new( { dbname => $opts{D},
44 dbhost => $opts{H},
45 });
48 my $sth;
49 my @row;
50 my $alias;
51 my $count;
52 my $count_add;
53 my $marker_id;
54 my %alias_list;
55 my %marker_list;
56 my %unique_list;
58 my $protocol_id = $opt_j;
60 #if protocol_id provided, a new one will not be created
61 if ($protocol_id){
62 my $protocol = CXGN::Genotype::Protocol->new({
63 bcs_schema => $schema,
64 nd_protocol_id => $protocol_id
65 });
66 $organism_species = $protocol->species_name;
67 $obs_type = $protocol->sample_observation_unit_type_name if !$obs_type;
70 ##get list of current alias entries
71 ##use (marker_id, alias) as unique key so we don't duplicate entries
72 my $key;
73 $count = 0;
74 $sth = $dbh->prepare("SELECT alias, marker_id, preferred from marker_alias");
75 $sth->execute();
76 while (@row = $sth->fetchrow_array()) {
77 $count++;
78 $alias = $row[0];
79 if ($row[2]) {
80 $marker_list{$alias} = $row[1];
81 } else {
82 $alias_list{$alias} = $row[1];
84 $key = $row[0] . $row[1];
85 $unique_list{$key} = 1;
87 print "$count from marker_alias\n";
89 $count = 0;
90 $sth = $dbh->prepare("INSERT into marker_alias (marker_id, alias, preferred) values (?, ?, ?)");
91 foreach my $l (@lines) {
92 $count++;
93 push (@marker_list, $alias);
96 my ($marker_name, $alias) = split /\t/, $l;
97 my @alias_list = split /\|/, $alias;
98 if (exists($marker_list{$marker_name})) {
99 $marker_id = $marker_list{$marker_name};
100 foreach (@alias_list) {
101 $key = $_ . $marker_id;
102 if (exists($unique_list{$key})) {
103 } else {
104 $count_add++;
105 $sth->execute($marker_id, $_, 0);
106 $alias_list{$_} = $marker_id;
109 } else {
110 $sth2 = $dbh->prepare("INSERT into marker (dummy_field) values (null) RETURNING marker_id");
111 $sth2->execute();
112 ($marker_id) = $sth2->fetchrow_array();
113 $sth2 = $dbh->prepare("INSERT into marker_alias (marker_id, alias, preferred) values (?, ?, ?)");
114 $sth2->execute($marker_id, $marker_name, 1);
115 print "added marker $marker_name $marker_id\n";
116 foreach (@alias_list) {
117 $key = $_ . $marker_id;
118 if (exists($unique_list{$key})) {
119 } else {
120 $count_add++;
121 $sth->execute($marker_id, $_, 0);
122 $alias_list{$_} = $marker_id;
124 print "added alias $_\n";
126 last;
129 print "$count total $count_add added\n";
131 $dbh->commit();