make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / load_marker_locus_associations.pl
blob56fa7283174be4aab14956734b7368f71f844bcc
2 =head1 NAME
4 load_marker_locus_associations.pl - load associations between markers and loci.
6 =head1 DESCRIPTION
8 takes a file with two columns: locus names and marker names. Connects the two in the phenome.locus_marker table.
10 =head1 AUTHOR
12 Lukas Mueller <lam87@cornell.edu>
14 =cut
16 use strict;
17 use warnings;
19 use Getopt::Std;
20 use File::Slurp;
21 use CXGN::DB::InsertDBH;
22 use CXGN::Marker;
23 use CXGN::Phenome::Locus;
25 our %args;
27 getopts('H:D:f:', \%args);
29 foreach (values %args) {
30 print STDERR "param: $_\n";
33 my $dbh = CXGN::DB::InsertDBH->new( { dbname=> $args{D},
34 dbhost=> $args{H}
35 });
37 print STDERR "Reading file $args{f}\n";
38 my @lmas = read_file($args{f});
40 foreach my $line (@lmas) {
41 chomp($line);
42 my ($locus_symbol, $marker_name) = split /\t/, $line;
44 my $m = CXGN::Marker->new_with_name($dbh, $marker_name);
45 if (!$m) { print STDERR "Marker not found: $marker_name. Skipping\n"; next; }
46 my $marker_id = $m->marker_id();
48 my $h = $dbh -> prepare("SELECT locus_id FROM phenome.locus JOIN phenome.locus_alias using(locus_id) WHERE locus_name ilike ? OR locus_alias.alias ilike ? OR locus_symbol ilike ?");
50 $h->execute($locus_symbol, $locus_symbol, $locus_symbol);
52 my @ids = ();
53 while (my ($locus_id) = $h->fetchrow_array()) {
54 push @ids, $locus_id;
56 if (@ids > 1) {
57 warn "There are more than 1 loci associated with SNP $marker_name\n";
60 if (@ids == 0) {
61 warn "Locus $locus_symbol not found.\n";
63 foreach my $locus_id (@ids) {
64 my $l = CXGN::Phenome::Locus->new($dbh, $locus_id);
66 my $q = $dbh->prepare("INSERT INTO phenome.locus_marker (locus_id, marker_id) VALUES (?, ?)");
67 $q -> execute($locus_id, $marker_id);
68 print STDERR "Associated locus $locus_symbol ($locus_id) with marker $marker_name ($marker_id)\n";
72 $dbh->commit();