download fuzzy matches
[sgn.git] / bin / load_trait_formula.pl
blob5e2e51f895b77ab4495996f57ffbb578a3c9b3bf
1 #!/usr/bin/perl
3 =head1 NAME
5 load_trait_formula.pl - loads formulas for computing derived traits
7 =head1 DESCRIPTION
9 load_trait_formula.pl -H [database host] -D [database name] load_trait_formula_file.txt
11 Options:
13 -H the database host
14 -D the database name
16 load_trait_formula_file.txt: A file with two columns: trait name, trait formula.
18 If the trait name is found in the database, the formula for computing the trait will be added as a cvtermprop.
20 =head1 AUTHOR
22 Alex Ogbonna <aco46@cornell.edu>
24 =cut
27 use strict;
28 use warnings;
29 use Bio::Chado::Schema;
30 use Getopt::Std;
31 use CXGN::DB::InsertDBH;
32 use SGN::Model::Cvterm;
# ---------------------------------------------------------------------------
# Command-line handling, database connection and input parsing.
#
# Reads -H (database host) and -D (database name) plus one positional
# argument (the tab-separated trait/formula file), connects to the database,
# looks up the "formula" cvterm, and fills @traits / @formulas in parallel
# (same index = same input line) for the loading loop further down.
# ---------------------------------------------------------------------------
our ($opt_H, $opt_D);

my $usage_message =
    "Usage: load_trait_formula.pl -H [database host] -D [database name] load_trait_formula_file.txt\n";

# getopts returns false when it meets an unknown switch or a missing value.
getopts("H:D:") or die $usage_message;

my $dbhost = $opt_H;
my $dbname = $opt_D;
my $file   = shift;

# Fail early with a usage message instead of interpolating undef values
# into the connection parameters and the open() call below.
if (!defined $dbhost || !defined $dbname || !defined $file) {
    die $usage_message;
}

my @traits;
my @formulas;
my @array_ref;    # not used in this section; kept in case later code refers to it

my $dbh = CXGN::DB::InsertDBH->new({
    dbhost => $dbhost,
    dbname => $dbname,
    dbargs => {
        AutoCommit => 1,
        RaiseError => 1,
    },
});

my $schema = Bio::Chado::Schema->connect(sub { $dbh->get_actual_dbh() });

# NOTE(review): create_with is presumably find-or-create for the "formula"
# term in the "cvterm_property" cv -- confirm against Bio::Chado::Schema.
my $formula_cvterm = $schema->resultset("Cv::Cvterm")->create_with({
    name => "formula",
    cv   => "cvterm_property",
});

my $type_id = $formula_cvterm->cvterm_id();

# Report the real reason ($!) rather than assuming the file is missing:
# the failure may just as well be a permission problem.
open(my $file_fh, "<", $file)
    or die "\nERROR: the file $file could not be opened: $!\n";

# The first line is a header row and carries no data.
my $header = <$file_fh>;

while (my $line = <$file_fh>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate files saved with CRLF line endings

    # Silently ignore blank lines (typically a trailing newline at EOF).
    next if $line !~ /\S/;

    my ($my_trait, $my_formula) = split(/\t/, $line);

    # Both columns are required; a partial row would otherwise end up as an
    # undef trait name or an undef formula value later on.
    if (   !defined $my_trait
        || $my_trait eq ''
        || !defined $my_formula
        || $my_formula eq '')
    {
        print STDERR "Skipping line $. of $file: expected a trait name and a formula separated by a tab\n";
        next;
    }

    push @traits,   $my_trait;
    push @formulas, $my_formula;
}

close($file_fh);
72 for (my $n=0; $n<scalar(@traits); $n++) {
73 print STDERR $traits[$n]."\n";
74 my $trait_cvterm = SGN::Model::Cvterm->get_cvterm_row_from_trait_name($schema, $traits[$n]);
75 if (!$trait_cvterm) {
76 print STDERR "The trait $traits[$n] is not in the database. Skipping...\n";
77 next();
80 my $cvterm_id = $trait_cvterm->cvterm_id();
81 my $new_prop= $trait_cvterm->create_cvtermprops({formula=>$formulas[$n]} , {} );