make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / download_obo.pl
blobe5b354c337b0110b70eda4f043a1b631547f7a3e
2 =head1 NAME
4 download_obo.pl - script to download obo file of an ontology
6 =head1 SYNOPSIS
8 perl download_obo.pl -i prefix -H host -D dbname
10 =head1 DESCRIPTION
12 Download an obo file from your database. The file will contain all terms listed under the provided ontology prefix (db.name).
13 The obo filename is $prefix.breedbase.obo (e.g. CO_334.breedbase.obo).
14 After downloading the file it is recommended to test it in the Protege program to make sure it is a valid obo,
15 and loading back into the database with gmod_load_cvterms.pl <see the solgenomics/chado_tools repo>
18 This script currently prints the following fields:
20 [Term]
21 id: $prefix:$accession
22 name: $cvterm_name
23 namespace: $namespace
24 def: "$def" #if the cvterm 'definition' field is populated
25 is_obsolete: true #if cvterm is_obsolete field is 'true'
26 synonym: $synonym_name [] # one row for each synonym
27 xref: $db_name:$accession # one row for each dbxref of the cvterm whose is_for_definition flag is 0
28 is_a: $cvterm # one row for each is_a relationship object
29 relationship: $typedef $cvterm # one row for each relationship type that is not is_a (e.g. variable_of, method_of, scale_of)
31 --------------------------------------------
33 Example:
35 [Term]
36 id: CO_334:0000009
37 name: initial vigor assessment 1-7
38 namespace: cassava_trait
39 def: "Visual assessment of plant vigor during establishment scored one month after planting. 3 = Not vigorous, 5 = Medium vigor, 7 = highly vigorous." [CO:curators]
40 synonym: "Can't fall when there is strong wind" EXACT []
41 synonym: "IVig_IITAVisScg_1to7" EXACT []
42 synonym: "vigor" EXACT []
43 xref: TO:0000250
44 is_a: CO_334:0001000 ! Variables
45 is_a: CO_334:0002010 ! Farmer trait
46 relationship: variable_of CO_334:0000386 ! Initial vigor
47 relationship: variable_of CO_334:0010228 ! Visual Rating: Initial vigor_method
48 relationship: variable_of CO_334:0100434 ! 7pt scale
50 ------------------------------------------
52 If there are any cvterm.is_relationshiptype for this ontology they will be printed as
53 [Typedef]
54 id: $cvterm_name
55 name: $cvterm_name
58 =head1 COMMAND-LINE OPTIONS
60 -H host name
61 -D database name
62 -i prefix for the ontology (e.g. CO_334)
63 -t Test run. Rolls back at the end.
65 =head1 AUTHOR
67 Naama Menda <nm249@cornell.edu>
69 =cut
71 use strict;
73 use Getopt::Std;
74 use Data::Dumper;
75 use Try::Tiny;
76 use DateTime;
78 use Bio::Chado::Schema;
79 use CXGN::DB::InsertDBH;
80 use File::Slurp;
# Command-line options (see COMMAND-LINE OPTIONS in the POD above).
our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_i);

getopts('H:D:U:P:i:');

my $dbhost = $opt_H;
my $dbname = $opt_D;
my $dbuser = $opt_U;    # parsed for the command line, not used below
my $dbpass = $opt_P;    # parsed for the command line, not used below
my $prefix = $opt_i;

# Without a prefix the cvterm search below cannot select an ontology and the
# output file would be named ".breedbase.obo", so stop early instead.
die "Usage: perl download_obo.pl -i prefix -H host -D dbname\n"
    unless defined $prefix && length $prefix;

# The database handle must exist before the schema connection below uses it.
my $dbh = CXGN::DB::InsertDBH->new( {
    dbhost => $dbhost,
    dbname => $dbname,
    dbargs => {
        AutoCommit => 0,
        RaiseError => 1,
    },
} );

print STDERR "Connecting to database...\n";
my $schema = Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );

my $obo_file = $prefix . ".breedbase.obo";

# Resultset of all cvterms whose dbxref belongs to the db named $prefix.
my $cvterm_rs = $schema->resultset("Cv::Cvterm")->search(
    { 'db.name' => $prefix },
    {
        join      => [ 'cv', { dbxref => 'db' } ],
        '+select' => [ 'cv.name', 'dbxref.accession' ],
        '+as'     => [ 'cv_name', 'dbxref_accession' ],
    }
);

my $obo_header = "format-version: 1.2\nontology: $prefix\n\n";

# Start the file fresh; every stanza is appended to it afterwards.
write_file( $obo_file, { append => 0 }, $obo_header );

my $count = 0;
while ( my $cvterm = $cvterm_rs->next() ) {
    my $accession = $cvterm->dbxref->accession();
    print STDERR "Looking at Accession $accession\n";

    my $cvterm_name = $cvterm->name();
    my $namespace   = $cvterm->cv->name();
    my $def         = $cvterm->definition();

    # Remove quotes from the definition string: it is written inside
    # double quotes below. The definition may be undefined.
    $def =~ s/"//g if defined $def;

    my $is_obsolete         = $cvterm->is_obsolete();
    my $is_relationshiptype = $cvterm->is_relationshiptype();

    # The whole stanza is collected here and written with a single append,
    # rather than reopening the file for every output line.
    my $stanza = "\n[Term]\nid: $prefix:$accession\nname: $cvterm_name\nnamespace: $namespace\n";
    $stanza .= "def: \"$def\"\n" if defined $def && length $def;
    $stanza .= "is_obsolete: true\n" if $is_obsolete;
    $count++;

    # Relationship types are printed as a [Typedef] stanza that replaces the
    # [Term] header built above.
    if ($is_relationshiptype) {
        $stanza = "\n[Typedef]\nid: $cvterm_name\nname: $cvterm_name\n";
    }

    my $syn_rs           = $cvterm->cvtermsynonyms();
    my $xref_rs          = $cvterm->cvterm_dbxrefs();
    my $relationships_rs = $cvterm->cvterm_relationship_subjects();

    while ( my $synonym = $syn_rs->next() ) {
        my $syn_name = $synonym->synonym();
        $syn_name = '' unless defined $syn_name;
        print STDERR "synonym = $syn_name\n";

        # The synonym type is optional; leave the scope undefined without one.
        my $type = $synonym->type;
        my $scope;
        if ( defined $type ) {
            $scope = $type->name;

            # OBO 1.2 scope keywords are upper case.
            # NOTE(review): assumes the stored type names are the scope words
            # in some letter case - confirm against the synonym type cv.
            $scope = uc $scope
                if defined $scope && $scope =~ /\A(?:exact|broad|narrow|related)\z/i;
        }

        $stanza .= "synonym: " . format_synonym_value( $syn_name, $scope ) . "\n";
    }

    while ( my $xref = $xref_rs->next() ) {
        my $xref_acc    = $xref->dbxref->accession();
        my $xref_prefix = $xref->dbxref->db->name();
        print STDERR "xref = $xref_prefix:$xref_acc\n";

        # Only xrefs that are not flagged is_for_definition are printed.
        $stanza .= "xref: " . $xref_prefix . ":" . $xref_acc . "\n"
            if $xref->is_for_definition == 0;
    }

    while ( my $rel = $relationships_rs->next() ) {
        my $object            = $rel->object();
        my $type              = $rel->type();
        my $object_name       = $object->name();
        my $object_acc        = $object->dbxref->accession();
        my $object_acc_prefix = $object->dbxref->db->name();

        my $type_name = $type->name();

        # is_a has its own tag; every other type uses "relationship: <type>".
        my $relationship_format = "is_a:";
        if ( $type_name ne "is_a" ) {
            $relationship_format = "relationship: $type_name";
        }

        print STDERR "$relationship_format $object_acc_prefix:$object_acc\n";
        $stanza .= "$relationship_format $object_acc_prefix:$object_acc ! $object_name\n";
    }

    write_file( $obo_file, { append => 1 }, $stanza );
}

print STDERR "wrote $count terms to file $obo_file\n";

# Build the value of an OBO "synonym:" tag in the form
#   "text" SCOPE [xrefs]
#
# $text  - synonym text as stored; it may be bare text, already quoted, or a
#          complete value that already ends with an xref list.
# $scope - optional scope word; used only when $text does not carry one.
#
# Returns the formatted value without the "synonym: " tag or a newline.
sub format_synonym_value {
    my ( $text, $scope ) = @_;
    $text = '' unless defined $text;

    my ( $quoted, $middle, $xrefs );
    if ( $text =~ /\A(".*")\s*([^"\[\]]*?)\s*(\[.*\])\s*\z/s ) {
        # Already a complete value: keep its parts and only fill in a
        # missing scope.
        ( $quoted, $middle, $xrefs ) = ( $1, $2, $3 );
    }
    else {
        # Synonyms need to be quoted. Sometimes they are already quoted if
        # they were loaded by the obo loader.
        $quoted = $text =~ /\A".*"\z/s ? $text : '"' . $text . '"';
        ( $middle, $xrefs ) = ( '', '[]' );
    }

    $middle = $scope
        if $middle eq '' && defined $scope && length $scope;

    return join ' ', grep { length } ( $quoted, $middle, $xrefs );
}