modified key
[sgn.git] / lib / CXGN / Onto.pm
blob1bc85c92513775a6d27888fb8cbc5b778e53b9d9
1 package CXGN::Onto;
3 use Moose;
4 use Moose::Util::TypeConstraints;
5 use Data::Dumper;
6 use JSON::Any;
7 use Try::Tiny;
8 use Bio::Chado::Schema;
9 use SGN::Model::Cvterm;
11 has 'schema' => (
12 isa => 'Bio::Chado::Schema',
13 is => 'rw',
14 required => 1
17 =head2 get_terms
19 parameters: namespace
21 returns: terms in namespace
23 Side Effects: none
25 =cut
27 sub get_terms {
28 my $self = shift;
29 my $cv_id = shift;
31 my $query = "SELECT distinct(cvterm_id), dbxref.accession, (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text AS name
32 FROM cvterm
33 JOIN dbxref USING(dbxref_id)
34 JOIN db USING(db_id)
35 JOIN cvterm_relationship is_subject ON cvterm.cvterm_id = is_subject.subject_id
36 WHERE cv_id = ? AND is_obsolete = ?
37 GROUP BY 1,2,3
38 ORDER BY 3";
40 my $h = $self->schema->storage->dbh->prepare($query);
41 $h->execute($cv_id, '0');
43 my @results;
44 while (my ($id, $accession, $name) = $h->fetchrow_array()) {
45 if ($accession +0 != 0) {
46 push @results, [$id, $name];
50 return @results;
53 sub get_root_nodes {
54 my $self = shift;
55 my $cv_type = shift;
57 my $query = "SELECT cv.cv_id, (((db.name::text || ':'::text) || dbxref.accession::text) || ' '::text) || cvterm.name AS name
58 FROM cv
59 JOIN cvprop ON(cv.cv_id = cvprop.cv_id AND cvprop.type_id IN (SELECT cvterm_id from cvterm where cvterm.name = ?))
60 JOIN cvterm on(cvprop.cv_id = cvterm.cv_id)
61 JOIN dbxref USING(dbxref_id)
62 JOIN db USING(db_id)
63 LEFT JOIN cvterm_relationship ON(cvterm.cvterm_id=cvterm_relationship.subject_id)
64 WHERE cvterm_relationship.subject_id IS NULL AND cvterm.is_obsolete= 0 AND cvterm.is_relationshiptype = 0";
66 my $h = $self->schema->storage->dbh->prepare($query);
67 $h->execute($cv_type);
69 my @results;
70 while (my ($id, $name) = $h->fetchrow_array()) {
71 push @results, [$id, $name];
74 return @results;
78 sub store_composed_term {
79 my $self = shift;
80 my $new_trait_names = shift;
81 #print STDERR Dumper $new_trait_names;
83 my $schema = $self->schema();
84 my $dbh = $schema->storage->dbh;
86 my $contains_relationship = $schema->resultset("Cv::Cvterm")->find({ name => 'contains' });
87 my $variable_relationship = $schema->resultset("Cv::Cvterm")->find({ name => 'VARIABLE_OF' });
89 my @new_terms;
90 foreach my $name (sort keys %$new_trait_names){
91 my $ids = $new_trait_names->{$name};
92 my @component_ids = split ',', $ids;
94 if (scalar(@component_ids)<2){
95 die "Should not save postcomposed term with less than 2 components\n";
98 my $existing_trait_id = SGN::Model::Cvterm->get_trait_from_exact_components($schema, \@component_ids);
99 if ($existing_trait_id) {
100 print STDERR "Skipping: This trait already exists $name with the following component_ids".Dumper(\@component_ids)."\n";
101 next;
104 my $db = $schema->resultset("General::Db")->find_or_create({ name => 'COMP' });
105 my $cv= $schema->resultset('Cv::Cv')->find_or_create( { name => 'composed_trait' });
107 my $accession_query = "SELECT nextval('composed_trait_ids')";
108 my $h = $dbh->prepare($accession_query);
109 $h->execute();
110 my $accession = $h->fetchrow_array();
112 my $new_term_dbxref = $schema->resultset("General::Dbxref")->create(
113 { db_id => $db->get_column('db_id'),
114 accession => sprintf("%07d",$accession)
117 my $parent_term= $schema->resultset("Cv::Cvterm")->find(
118 { cv_id =>$cv->cv_id(),
119 name => 'Composed traits',
122 #print STDERR "Parent cvterm_id = " . $parent_term->cvterm_id();
124 my $new_term = $schema->resultset('Cv::Cvterm')->find({ name=>$name });
125 if ($new_term){
126 print STDERR "Cvterm with name $name already exists... so components must be new\n";
127 } else {
128 $new_term= $schema->resultset("Cv::Cvterm")->create({
129 cv_id =>$cv->cv_id(),
130 name => $name,
131 dbxref_id => $new_term_dbxref-> dbxref_id()
136 #print STDERR "New term cvterm_id = " . $new_term->cvterm_id();
138 my $variable_rel = $schema->resultset('Cv::CvtermRelationship')->find_or_create({
139 subject_id => $new_term->cvterm_id(),
140 object_id => $parent_term->cvterm_id(),
141 type_id => $variable_relationship->cvterm_id()
144 foreach my $component_id (@component_ids) {
145 my $contains_rel = $schema->resultset('Cv::CvtermRelationship')->find_or_create({
146 subject_id => $component_id,
147 object_id => $new_term->cvterm_id(),
148 type_id => $contains_relationship->cvterm_id()
152 push @new_terms, [$new_term->cvterm_id, $new_term->name().'|COMP:'.sprintf("%07d",$accession)];
155 #Takes long on cassavabase.. instead the materialized view is refreshed automatically in a background ajax process.
156 #my $refresh1 = "REFRESH MATERIALIZED VIEW traits";
157 #my $h = $dbh->prepare($refresh1);
158 #$h->execute();
160 #my $refresh2 = "REFRESH MATERIALIZED VIEW trait_componentsXtraits";
161 #$h = $dbh->prepare($refresh2);
162 #$h->execute();
164 return \@new_terms;
167 sub store_ontology_identifier {
168 my $self = shift;
169 my $ontology_name = shift;
170 my $ontology_description = shift;
171 my $ontology_identifier = shift;
172 my $ontology_type = shift;
173 my $schema = $self->schema();
174 my $dbh = $schema->storage->dbh;
176 my $cv_check = $schema->resultset("Cv::Cv")->find({name=>$ontology_name});
177 if ($cv_check) {
178 return {
179 error => "The ontology name $ontology_name has already been used!"
183 my $db_check = $schema->resultset("General::Db")->find({name=>$ontology_name});
184 if ($db_check) {
185 return {
186 error => "The ontology identifier $ontology_identifier has already been used!"
190 my $cv_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, $ontology_type, 'composable_cvtypes')->cvterm_id();
192 my $coderef = sub {
193 my $cv_rs = $schema->resultset("Cv::Cv")->create({
194 name => $ontology_name,
195 definition => $ontology_description
197 my $new_cv_id = $cv_rs->cv_id();
199 my $new_ontology_cvprop = $schema->resultset("Cv::Cvprop")->create({
200 cv_id => $new_cv_id,
201 type_id => $cv_type_id
204 my $db_rs = $schema->resultset("General::Db")->create({
205 name => $ontology_identifier
207 my $new_db_id = $db_rs->db_id();
209 my $dbxref_rs = $schema->resultset("General::Dbxref")->create({
210 db_id => $new_db_id,
211 accession => "0000000"
213 my $new_dbxref_id = $dbxref_rs->dbxref_id();
215 my $cvterm_rs = $schema->resultset("Cv::Cvterm")->create({
216 name => $ontology_name,
217 definition => $ontology_description,
218 dbxref_id => $new_dbxref_id,
219 cv_id => $new_cv_id
222 return {
223 success => 1,
224 new_term => [$cvterm_rs->cvterm_id(), $cvterm_rs->name()]
228 try {
229 $schema->txn_do($coderef);
230 } catch {
231 return {
232 error => $_
237 sub store_observation_variable_trait_method_scale {
238 my $self = shift;
239 my $selected_observation_variable_db_id = shift;
240 my $new_observation_variable_name = shift;
241 my $new_observation_variable_definition = shift;
242 my $selected_trait_db_id = shift;
243 my $selected_trait_cvterm_id = shift;
244 my $new_trait_name = shift;
245 my $new_trait_definition = shift;
246 my $selected_method_db_id = shift;
247 my $selected_method_cvterm_id = shift;
248 my $new_method_name = shift;
249 my $new_method_definition = shift;
250 my $selected_scale_db_id = shift;
251 my $selected_scale_cvterm_id = shift;
252 my $new_scale_name = shift;
253 my $new_scale_definition = shift;
254 my $new_scale_format = shift;
255 my $new_scale_minumum = shift;
256 my $new_scale_maximum = shift;
257 my $new_scale_default = shift;
258 my $new_scale_categories = shift;
260 my $schema = $self->schema();
261 my $dbh = $schema->storage->dbh;
262 my $numeric_regex = '^-?[0-9]+([,.][0-9]+)?$';
264 my $new_observation_variable_cvterm_check = $schema->resultset("Cv::Cvterm")->search({
265 name => $new_observation_variable_name,
267 if ($new_observation_variable_cvterm_check->count() > 0) {
268 return { error => "The observation variable $new_observation_variable_name already exists in the database!" };
271 if (!$selected_trait_cvterm_id) {
272 my $new_trait_cvterm_check = $schema->resultset("Cv::Cvterm")->search({
273 name => $new_trait_name,
275 if ($new_trait_cvterm_check->count() > 0) {
276 return { error => "The trait $new_trait_name already exists in the database!" };
280 if (!$selected_method_cvterm_id) {
281 my $new_method_cvterm_check = $schema->resultset("Cv::Cvterm")->search({
282 name => $new_method_name,
284 if ($new_method_cvterm_check->count() > 0) {
285 return { error => "The method $new_method_name already exists in the database!" };
289 if (!$selected_scale_cvterm_id) {
290 my $new_scale_cvterm_check = $schema->resultset("Cv::Cvterm")->search({
291 name => $new_scale_name,
293 if ($new_scale_cvterm_check->count() > 0) {
294 return { error => "The scale $new_scale_name already exists in the database!" };
298 my $coderef = sub {
299 my $observation_variable_db_q = "SELECT db.name, dbxref.accession, cv.name, cv.cv_id FROM dbxref JOIN db USING(db_id) JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_observation_variable_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int DESC LIMIT 1;";
300 my $observation_variable_db_sth = $dbh->prepare($observation_variable_db_q);
301 $observation_variable_db_sth->execute();
302 my ($observation_variable_db_name, $observation_variable_last_accession, $observation_variable_cv_name, $observation_variable_cv_id) = $observation_variable_db_sth->fetchrow_array();
303 my $observation_variable_new_accession = sprintf("%07d", $observation_variable_last_accession + 1);
305 my $parent_observation_variable_cvterm_q = "SELECT cvterm.cvterm_id, cvterm.name FROM dbxref JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_observation_variable_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int ASC LIMIT 1;";
306 my $parent_observation_variable_cvterm_sth = $dbh->prepare($parent_observation_variable_cvterm_q);
307 $parent_observation_variable_cvterm_sth->execute();
308 my ($parent_observation_variable_cvterm_id, $parent_observation_variable_cvterm_name) = $parent_observation_variable_cvterm_sth->fetchrow_array();
310 my $new_term_observation_variable_dbxref = $schema->resultset("General::Dbxref")->find_or_create({
311 db_id => $selected_observation_variable_db_id,
312 accession => $observation_variable_new_accession
315 my $new_observation_variable_cvterm = $schema->resultset("Cv::Cvterm")->create({
316 cv_id => $observation_variable_cv_id,
317 name => $new_observation_variable_name,
318 definition => $new_observation_variable_definition,
319 dbxref_id => $new_term_observation_variable_dbxref->dbxref_id()
322 my $is_a_relationship = $schema->resultset("Cv::Cvterm")->search({ name => 'is_a' })->first();
323 my $contains_relationship = $schema->resultset("Cv::Cvterm")->search({ name => 'contains' })->first();
324 my $variable_relationship = $schema->resultset("Cv::Cvterm")->search({ name => 'VARIABLE_OF' })->first();
326 my $variable_rel = $schema->resultset('Cv::CvtermRelationship')->create({
327 subject_id => $new_observation_variable_cvterm->cvterm_id(),
328 object_id => $parent_observation_variable_cvterm_id,
329 type_id => $variable_relationship->cvterm_id()
332 if (!$selected_trait_cvterm_id) {
334 my $trait_db_q = "SELECT db.name, dbxref.accession, cv.name, cv.cv_id FROM dbxref JOIN db USING(db_id) JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_trait_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int DESC LIMIT 1;";
335 my $trait_db_sth = $dbh->prepare($trait_db_q);
336 $trait_db_sth->execute();
337 my ($trait_db_name, $trait_last_accession, $trait_cv_name, $trait_cv_id) = $trait_db_sth->fetchrow_array();
338 my $trait_new_accession = sprintf("%07d", $trait_last_accession + 1);
340 my $parent_trait_cvterm_q = "SELECT cvterm.cvterm_id, cvterm.name FROM dbxref JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_trait_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int ASC LIMIT 1;";
341 my $parent_trait_cvterm_sth = $dbh->prepare($parent_trait_cvterm_q);
342 $parent_trait_cvterm_sth->execute();
343 my ($parent_trait_cvterm_id, $parent_trait_cvterm_name) = $parent_trait_cvterm_sth->fetchrow_array();
345 my $new_term_trait_dbxref = $schema->resultset("General::Dbxref")->create({
346 db_id => $selected_trait_db_id,
347 accession => $trait_new_accession
350 my $new_trait_cvterm = $schema->resultset("Cv::Cvterm")->create({
351 cv_id => $trait_cv_id,
352 name => $new_trait_name,
353 definition => $new_trait_definition,
354 dbxref_id => $new_term_trait_dbxref->dbxref_id()
356 $selected_trait_cvterm_id = $new_trait_cvterm->cvterm_id();
358 my $trait_rel = $schema->resultset('Cv::CvtermRelationship')->create({
359 subject_id => $new_trait_cvterm->cvterm_id(),
360 object_id => $parent_trait_cvterm_id,
361 type_id => $is_a_relationship->cvterm_id()
365 my $observation_variable_to_trait_contains_rel = $schema->resultset('Cv::CvtermRelationship')->create({
366 subject_id => $selected_trait_cvterm_id,
367 object_id => $new_observation_variable_cvterm->cvterm_id(),
368 type_id => $contains_relationship->cvterm_id()
371 if (!$selected_method_cvterm_id) {
372 my $method_db_q = "SELECT db.name, dbxref.accession, cv.name, cv.cv_id FROM dbxref JOIN db USING(db_id) JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_method_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int DESC LIMIT 1;";
373 my $method_db_sth = $dbh->prepare($method_db_q);
374 $method_db_sth->execute();
375 my ($method_db_name, $method_last_accession, $method_cv_name, $method_cv_id) = $method_db_sth->fetchrow_array();
376 my $method_new_accession = sprintf("%07d", $method_last_accession + 1);
378 my $parent_method_cvterm_q = "SELECT cvterm.cvterm_id, cvterm.name FROM dbxref JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_method_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int ASC LIMIT 1;";
379 my $parent_method_cvterm_sth = $dbh->prepare($parent_method_cvterm_q);
380 $parent_method_cvterm_sth->execute();
381 my ($parent_method_cvterm_id, $parent_method_cvterm_name) = $parent_method_cvterm_sth->fetchrow_array();
383 my $new_term_method_dbxref = $schema->resultset("General::Dbxref")->create({
384 db_id => $selected_method_db_id,
385 accession => $method_new_accession
388 my $new_method_cvterm = $schema->resultset("Cv::Cvterm")->create({
389 cv_id => $method_cv_id,
390 name => $new_method_name,
391 definition => $new_method_definition,
392 dbxref_id => $new_term_method_dbxref->dbxref_id()
394 $selected_method_cvterm_id = $new_method_cvterm->cvterm_id();
396 my $method_rel = $schema->resultset('Cv::CvtermRelationship')->create({
397 subject_id => $new_method_cvterm->cvterm_id(),
398 object_id => $parent_method_cvterm_id,
399 type_id => $is_a_relationship->cvterm_id()
403 my $observation_variable_to_method_contains_rel = $schema->resultset('Cv::CvtermRelationship')->create({
404 subject_id => $selected_method_cvterm_id,
405 object_id => $new_observation_variable_cvterm->cvterm_id(),
406 type_id => $contains_relationship->cvterm_id()
409 if (!$selected_scale_cvterm_id) {
410 my $scale_db_q = "SELECT db.name, dbxref.accession, cv.name, cv.cv_id FROM dbxref JOIN db USING(db_id) JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_scale_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int DESC LIMIT 1;";
411 my $scale_db_sth = $dbh->prepare($scale_db_q);
412 $scale_db_sth->execute();
413 my ($scale_db_name, $scale_last_accession, $scale_cv_name, $scale_cv_id) = $scale_db_sth->fetchrow_array();
414 my $scale_new_accession = sprintf("%07d", $scale_last_accession + 1);
416 my $parent_scale_cvterm_q = "SELECT cvterm.cvterm_id, cvterm.name FROM dbxref JOIN cvterm USING(dbxref_id) JOIN cv USING(cv_id) WHERE db_id=$selected_scale_db_id AND dbxref.accession ~ '$numeric_regex' ORDER BY dbxref.accession::int ASC LIMIT 1;";
417 my $parent_scale_cvterm_sth = $dbh->prepare($parent_scale_cvterm_q);
418 $parent_scale_cvterm_sth->execute();
419 my ($parent_scale_cvterm_id, $parent_scale_cvterm_name) = $parent_scale_cvterm_sth->fetchrow_array();
421 my $new_term_scale_dbxref = $schema->resultset("General::Dbxref")->create({
422 db_id => $selected_scale_db_id,
423 accession => $scale_new_accession
426 my $scale_categories_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'trait_categories', 'trait_property')->cvterm_id();
427 my $scale_default_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'trait_default_value', 'trait_property')->cvterm_id();
428 my $scale_format_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'trait_format', 'trait_property')->cvterm_id();
429 my $scale_maximum_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'trait_maximum', 'trait_property')->cvterm_id();
430 my $scale_minimum_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'trait_minimum', 'trait_property')->cvterm_id();
432 my @cvtermprops;
433 if ($new_scale_format) {
434 push @cvtermprops, {type_id => $scale_format_cvterm_id, value => $new_scale_format};
436 if ($new_scale_minumum) {
437 push @cvtermprops, {type_id => $scale_minimum_cvterm_id, value => $new_scale_minumum};
439 if ($new_scale_maximum) {
440 push @cvtermprops, {type_id => $scale_maximum_cvterm_id, value => $new_scale_maximum};
442 if ($new_scale_default) {
443 push @cvtermprops, {type_id => $scale_default_cvterm_id, value => $new_scale_default};
445 if ($new_scale_categories) {
446 push @cvtermprops, {type_id => $scale_categories_cvterm_id, value => $new_scale_categories};
449 my $new_scale_cvterm = $schema->resultset("Cv::Cvterm")->create({
450 cv_id => $scale_cv_id,
451 name => $new_scale_name,
452 definition => $new_scale_definition,
453 dbxref_id => $new_term_scale_dbxref->dbxref_id(),
454 cvtermprops => \@cvtermprops
456 $selected_scale_cvterm_id = $new_scale_cvterm->cvterm_id();
458 my $scale_rel = $schema->resultset('Cv::CvtermRelationship')->create({
459 subject_id => $new_scale_cvterm->cvterm_id(),
460 object_id => $parent_scale_cvterm_id,
461 type_id => $is_a_relationship->cvterm_id()
465 my $observation_variable_to_scale_contains_rel = $schema->resultset('Cv::CvtermRelationship')->create({
466 subject_id => $selected_scale_cvterm_id,
467 object_id => $new_observation_variable_cvterm->cvterm_id(),
468 type_id => $contains_relationship->cvterm_id()
471 return {
472 success => 1,
473 new_term => [$new_observation_variable_cvterm->cvterm_id(), $new_observation_variable_cvterm->name()]
477 try {
478 $schema->txn_do($coderef);
479 } catch {
480 return {
481 error => $_
488 # Recursively get the children (and all granchildren, etc) of the specified cvterm
489 # ARGS:
490 # - cvterm_id = id of the root cvterm
491 # RETURNS: an arrayref of hashes of the children of the cvterm, with the following keys:
492 # - cvterm_id = id of the child cvterm
493 # - name = name of the child cvterm
494 # - definition = definition of the child cvterm
495 # - children = children of the child cvterm
496 # - accession = dbxref accession of the child cvterm
498 sub get_children {
499 my $self = shift;
500 my $cvterm_id = shift;
501 my $schema = $self->schema();
503 my @children;
504 my $cvterm = $schema->resultset('Cv::Cvterm')->find({ cvterm_id => $cvterm_id });
505 if ( defined $cvterm ) {
506 my $cvterm_rs = $cvterm->children();
507 while (my $r = $cvterm_rs->next()) {
508 my $child = $r->subject();
509 if ( !$child->is_obsolete() ) {
510 my $gc = $self->get_children($child->cvterm_id);
511 my $dbxref_rs = $schema->resultset('General::Dbxref')->find({ dbxref_id => $child->dbxref_id() });
512 my %c = (
513 cvterm_id => $child->cvterm_id(),
514 name => $child->name(),
515 definition => $child->definition(),
516 children => scalar(@$gc) > 0 ? $gc : undef,
517 accession => $dbxref_rs->accession()
519 push(@children, \%c);
524 my @sorted = sort { $a->{accession} <=> $b->{accession} } @children;
525 return \@sorted;