From 508b57946f59325279c2df1849d2c5f50dbf0602 Mon Sep 17 00:00:00 2001 From: Naama Menda Date: Wed, 3 Nov 2010 11:22:05 -0400 Subject: [PATCH] fixed recursive_children cvterm function, and added tests for parents and children --- lib/CXGN/Chado/Cvterm.pm | 98 +++++++++++++++++++++++++----------------------- lib/CXGN/Chado/Dbxref.pm | 19 +++++----- t/CXGN/Chado/cvterm.t | 33 ++++++++++------ 3 files changed, 81 insertions(+), 69 deletions(-) diff --git a/lib/CXGN/Chado/Cvterm.pm b/lib/CXGN/Chado/Cvterm.pm index 9e0a204..a4e6d3c 100644 --- a/lib/CXGN/Chado/Cvterm.pm +++ b/lib/CXGN/Chado/Cvterm.pm @@ -395,7 +395,7 @@ sub fetch { sub store { my $self = shift; my $cvterm_id=$self->get_cvterm_id(); - + if ($cvterm_id) { #check if exists: my $existing_cvterm_id=$self->cvterm_exists(); @@ -407,40 +407,40 @@ sub store { my $sth = $self->get_dbh()->prepare($query); $sth->execute($self->get_cv_id(), $self->get_cvterm_name(), $self->get_dbxref_id(), $self->get_definition(), $self->get_obsolete(), $self->get_cvterm_id()); } - }else { - if (!$self->get_dbxref_id()) { + }else { + if (!$self->get_dbxref_id()) { if (!$self->get_accession()) { die "Need an accession for a CV term!"; } my $dbxref = CXGN::Chado::Dbxref->new($self->get_dbh()); - $dbxref->set_accession($self->get_accession()); + $dbxref->set_accession($self->get_accession()); $dbxref->set_version($self->get_version()); $dbxref->set_description($self->definition()); my $db_name = $self->get_db_name(); if ($db_name) { $dbxref->set_db_name($db_name); } else { die "Need a DB name to store cvterm object.\n"; } - + my $dbxref_id = $dbxref->store(); $self->set_dbxref_id($dbxref_id); - $self->d("Inserted new dbxref for accession " . $self->get_db_name() . ":" . $self->get_accession ."\n"); + $self->d("Inserted new dbxref for accession " . $self->get_db_name() . ":" . $self->get_accession ."\n"); } - + my $query = "INSERT INTO cvterm (cv_id, name, dbxref_id, definition, is_obsolete, is_relationshiptype) VALUES (?, ?, ?, ?,?,?)"; - + my $sth = $self->get_dbh()->prepare($query); $sth->execute($self->get_cv_id(), $self->get_cvterm_name(), $self->get_dbxref_id(), $self->get_definition(), $self->get_obsolete(), $self->get_is_relationshiptype()); - + $cvterm_id = $self->get_dbh()->last_insert_id("cvterm", "public"); $self->set_cvterm_id($cvterm_id); } return $cvterm_id; - + } =head2 Class properties - -The following class properties have accessors (get_cvterm_id, set_cvterm_id...): - + +The following class properties have accessors (get_cvterm_id, set_cvterm_id...): + cvterm_id cv_id cv_name @@ -615,7 +615,7 @@ sub get_full_accession { Ret: a list of listrefs containing CXGN::Chado::Cvterm objects and relationship types Args: none Side Effects: none - Example: + Example: =cut @@ -649,34 +649,34 @@ sub get_recursive_parents { =head2 get_parents - Usage: $self->get_parents - Desc: find the direct parents of the cvterm + Usage: $self->get_parents + Desc: find the direct parents of the cvterm Ret: a list of listrefs containing CXGN::Chado::Cvterm objects and relationship types - Args: none - Side Effects: none + Args: none + Side Effects: none Example: =cut sub get_parents { my $self=shift; - my $parents_q = "SELECT object_id , type_id - FROM cvterm_relationship + my $parents_q = "SELECT object_id , type_id + FROM cvterm_relationship WHERE subject_id = ? "; my $parents_sth = $self->get_dbh()->prepare($parents_q); $parents_sth->execute($self->get_cvterm_id() ); my @parents = (); - while (my ($parent_term_id, $type_id) = $parents_sth->fetchrow_array()) { + while (my ($parent_term_id, $type_id) = $parents_sth->fetchrow_array()) { my $parent_term = CXGN::Chado::Cvterm->new($self->get_dbh(), $parent_term_id); my $relationship_term = CXGN::Chado::Cvterm->new($self->get_dbh(), $type_id); - + push @parents, [ $parent_term, $relationship_term ]; } return (@parents); } =head2 get_children - + Usage: my @children = $self->get_children() Desc: a method for finding all the child terms of a cv term and their relationship Ret: a list of lists with two elements: a cvterm object for the child and a @@ -720,24 +720,30 @@ sub get_children { sub get_recursive_children { my $self=shift; - my $q = "SELECT distinct(cvtermpath.subject_id), cvterm_relationship.type_id - FROM cvtermpath - JOIN cvterm_relationship USING (subject_id) - JOIN cvterm ON (cvtermpath.object_id = cvterm_id) + my $q = "SELECT distinct(cvtermpath.subject_id) + FROM cvtermpath + JOIN cvterm ON (cvtermpath.object_id = cvterm_id) WHERE cvtermpath.object_id =? AND cvterm.is_obsolete=0 AND pathdistance>0 "; - + my $sth = $self->get_dbh()->prepare($q); $sth->execute($self->get_cvterm_id() ); + + my $type_q = "SELECT type_id FROM cvterm_relationship + WHERE subject_id = ? AND object_id = ?"; + my $type_sth = $self->get_dbh->prepare($type_q); + my @children = (); - while (my ($child_term_id, $type_id) = $sth->fetchrow_array()) { + while (my ($child_term_id) = $sth->fetchrow_array()) { my $child_term = CXGN::Chado::Cvterm->new($self->get_dbh(), $child_term_id); + $type_sth->execute($child_term_id, $self->get_cvterm_id); + my ($type_id) = $type_sth->fetchrow_array(); my $relationship_term = CXGN::Chado::Cvterm->new($self->get_dbh(), $type_id); - push @children, [ $child_term, $relationship_term ]; } return (@children); } + =head2 count_children Usage: my $childrenNumber = $self->count_children() @@ -745,11 +751,11 @@ sub get_recursive_children { Ret: the number of children for the current db name (this is to avoid counting InterPro children of GO terms) Args: none Side Effects: none - Example: + Example: =cut -sub count_children { +sub count_children { my $self = shift; my $childNumber = 0; @@ -762,19 +768,19 @@ sub count_children { =head2 get_synonyms - + Usage: my @synonyms = $self->get_synonyms() - Desc: a method for fetching all synonyms of a cvterm + Desc: a method for fetching all synonyms of a cvterm Ret: an array of synonyms Args: none Side Effects: none - Example: + Example: =cut sub get_synonyms { my $self=shift; - + my $cvterm_id= $self->get_cvterm_id(); my $query= "SELECT synonym FROM cvtermsynonym WHERE cvterm_id= ?"; @@ -1275,26 +1281,26 @@ sub get_alt_id { Note: the db name is stripped off if provided (GO:0003832 is given as 0003832) Example: - + =cut sub map_to_slim { my $self = shift; my @slim = @_; - + my %slim_counts = (); for (my $i=0; $i<@slim; $i++) { - + # strip db name off id # $slim[$i]=~s/.*?(\d+).*/$1/; - + # make a unique list of slim terms # $slim_counts{$slim[$i]}=0; } $self->get_slim_counts(\%slim_counts); - + print Data::Dumper::Dumper(\%slim_counts); my @matches = (); @@ -1302,21 +1308,19 @@ sub map_to_slim { if ($slim_counts{$k}>0) { push @matches, $k; } } return @matches; - - } sub get_slim_counts { my $self = shift; my $slim_counts = shift; - + my $id = $self->identifier(); - + if (exists($slim_counts->{$id}) && defined($slim_counts->{$id})) { $slim_counts->{$id}++; return; } - + foreach my $p ($self->get_parents()) { $p->[0]->get_slim_counts($slim_counts); } @@ -1482,7 +1486,7 @@ sub get_recursive_loci { sub get_recursive_individuals { my $self=shift; - + my $query = "select distinct individual_id from cvtermpath join cvterm on (cvtermpath.object_id = cvterm.cvterm_id or cvtermpath.subject_id = cvterm.cvterm_id) join phenome.individual_dbxref using (dbxref_id ) join phenome.individual using (individual_id) where ( cvtermpath.object_id =?) and individual_dbxref.obsolete = 'f' and individual.obsolete = 'f' and pathdistance > 0 "; diff --git a/lib/CXGN/Chado/Dbxref.pm b/lib/CXGN/Chado/Dbxref.pm index eac22cd..ccc1d3c 100644 --- a/lib/CXGN/Chado/Dbxref.pm +++ b/lib/CXGN/Chado/Dbxref.pm @@ -138,37 +138,36 @@ sub fetch { sub store { my $self= shift; my $dbxref_id= $self->get_dbxref_id() ; - if (!$dbxref_id) { #do an insert - if (!$self->db_exists() ) { # insert a new db + if (!$dbxref_id) { #do an insert + if (!$self->db_exists() ) { # insert a new db $self->d( "***Dbxref.pm: storing a new db '".$self->get_db_name() ."'.\n"); my $db=CXGN::Chado::Db->new($self->get_dbh() ); $db->set_db_name($self->get_db_name() ); $db->store; $self->set_db_id($db->get_db_id() ); - }else { + }else { my $q= "SELECT db_id FROM db WHERE db.name = ?"; my $s=$self->get_dbh()->prepare($q); $s->execute($self->get_db_name() ) ; my ($db_id)= $s->fetchrow_array(); $self->set_db_id($db_id); } - my $existing_id= $self->exists_in_database(); - if (!$existing_id) { - #insert the new dbxref - my $query = "INSERT INTO public.dbxref (db_id, accession, description, version) VALUES(?,?,?,?) RETURNING dbxref_id"; + my $existing_id= $self->exists_in_database(); + if (!$existing_id) { + #insert the new dbxref + my $query = "INSERT INTO public.dbxref (db_id, accession, description, version) VALUES(?,?,?,?) RETURNING dbxref_id"; my $sth= $self->get_dbh()->prepare($query); if (!$self->get_version()) { $self->set_version(""); } #version field is not null $sth->execute($self->get_db_id, $self->get_accession, $self->get_description, $self->get_version()); ($dbxref_id) = $sth->fetchrow_array(); - - $self->set_dbxref_id($dbxref_id); + $self->set_dbxref_id($dbxref_id); } else { $self->set_dbxref_id($existing_id); } }else { # do an update my $query = "UPDATE public.dbxref SET description=?, version=? WHERE dbxref_id=?"; my $sth= $self->get_dbh()->prepare($query); $sth->execute($self->get_description(), $self->get_version(), $dbxref_id); } - return $dbxref_id; + return $self->get_dbxref_id; } =head2 exists_in_database diff --git a/t/CXGN/Chado/cvterm.t b/t/CXGN/Chado/cvterm.t index 8a4f44f..2b42140 100644 --- a/t/CXGN/Chado/cvterm.t +++ b/t/CXGN/Chado/cvterm.t @@ -34,22 +34,31 @@ ok( @root_namespaces > 0, "get_namespaces" ); my @new_roots = CXGN::Chado::Cvterm::get_roots( $dbh, "GO" ); ok( @new_roots == 3, "GO namespace get_roots" ); -my $cv_term = undef; +my $cvterm = undef; my $cv_accession = "GO:0009536"; -$cv_term = CXGN::Chado::Cvterm->new_with_accession( $dbh, $cv_accession ); +$cvterm = CXGN::Chado::Cvterm->new_with_accession( $dbh, $cv_accession ); -is( $cv_term->get_cvterm_name(), "plastid", "get_cvterm_name function" ); -is( $cv_term->get_cv_name(), "cellular_component", "get_cv_name" ); -is( $cv_term->get_accession(), "0009536", 'get_accession' ); -@term_list = $cv_term->get_children(); -cmp_ok( scalar @term_list, '>=', 11, "get_children" ); +is( $cvterm->get_cvterm_name(), "plastid", "get_cvterm_name function" ); +is( $cvterm->get_cv_name(), "cellular_component", "get_cv_name" ); +is( $cvterm->get_accession(), "0009536", 'get_accession' ); +@term_list = $cvterm->get_children(); +#count direct children +cmp_ok( scalar @term_list, '=', 12, "get_children" ); -@term_list = $cv_term->get_parents(); +@term_list = $cvterm->get_parents(); ok( @term_list == 2, "get_parents" ); -is( $term_list[0]->[0]->identifier(), "0044444", "get_parents check 1" ); -is( $term_list[1]->[0]->identifier(), "0043231", "get_parents check 2" ); +my @parent_names = sort ( map ($_->[0]->identifier , @term_list) ) ; +is( $parent_names[0], "0043231", "get_parents check 1" ); +is( $parent_names[1], "0044444", "get_parents check 2" ); + +# now look at the recursive children and parents +my @recursive_children = $cvterm->get_recursive_children; +is( @recursive_children , 121 , "get_recursive_children"); + +my @recursive_parents = $cvterm->get_recursive_parents; +is( @recursive_parents , 11 , "get_recursive_parents"); # make a new cterm and store it, all in a transaction. # then rollback to leave db content intact. @@ -58,7 +67,7 @@ SKIP : { my $ontology = CXGN::Chado::Ontology->new_with_name( $dbh, "biological_process" ); my $new_t = CXGN::Chado::Cvterm->new($dbh); - my $identifier = "0000001"; + my $identifier = "1111111"; $new_t->identifier($identifier); $new_t->set_obsolete(0); @@ -105,7 +114,7 @@ SKIP : { is( @other_parent_info, 2, "go term two parent test" ); @matches = $other_term->map_to_slim(@slim_terms); - + print "term match = " . $matches[0] . "\n"; my $p_term = CXGN::Chado::Cvterm->new_with_accession( $dbh, "GO:$matches[0]" ); @parent_info = $p_term->get_parents(); is( @parent_info, 1, "parent of parent test" ); -- 2.11.4.GIT