3 CXGN::Chado::Organism - a class to create and manipulate Chado organism objects.
12 Naama Menda <nm249@cornell.edu>
16 package CXGN
::Chado
::Organism
;
21 use base qw
/ CXGN::DB::Object / ;
25 Usage: my $organism = CXGN::Chado::Organism->new($schema, $organism_id);
27 Ret: a CXGN::Chado::Organism object
28 Args: $schema, a schema object,
29 $organism_id (optional); if $organism_id is omitted, an empty organism object is created.
30 Side_Effects: accesses the database and checks that the database columns this object uses exist. Dies if the id is not an integer.
39 ### First, bless the class to create the object and set the schema into the object.
40 my $self = $class->SUPER::new
($schema);
42 ## Setting dbh for using some legacy functions from the old Organism object
43 my $dbh= $schema->storage->dbh();
46 #Check if $id is an integer
49 unless ($id =~ m/^\d+$/) { ## The id can be only an integer
50 my $error_message = "\nDATA TYPE ERROR: The organism_id ($id) for CXGN::Chado::Organism->new() IS NOT AN INTEGER.\n\n";
51 croak
($error_message);
54 $organism = $self->get_resultset('Organism::Organism')->find({ organism_id
=> $id });
56 unless (defined $organism) {
57 my $error_message2 = "\nDATABASE COHERENCE ERROR: The organism_id ($id) for CXGN::Chado::Organism->new(\$schema, \$id) ";
58 $error_message2 .= "DOES NOT EXIST IN THE DATABASE.\n";
59 $error_message2 .= "If you need enforce it, you can create an empty object (my \$org = CXGN::Chado::Organism->new(\$schema)";
60 $error_message2 .= " and set the variable (\$org->set_organism_id(\$id);)";
61 warn($error_message2);
65 $self->debug("Creating a new empty Organism object! " . $self->get_resultset('Organism::Organism'));
66 $organism = $self->get_resultset('Organism::Organism')->new({}); ### Create an empty resultset object;
68 ### It's important to set the object row so the accessors can be used in other class functions
69 $self->set_object_row($organism);
79 Desc: store a new organism
82 Side Effects: checks if the organism exists in the database and, if it does, will attempt to update it
89 my $id= $self->get_organism_id();
90 my $schema=$self->get_schema();
91 # No organism id. Check first if genus + species exists in the database
93 my $exists= $self->exists_in_database();
96 my $new_row = $self->get_object_row();
99 $id=$new_row->organism_id();
101 $self->set_organism_id($id);
102 $self->d( "Inserted a new organism " . $self->get_organism_id() ." database_id = $id\n");
104 $self->set_organism_id($exists);
105 my $existing_organism=$self->get_resultset('Organism::Organism')->find($exists);
106 # Don't update here if the organism already exists. The caller should use exists_in_database()
107 # and instantiate a new organism object with the database organism_id.
108 # Updating here is not a good idea, since it might not be what the user intended to do
109 # and it can mess up the database.
111 $self->debug("Organism " . $self->get_species() . " " . $self->get_genus() . " exists in database!");
115 $self->d( "Updating existing organism_id $id\n");
116 $self->get_object_row()->update();
118 return $self->get_organism_id()
122 =head2 exists_in_database
124 Usage: $self->exists_in_database()
125 Desc: check if the genus + species exists in the organism table
133 sub exists_in_database
{
135 my $genus= $self->get_genus() || '' ;
136 my $species = $self->get_species() || '' ;
137 my $o = $self->get_resultset('Organism::Organism')->search({
138 genus
=> { 'ilike' => $genus },
139 species
=> { 'ilike' => $species }
140 })->single(); # ->single() for retrieving a single row (there sould be only one genus-species entry)
141 if ($o) { return $o->organism_id(); }
143 # check whether the genus+species were set together in the species field
144 if ($species =~ m/(.*)\s(.*)/) {
145 $o = $self->get_resultset('Organism::Organism')->search(
147 genus
=> { 'ilike'=> $1 },
148 species
=> {'ilike' => $2 }
150 if ($o) { return $o->organism_id(); }
158 Usage: $self->get_dbxrefs()
159 Desc: create a list of all dbxref objects associated with the organism
160 via the organism_dbxref table
161 Ret: a list of CXGN::Chado::Dbxref objects
170 my @organism_dbxrefs= $self->get_schema()->resultset('Organism::OrganismDbxref')->search( {organism_id
=> $self->get_organism_id() } );
173 foreach my $od (@organism_dbxrefs) {
174 my $dbxref_id= $od->dbxref_id();
175 push @dbxrefs, $self->get_resultset("General::Dbxref")->search( { dbxref_id
=> $dbxref_id } );
183 Usage: $self->add_dbxref($dbxref)
184 Desc: store a new organism_dbxref
187 Side Effects: accesses the database
196 my $schema=$self->get_schema;
197 my $organism_id= $self->get_organism_id();
198 my $dbxref_id = $dbxref->get_dbxref_id();
199 my $organism_dbxref = $schema->resultset('Organism::OrganismDbxref')->find_or_create(
201 organism_id
=> $organism_id,
202 dbxref_id
=> $dbxref_id,
205 my $id = $organism_dbxref->get_column('dbxref_id');
212 Usage: $self->add_synonym($synonym)
213 Desc: Store a new synonym for this organism
214 Ret: an organismprop object
215 Args: a synonym (text)
216 Side Effects: stores a new organismprop with a type_id of 'synonym'
225 my $cvterm= $self->get_resultset("Cv::Cvterm")->search( {name
=> 'synonym' } )->single();;
228 $type_id= $cvterm->get_column('cvterm_id');
230 my @organismprops= $self->get_schema()->resultset("Organism::Organismprop")->search(
232 organism_id
=>$self->get_organism_id(),
235 if (@organismprops) {
236 my @sorted_ranks = reverse sort { $a <=> $b } ( map($_->get_column('rank'), @organismprops) ) ;
237 my $max_rank = $sorted_ranks[0];
240 my ($organismprop)= $self->get_schema()->resultset("Organism::Organismprop")->search(
242 organism_id
=> $self->get_organism_id(),
246 if (!$organismprop) {
247 $organismprop= $self->get_schema()->resultset("Organism::Organismprop")->create(
249 organism_id
=> $self->get_organism_id(),
254 return $organismprop;
257 $self->d("add_synonym ERROR: 'synonym' is not a cvterm! Please update your cvterm table. a cvterm with name='synonym' is required for storing organismprop for synonyms\n");
264 Usage: my @synonyms= $self->get_synonyms()
265 Desc: find the synonyms for this organism
268 Side Effects: get the organismprops for type_id of cvterm.name = synonym
275 my @props= $self->get_resultset("Organism::Organismprop")->search(
276 { organism_id
=> $self->get_organism_id(),
277 type_id
=> $self->get_resultset("Cv::Cvterm")->search( { name
=> 'synonym'} )->first()->get_column('cvterm_id')
280 foreach my $prop (@props) {
281 push @synonyms, $prop->get_column('value');
290 Usage: my $ploidy= $self->get_ploidy()
291 Desc: find the ploidy value for this organism
294 Side Effects: get the organismprops for type_id of cvterm.name = ploidy
303 my $value = $self->get_organismprop($name);
308 =head2 get_chromosome_number
310 Usage: my $chr= $self->get_chromosome_number()
311 Desc: find the chromosome number value for this organism
314 Side Effects: get the organismprops for type_id of cvterm.name = chromosome_number_variation
319 sub get_chromosome_number
{
321 my $name = "chromosome_number_variation";
322 my $value = $self->get_organismprop($name);
326 =head2 get_genome_size
328 Usage: my $genome_size= $self->get_genome_size()
329 Desc: find the genome size value for this organism
332 Side Effects: get the organismprops for type_id of cvterm.name = 'genome size'
337 sub get_genome_size
{
339 my $name = "genome size";
340 my $value = $self->get_organismprop($name);
345 =head2 get_est_attribution
347 Usage: my $att= $self->get_est_attribution()
348 Desc: find the est attribution for this organism
351 Side Effects: get the organismprops for type_id of cvterm.name = 'est attribution'
356 sub get_est_attribution
{
358 my $name= "est attribution";
359 my $value= $self->get_organismprop($name);
363 =head2 get_organismprop
365 Usage: $self->get_organismprop($value)
366 Desc: find the value of the organismprop whose type is the cvterm named $value
367 Ret: a string or undef
368 Args: $value (a cvterm name)
374 sub get_organismprop
{
378 my ($prop)= $self->get_resultset("Organism::Organismprop")->search(
379 { organism_id
=> $self->get_organism_id(),
380 type_id
=> $self->get_resultset("Cv::Cvterm")->search( { name
=> $name } )->first()->get_column('cvterm_id')
383 my $value= $prop->get_column('value');
391 Usage: $self->get_parent()
392 Desc: get the parent organism of this object
393 Ret: Chado::Organism object or undef if organism has no parent (i.e. is the root in the tree)
395 Side Effects: accesses the phylonode table
402 my $organism_id = $self->get_organism_id();
404 my ($phylonode)= $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
405 { organism_id
=>$self->get_organism_id() })->search_related('phylonode');
408 my $parent_phylonode_id= $phylonode->get_column('parent_phylonode_id');
410 my ($parent_phylonode)= $self->get_resultset("Phylogeny::Phylonode")->search(
411 { phylonode_id
=> $parent_phylonode_id } );
412 if ($parent_phylonode) {
413 my ($phylonode_organism)= $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
414 { phylonode_id
=> $parent_phylonode->get_column('phylonode_id') } );
416 my $parent_organism= CXGN
::Chado
::Organism
->new($self->get_schema(), $phylonode_organism->organism_id );
417 return $parent_organism;
424 =head2 get_direct_children
426 Usage: $self->get_direct_children()
427 Desc: get the child organisms of this object
428 Ret: list of CXGN::Chado::Organism objects or an empty list if organism has no children (i.e. is a leaf in the tree)
430 Side Effects: accesses the phylonode table
435 sub get_direct_children
{
437 my $organism_id = $self->get_organism_id();
441 my ($phylonode)= $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
442 { organism_id
=>$self->get_organism_id() })->search_related('phylonode');
446 #my $parent_phylonode_id= $phylonode->get_column('parent_phylonode_id');
448 my @child_phylonodes = $self->get_resultset("Phylogeny::Phylonode")->search(
449 { parent_phylonode_id
=> $phylonode->phylonode_id() } );
451 foreach my $d (@child_phylonodes) {
452 my ($phylonode_organism)= $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
453 { phylonode_id
=> $d->get_column('phylonode_id') } );
455 my $child_organism= CXGN
::Chado
::Organism
->new($self->get_schema(), $phylonode_organism->organism_id() );
456 push @children, $child_organism;
464 Usage: $self->get_taxon()
465 Desc: get the taxon for this organism
468 Side Effects: looks in the phylonode table
475 my ($phylonode) = $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
476 { organism_id
=>$self->get_organism_id() } )->search_related("phylonode");
478 my $type_id = $phylonode->get_column('type_id');
480 my ($cvterm) = $self->get_resultset("Cv::Cvterm")->find( { cvterm_id
=>$type_id });
482 my $taxon = $cvterm->get_column('name');
493 #############################################
495 =head2 accessors get_species, set_species
507 return $self->get_object_row()->get_column('species');
512 my $species=shift || croak
" No argument passed to set_species!!!";
513 $self->get_object_row()->set_column(species
=> $species ) ;
517 =head2 accessors get_genus, set_genus
529 return $self->get_object_row()->get_column("genus");
534 $self->get_object_row()->set_column(genus
=> shift);
537 =head2 accessors get_abbreviation, set_abbreviation
547 sub get_abbreviation
{
549 return $self->get_object_row()->get_column("abbreviation");
552 sub set_abbreviation
{
554 $self->get_object_row()->set_column(abbreviation
=> shift);
557 =head2 accessors get_common_name, set_common_name
567 sub get_common_name
{
569 return $self->get_object_row()->get_column("common_name");
572 sub set_common_name
{
574 $self->get_object_row()->set_column(common_name
=> shift);
578 =head2 accessors get_comment, set_comment
590 return $self->get_object_row()->get_column("comment");
595 $self->get_object_row()->set_column(comment
=> shift);
601 =head2 accessors get_organism_id, set_organism_id
611 sub get_organism_id
{
613 return $self->get_object_row()->get_column('organism_id');
616 sub set_organism_id
{
618 my $organism_id=shift || croak
"No argument passed to organism_id";
620 #check if id is in the database
621 $self->get_object_row()->set_column(organism_id
=>$organism_id);
627 return $self->{object_row
};
632 $self->{object_row
} = shift;
637 Usage: $self->get_resultset(ModuleName::TableName)
638 Desc: Get a ResultSet object for source_name
639 Ret: a ResultSet object
649 return $self->get_schema()->resultset("$source");
654 ################### Functions adapted from the old organism object
656 =head2 new_with_taxon_id
658 Usage: my $organism = CXGN::Chado::Organism->new_with_taxon_id($dbh, $gb_taxon_id)
659 Desc: create a new organism object using genbank taxon_id instead of organism_id
660 Ret: a new organism object
661 Args: a dbh or a dbic schema object
662 Side Effects: creates a new Bio::Chado::Schema object
667 sub new_with_taxon_id
{
671 $schema = $class->_ensure_dbic( $schema );
673 my $taxon_id = shift;
675 my ($organism)= $schema->resultset("General::Db")->search(
676 { name
=> 'DB:NCBI_taxonomy' }) ->
677 search_related
('dbxrefs', { accession
=> $taxon_id } )->
678 search_related
('organism_dbxrefs')->
679 search_related
('organism');
681 return CXGN
::Chado
::Organism
->new($schema, $organism->get_column('organism_id') );
687 my ($self,$dbh) = @_;
688 return $dbh if $dbh->can('resultset');
690 $dbh = $dbh->get_actual_dbh if $dbh->isa("CXGN::DB::Connection") ;
692 require Bio
::Chado
::Schema
;
693 return Bio
::Chado
::Schema
->connect( sub { $dbh->clone },
694 { on_connect_do
=> ['SET search_path TO public'] },
700 =head2 get_genbank_taxon_id
702 Usage: $self->get_genbank_taxon_id
703 Desc: get the genbank taxon id of this organism
711 sub get_genbank_taxon_id
{
713 my $schema= $self->get_schema();
714 my ($db) = $schema->resultset("General::Db")->search( { name
=> 'DB:NCBI_taxonomy' } );
715 my $db_id = $db->get_column('db_id');
717 my ($dbxref)= $schema->resultset("Organism::OrganismDbxref")->search(
718 { organism_id
=> $self->get_organism_id() })->
719 search_related
('dbxref', {db_id
=> $db_id } ) ;
720 my $accession = $dbxref->get_column('accession');
726 =head2 new_with_common_name
728 Usage: my $organism = CXGN::Chado::Organism->new_with_common_name($schema, $common_name)
729 Desc: create a new organism object using common_name instead of organism_id
730 Each common_name should have a 'default' organism associated in the organismprop table
731 Ret: a new organism object
733 Side Effects: make a new Bio::Chado::Schema connection if $schema arg is a CXGN::DB::Connection object
739 ###Need to figure out what to do with common names
740 sub new_with_common_name
{
743 my $common_name = shift;
745 $schema = $self->_ensure_dbic( $schema );
747 my ($organism)= $schema->resultset("Cv::Cvterm")->search(
748 { name
=> 'common_name' }) ->
749 search_related
('organismprops', { value
=> $common_name } )->
750 search_related
('organism');
751 return undef if !$organism;
752 return CXGN
::Chado
::Organism
->new($schema, $organism->get_column('organism_id') );
757 =head2 get_intergroup_species_name DEPRECATED see get_group_common_name
767 sub get_intergroup_species_name
{
770 warn "DEPRECATED. Replaced by get_group_common_name";
771 return $self->get_group_common_name();
776 =head2 get_group_common_name
778 Usage: my $group_common_name= $self->get_group_common_name()
779 Desc: The unigenes, loci and phenome are grouped by interspecific group class.
780 e.g. for all tomato species we have the same number of unigene, loci or phenotypes accessions.
781 This function gets the group common_name for this organism.
785 Example: my $species_intergroup= $organism->get_group_common_name()
789 sub get_group_common_name
{
794 FROM sgn.organismgroup
795 JOIN sgn.organismgroup_member USING (organismgroup_id)
796 WHERE organism_id=? AND type = ?";
798 my $sth=$self->get_dbh()->prepare($query);
799 $sth->execute($self->get_organism_id() , 'common name');
801 my ($common_name) = $sth->fetchrow_array();
809 #The following methods need database changes of the organism and common_name FK
815 Usage: my $map_data = $self->get_map_data ();
816 Desc: Get the map link for an organism. The organism could be one of the parents.
817 Ret: array of [ map short_name , map_id ], [], ...
820 Example: my ($short_name, $map_id) = $organism->get_map_data();
828 my $query = "SELECT DISTINCT
832 INNER JOIN sgn.accession ON (parent_1=accession_id or parent_2=accession_id or ancestor=accession_id)
833 INNER JOIN public.organism on (public.organism.organism_id = sgn.accession.chado_organism_id)
834 WHERE public.organism.organism_id = ?";
836 my $sth=$self->get_dbh()->prepare($query);
837 $sth->execute($self->get_organism_id() );
840 while (my ($short_name, $map_id) = $sth -> fetchrow_array
()) {
841 push @map_data, [$short_name, $map_id];
847 #####################
849 =head2 get_loci_count
851 Usage: my $loci_count = $self->get_loci_count();
852 Desc: Get the loci count for an organism.
856 Example: my $loci_count = $organism->get_loci_count();
864 my $query = "SELECT COUNT
865 (phenome.locus.locus_id)
867 JOIN sgn.common_name using (common_name_id)
868 JOIN sgn.organismgroup on (common_name.common_name = organismgroup.name )
869 JOIN sgn.organismgroup_member USING (organismgroup_id)
870 JOIN public.organism USING (organism_id)
871 WHERE locus.obsolete = 'f' AND public.organism.organism_id=?";
873 my $sth=$self->get_dbh()->prepare($query);
874 $sth->execute($self->get_organism_id() );
876 my ($locus_count) = $sth->fetchrow_array() ;
880 =head2 get_library_list
882 Usage: my $library_count = $self->get_library_list();
883 Desc: Get the library names.
884 Ret: a list of library_shortnames
887 Example: my $lib = $organism->get_library_list();
891 sub get_library_list
{
895 my $query = "SELECT library_shortname
897 JOIN public.organism on (organism.organism_id = library.chado_organism_id)
898 WHERE public.organism.organism_id=?";
899 my $sth = $self->get_dbh()->prepare($query);
900 $sth->execute($self->get_organism_id() );
903 while ( my ($library_shortname)=$sth->fetchrow_array() ) {
904 push @libraries, $library_shortname;
911 Usage: my $est_count = $organism->get_est_count();
912 Desc: Get the EST count for an organism. This number is only for the ESTs where status=0 and flags=0.
915 Side Effects: THIS FUNCTION IS VERY SLOW. Currently not called from the organism page.
916 Example: my $est_n = $organism->get_ests_count();
924 my $query = "SELECT COUNT(
927 JOIN sgn.seqread USING (read_id)
928 JOIN sgn.clone USING (clone_id)
929 JOIN sgn.library USING (library_id)
930 JOIN public.organism ON (organism.organism_id = library.chado_organism_id)
931 WHERE sgn.est.status = 0 and sgn.est.flags = 0 and public.organism.organism_id=?";
933 my $sth = $self->get_dbh()->prepare($query);
934 $sth->execute($self->get_organism_id() );
935 my ($est_n) = $sth->fetchrow_array() ;
939 =head2 get_phenotype_count
941 Usage: my $phenotypes =$self->get_phenotype_count();
942 Desc: Get the phenotypes count for an organism.
943 Ret: An integer or undef
950 sub get_phenotype_count
{
954 my $query=" SELECT COUNT (phenome.individual.individual_id)
955 FROM phenome.individual
956 JOIN sgn.common_name using (common_name_id)
957 JOIN sgn.organismgroup on (common_name.common_name = organismgroup.name )
958 JOIN sgn.organismgroup_member USING (organismgroup_id)
959 JOIN public.organism USING (organism_id)
960 WHERE individual.obsolete = 'f' AND public.organism.organism_id=?";
962 my $sth=$self->get_dbh()->prepare($query);
963 $sth->execute($self->get_organism_id() );
966 while (my ($pheno_count) = $sth->fetchrow_array()) {
967 $phenotypes += $pheno_count;
974 ##########################
976 =head2 get_organism_by_species
978 Usage: CXGN::Chado::Organism::get_organism_by_species($species, $schema)
980 Ret: Organism object or undef
981 Args: species name and a schema object
988 sub get_organism_by_species
{
991 my ($organism)=$schema->resultset("Organism::Organism")->find(
992 { species
=> $species }
993 ); #should be just one species...
995 return $organism || undef ;
999 =head2 get_organism_by_tax
1001 Usage: $self->get_organism_by_tax($taxon)
1002 Desc: Find the organism row for the higher level taxon of the current organism.
1003 Ret: Organism object or undef
1004 Args: taxon order (e.g. order, family , tribe, etc.)
1011 sub get_organism_by_tax
{
1015 ->get_resultset("Cv::Cv")
1016 ->search({ 'me.name' => 'taxonomy'})
1017 ->search_related('cvterms', {
1018 'cvterms.name' => $taxon,
1025 my $type_id = $cvterm->get_column('cvterm_id');
1027 my ($self_phylonode)= $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
1028 { organism_id
=> $self->get_organism_id() } )->search_related('phylonode');
1029 if ($self_phylonode) {
1030 my $left_idx= $self_phylonode->get_column('left_idx');
1031 my $right_idx=$self_phylonode->get_column('right_idx');
1032 my ($phylonode)=$self->get_resultset("Phylogeny::Phylonode")->search_literal(
1033 ('left_idx < ? AND right_idx > ? AND type_id = ?' , ($left_idx, $right_idx, $type_id) ));
1036 my ($organism)= $self->get_resultset("Phylogeny::PhylonodeOrganism")->search(
1037 { phylonode_id
=> $phylonode->get_column('phylonode_id') } )->search_related('organism');
1039 return $organism || undef ;
1041 } else { warn("NO PHYLONODE stored for organism " . $self->get_abbreviation() . "\n"); }
1042 } else { warn("NO CVTERM FOUND for term '$taxon'!! Check your database\n"); }
1047 =head2 new_with_species
1049 Usage: my $o=CXGN::Chado::Organism->new_with_species($schema, 'my node name')
1050 Desc: Instantiate a new organism object with species name.
1051 species name is anything that is stored in the species column
1052 which means it does not have to be a taxonomic species, but can be any tax name,
1053 as long as it is unique.
1054 Ret: an organism object
1055 Args: Bio::Chado::Schema object and a string
1061 sub new_with_species
{
1065 my ($organism) = $schema->resultset('Organism::Organism')->search(
1066 { species
=> $species }
1069 my $o = CXGN
::Chado
::Organism
->new($schema, $organism->get_column('organism_id') );
1074 =head2 has_avail_genome
1076 Usage: my $o=CXGN::Chado::Organism->new()
1077 my $hasgenome = $o->has_avail_genome();
1078 Desc: Return 'yes' if organism has an available genome information; 'no' otherwise.
1085 sub has_avail_genome
{
1089 if (defined($self->get_ploidy()) ||defined( $self->get_genome_size()) || defined($self->get_chromosome_number())){
1090 $avail_genome= 'yes';
1092 $avail_genome= 'no';
1094 return $avail_genome;
1096 =head2 has_avail_map
1098 Usage: my $o=CXGN::Chado::Organism->new()
1099 my $hasmap = $o->has_avail_map();
1100 Desc: Return 'yes' if organism has an available map; 'no' otherwise.
1111 if(defined($self->get_map_data())){