4 SGN::Model::Cvterm - a simple model that provides information on cvterms
10 get_cvterm_object retrieves the term as a CXGN::Chado::Cvterm object.
12 get_cvterm_row retrieves the term as a DBIx::Class row.
14 Both function take a schema object, cvterm name and a cv name as an argument.
16 If a term is not in the database, undef is returned.
24 package SGN
::Model
::Cvterm
;
26 use CXGN
::Chado
::Cvterm
;
29 sub get_cvterm_object
{
32 my $cvterm_name = shift;
35 my $cv = $schema->resultset('Cv::Cv')->find( { name
=> $cv_name });
38 print STDERR
"CV $cv_name not found. Ignoring.";
41 my $term = CXGN
::Chado
::Cvterm
->new_with_term_name(
56 my $cvterm = $schema->resultset('Cv::Cvterm')->find(
59 'cv.name' => $cv_name,
65 sub get_cvterm_row_from_trait_name
{
68 my $trait_name = shift;
70 #print STDERR $trait_name;
72 #fieldbook trait string should be "$trait_name|$dbname:$trait_accession" e.g. plant height|CO_334:0000123. substring on last occurance of |
74 my $full_accession = substr $trait_name, rindex( $trait_name, $delim ) + length($delim);
75 my $full_accession_length = length($full_accession) + length($delim);
76 my $full_cvterm_name = substr($trait_name, 0, -$full_accession_length);
77 my @db_comps = split (/:/ , $full_accession);
78 my $db_name = shift @db_comps;
79 my $accession = join ':', @db_comps;
81 #check if the trait name string does have
82 $accession =~ s/\s+$//;
83 $accession =~ s/^\s+//;
87 my $db_rs = $schema->resultset("General::Db")->search( { 'me.name' => $db_name });
90 $trait_cvterm = $schema->resultset("Cv::Cvterm")
92 'dbxref.db_id' => $db_rs->first()->db_id(),
93 'dbxref.accession' => $accession
100 return $trait_cvterm;
103 # Checks for an ontology trait that has either the short name or matching full name
104 sub find_trait_by_name
{
109 # Checks the cvterm table for traits that match the short name, long name, or id.
110 my $query = "select cvterm.cvterm_id from
113 dbxref using (dbxref_id)
117 cvterm_relationship as rel on rel.subject_id=cvterm.cvterm_id
119 cvterm as reltype on (rel.type_id = reltype.cvterm_id)
121 reltype.name = 'VARIABLE_OF'
126 (cvterm.name || '|' || db.name || ':' || dbxref.accession) ilike ?
128 my $h = $schema->storage->dbh->prepare($query);
129 $h->execute($name, $name);
130 my $cvterm_id = $h->fetchrow();
135 # Get the ontology trait if there is an id for that trait
136 sub find_trait_by_id
{
140 my $cvterm_id = shift;
142 # Checks the cvterm table for traits that match the short name, long name, or id.
143 my $query = "select cvterm.cvterm_id from
146 dbxref using (dbxref_id)
150 cvterm_relationship as rel on rel.subject_id=cvterm.cvterm_id
152 cvterm as reltype on (rel.type_id = reltype.cvterm_id)
154 reltype.name = 'VARIABLE_OF'
156 cvterm.cvterm_id=?;";
157 my $h = $schema->storage->dbh->prepare($query);
158 $h->execute($cvterm_id);
159 my $cvterm_id = $h->fetchrow();
164 sub get_trait_from_exact_components
{
167 my $component_cvterm_ids = shift;
169 my @intersect_selects;
170 foreach my $cvterm_id (@
$component_cvterm_ids){
171 push @intersect_selects, "SELECT object_id FROM cvterm_relationship WHERE subject_id = $cvterm_id";
173 push @intersect_selects, "SELECT object_id FROM cvterm_relationship GROUP BY 1 HAVING count(object_id) = ".scalar(@
$component_cvterm_ids);
174 my $intersect_sql = join ' INTERSECT ', @intersect_selects;
175 my $h = $schema->storage->dbh->prepare($intersect_sql);
177 my @trait_cvterm_ids;
178 while(my ($trait_cvterm_id) = $h->fetchrow_array()){
179 push @trait_cvterm_ids, $trait_cvterm_id;
181 if (scalar(@trait_cvterm_ids) > 1){
182 die "More than one composed trait returned for the given set of exact componenets\n";
184 return $trait_cvterm_ids[0];
187 sub get_trait_from_cvterm_id
{
189 my $cvterm_id = shift;
190 my $format = shift; #can be 'concise' for just the name or 'extended' for name|DB:0000001
191 if ($format eq 'concise'){
192 $q = "SELECT name FROM cvterm WHERE cvterm_id=?;";
194 if ($format eq 'extended'){
195 $q = "SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text FROM cvterm JOIN dbxref USING(dbxref_id) JOIN db USING(db_id) WHERE cvterm_id=?;";
197 my $h = $schema->storage->dbh->prepare($q);
198 $h->execute($cvterm_id);
199 $name = $h->fetchrow();
203 sub _concatenate_cvterm_array
{
205 my $delimiter = shift;
209 #print STDERR "_concatenate_cvterm_array\n";
210 #print STDERR Dumper $first;
211 #print STDERR Dumper $second;
212 my %first_hash = %$first;
213 foreach my $f (keys %first_hash){
214 my $ids = $first_hash{$f};
215 foreach my $s (@
$second){
216 my @component_ids = @
$ids;
217 #print STDERR "_iterate\n";
218 my $name = get_trait_from_cvterm_id
($schema, $s, $format);
219 my $concatenated_cvterm = $f.$delimiter.$name;
220 push @component_ids, $s;
221 delete $first_hash{$f};
222 $first_hash{$concatenated_cvterm} = \
@component_ids;
223 #print STDERR Dumper \%first_hash;
228 sub get_traits_from_component_categories
{
231 my $allowed_composed_cvs = shift;
232 my $composable_cvterm_delimiter = shift;
233 my $composable_cvterm_format = shift;
234 my $cvterm_id_hash = shift;
235 #print STDERR Dumper $cvterm_id_hash;
236 #print STDERR Dumper $allowed_composed_cvs;
237 #print STDERR Dumper $composable_cvterm_format;
239 my %id_hash = %$cvterm_id_hash;
240 delete @id_hash{ grep { scalar @
{$id_hash{$_}} < 1 } keys %id_hash }; #remove cvtypes with no ids
242 my @ordered_id_groups;
243 foreach my $cv_name (@
$allowed_composed_cvs){
244 push @ordered_id_groups, $id_hash{$cv_name};
247 my $id_array_count = scalar(@ordered_id_groups);
248 my $concatenated_cvterms;
249 foreach (@
{$ordered_id_groups[0]}){
250 my $name = get_trait_from_cvterm_id
($schema, $_, $composable_cvterm_format);
251 $concatenated_cvterms->{$name} = [$_];
253 for my $n (0 .. $id_array_count-2){
254 $concatenated_cvterms = _concatenate_cvterm_array
($schema, $composable_cvterm_delimiter, $composable_cvterm_format, $concatenated_cvterms, $ordered_id_groups[$n+1]);
257 #print STDERR "possible traits are: ".Dumper($concatenated_cvterms)."\n";
261 foreach my $key (sort keys %$concatenated_cvterms){
262 #my $existing_cvterm_name = $schema->resultset('Cv::Cvterm')->find({ name=>$key });
263 #if ($existing_cvterm_name){
264 #push @existing_traits, [$existing_cvterm_name->cvterm_id(), $key];
267 my $existing_cvterm_id = $self->get_trait_from_exact_components($schema, $concatenated_cvterms->{$key});
268 if ($existing_cvterm_id){
269 my $existing_name = get_trait_from_cvterm_id
($schema, $existing_cvterm_id, 'extended');
270 push @existing_traits, [$existing_cvterm_id, $existing_name];
273 push @new_traits, [ $concatenated_cvterms->{$key}, $key ];
276 #print STDERR "existing traits are: ".Dumper(/@existing_traits)." and new traits are".Dumper(/@new_traits)."\n";
279 existing_traits
=> \
@existing_traits,
280 new_traits
=> \
@new_traits
284 sub get_traits_from_components
{
287 my $component_cvterm_ids = shift;
288 my @component_cvterm_ids = @
$component_cvterm_ids;
290 my $contains_cvterm_id = $self->get_cvterm_row($schema, 'contains', 'relationship')->cvterm_id();
292 my $q = "SELECT object_id FROM cvterm_relationship WHERE type_id = ? AND subject_id IN (@{[join',', ('?') x @component_cvterm_ids]}) GROUP BY 1";
294 my $h = $schema->storage->dbh->prepare($q);
295 $h->execute($contains_cvterm_id, @component_cvterm_ids);
296 my @trait_cvterm_ids;
297 while(my ($trait_cvterm_id) = $h->fetchrow_array()){
298 push @trait_cvterm_ids, $trait_cvterm_id;
300 return \
@trait_cvterm_ids;
303 sub get_components_from_trait
{
306 my $trait_cvterm_id = shift;
308 my $contains_cvterm_id = $self->get_cvterm_row($schema, 'contains', 'relationship')->cvterm_id();
309 my $q = "SELECT subject_id FROM cvterm_relationship WHERE object_id = $trait_cvterm_id and type_id = $contains_cvterm_id;";
310 my $h = $schema->storage->dbh->prepare($q);
312 my @component_cvterm_ids;
313 while(my ($component_cvterm_id) = $h->fetchrow_array()){
314 push @component_cvterm_ids, $component_cvterm_id;
316 return \
@component_cvterm_ids;
320 =head2 get_cv_names_from_db_name
331 sub get_cv_names_from_db_name
{
337 my $q = "select distinct(cv.name) from db join dbxref using(db_id) join cvterm using(dbxref_id) join cv using(cv_id) where db.name=?";
338 my $h = $schema->storage->dbh()->prepare($q);
339 $h->execute($db_name);
343 while (my $cvn = $h->fetchrow_array()) {
344 push @cv_names, $cvn;
351 =head2 get_vcf_genotyping_cvterm_id
353 Usage: my $cvterm_id = SGN::Model::Cvterm->get_vcf_genotyping_cvterm_id($schema, {'protocol_id' => $protocol_id});
354 Desc: A vcf genotyping file, associated with a genotyping protocol, used to load to the db could be SNP or PHG data. This method queries for the id of the cvterm describing the genotype data type of the vcf file for a genotyping protocol using a protocol id or genotype id. Querying using the genotype id is useful when interested on a single accession genotype data from a genotyping protocol.
355 Ret: the cvterm id for the vcf (snp or phg) genotyping cvterm
356 Args: bcs schema object, and a hashref of either {'protocol_id' => $protocol_id} or {'genotype_id' => $genotype_id}
361 sub get_vcf_genotyping_cvterm_id
{
364 my $search_param = shift;
365 my $protocol_id = $search_param->{protocol_id
};
366 my $genotype_id = $search_param->{genotype_id
};
372 $query = "SELECT genotypeprop.type_id
374 JOIN nd_experiment_stock ON (stock.stock_id=nd_experiment_stock.stock_id)
375 JOIN nd_experiment USING (nd_experiment_id)
376 JOIN nd_experiment_protocol USING (nd_experiment_id)
377 JOIN nd_experiment_project USING(nd_experiment_id)
378 JOIN nd_experiment_genotype USING (nd_experiment_id)
379 JOIN nd_protocol USING (nd_protocol_id)
380 LEFT JOIN nd_protocolprop ON (nd_protocolprop.nd_protocol_id = nd_protocol.nd_protocol_id)
381 JOIN genotype USING (genotype_id)
382 LEFT JOIN genotypeprop USING (genotype_id)
383 LEFT JOIN cvterm ON genotypeprop.type_id = cvterm.cvterm_id
384 WHERE nd_protocol.nd_protocol_id = ?
385 AND cvterm.name ~ 'vcf_\\w+_genotyping'
390 } elsif ($genotype_id) {
391 $query = "SELECT genotypeprop.type_id
393 LEFT JOIN cvterm ON genotypeprop.type_id = cvterm.cvterm_id
394 WHERE genotype_id = ?
395 AND cvterm.name ~ 'vcf_\\w+_genotyping'
400 die "\nSearch for vcf genotyping cvterm id requires either protocol id or genotype id.";
403 my $h = $schema->storage->dbh()->prepare($query);
406 my $vcf_genotype_type_id = $h->fetchrow_array();
408 return $vcf_genotype_type_id;