Merge pull request #5230 from solgenomics/topic/open_pollinated
[sgn.git] / lib / SGN / Model / Cvterm.pm
blobe587cf30af9c2e2724ee1a9b8cdbc2d982d97e9e
2 =head1 NAME
4 SGN::Model::Cvterm - a simple model that provides information on cvterms
6 =head1 DESCRIPTION
8 Retrieves cv terms.
10 get_cvterm_object retrieves the term as a CXGN::Chado::Cvterm object.
12 get_cvterm_row retrieves the term as a DBIx::Class row.
14 Both function take a schema object, cvterm name and a cv name as an argument.
16 If a term is not in the database, undef is returned.
18 =head1 AUTHOR
20 Lukas Mueller
22 =cut
24 package SGN::Model::Cvterm;
26 use CXGN::Chado::Cvterm;
27 use Data::Dumper;
29 sub get_cvterm_object {
30 my $self = shift;
31 my $schema = shift;
32 my $cvterm_name = shift;
33 my $cv_name = shift;
35 my $cv = $schema->resultset('Cv::Cv')->find( { name => $cv_name });
37 if (! $cv) {
38 print STDERR "CV $cv_name not found. Ignoring.";
39 return undef;
41 my $term = CXGN::Chado::Cvterm->new_with_term_name(
42 $self->dbc()->dbh(),
43 $cvterm_name,
44 $cv->cv_id()
47 return $term;
50 sub get_cvterm_row {
51 my $self = shift;
52 my $schema = shift;
53 my $name = shift;
54 my $cv_name = shift;
56 my $cvterm = $schema->resultset('Cv::Cvterm')->find(
58 'me.name' => $name,
59 'cv.name' => $cv_name,
60 }, { join => 'cv' });
62 return $cvterm;
65 sub get_cvterm_row_from_trait_name {
66 my $self = shift;
67 my $schema = shift;
68 my $trait_name = shift;
70 #print STDERR $trait_name;
72 #fieldbook trait string should be "$trait_name|$dbname:$trait_accession" e.g. plant height|CO_334:0000123. substring on last occurance of |
73 my $delim = "|";
74 my $full_accession = substr $trait_name, rindex( $trait_name, $delim ) + length($delim);
75 my $full_accession_length = length($full_accession) + length($delim);
76 my $full_cvterm_name = substr($trait_name, 0, -$full_accession_length);
77 my @db_comps = split (/:/ , $full_accession);
78 my $db_name = shift @db_comps;
79 my $accession = join ':', @db_comps;
81 #check if the trait name string does have
82 $accession =~ s/\s+$//;
83 $accession =~ s/^\s+//;
84 $db_name =~ s/\s+$//;
85 $db_name =~ s/^\s+//;
87 my $db_rs = $schema->resultset("General::Db")->search( { 'me.name' => $db_name });
88 my $trait_cvterm;
89 if ($db_rs->first()){
90 $trait_cvterm = $schema->resultset("Cv::Cvterm")
91 ->find({
92 'dbxref.db_id' => $db_rs->first()->db_id(),
93 'dbxref.accession' => $accession
96 'join' => 'dbxref'
100 return $trait_cvterm;
103 # Checks for an ontology trait that has either the short name or matching full name
104 sub find_trait_by_name {
105 my $self = shift;
106 my $schema = shift;
107 my $name = shift;
109 # Checks the cvterm table for traits that match the short name, long name, or id.
110 my $query = "select cvterm.cvterm_id from
111 cvterm
112 join
113 dbxref using (dbxref_id)
114 join
115 db using (db_id)
116 join
117 cvterm_relationship as rel on rel.subject_id=cvterm.cvterm_id
118 JOIN
119 cvterm as reltype on (rel.type_id = reltype.cvterm_id)
120 where
121 reltype.name = 'VARIABLE_OF'
124 cvterm.name ilike ?
126 (cvterm.name || '|' || db.name || ':' || dbxref.accession) ilike ?
128 my $h = $schema->storage->dbh->prepare($query);
129 $h->execute($name, $name);
130 my $cvterm_id = $h->fetchrow();
132 return $cvterm_id;
135 # Get the ontology trait if there is an id for that trait
136 sub find_trait_by_id {
138 my $self = shift;
139 my $schema = shift;
140 my $cvterm_id = shift;
142 # Checks the cvterm table for traits that match the short name, long name, or id.
143 my $query = "select cvterm.cvterm_id from
144 cvterm
145 join
146 dbxref using (dbxref_id)
147 join
148 db using (db_id)
149 join
150 cvterm_relationship as rel on rel.subject_id=cvterm.cvterm_id
151 JOIN
152 cvterm as reltype on (rel.type_id = reltype.cvterm_id)
153 where
154 reltype.name = 'VARIABLE_OF'
156 cvterm.cvterm_id=?;";
157 my $h = $schema->storage->dbh->prepare($query);
158 $h->execute($cvterm_id);
159 my $cvterm_id = $h->fetchrow();
161 return $cvterm_id;
164 sub get_trait_from_exact_components {
165 my $self= shift;
166 my $schema = shift;
167 my $component_cvterm_ids = shift;
169 my @intersect_selects;
170 foreach my $cvterm_id (@$component_cvterm_ids){
171 push @intersect_selects, "SELECT object_id FROM cvterm_relationship WHERE subject_id = $cvterm_id";
173 push @intersect_selects, "SELECT object_id FROM cvterm_relationship GROUP BY 1 HAVING count(object_id) = ".scalar(@$component_cvterm_ids);
174 my $intersect_sql = join ' INTERSECT ', @intersect_selects;
175 my $h = $schema->storage->dbh->prepare($intersect_sql);
176 $h->execute();
177 my @trait_cvterm_ids;
178 while(my ($trait_cvterm_id) = $h->fetchrow_array()){
179 push @trait_cvterm_ids, $trait_cvterm_id;
181 if (scalar(@trait_cvterm_ids) > 1){
182 die "More than one composed trait returned for the given set of exact componenets\n";
184 return $trait_cvterm_ids[0];
187 sub get_trait_from_cvterm_id {
188 my $schema = shift;
189 my $cvterm_id = shift;
190 my $format = shift; #can be 'concise' for just the name or 'extended' for name|DB:0000001
191 if ($format eq 'concise'){
192 $q = "SELECT name FROM cvterm WHERE cvterm_id=?;";
194 if ($format eq 'extended'){
195 $q = "SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text FROM cvterm JOIN dbxref USING(dbxref_id) JOIN db USING(db_id) WHERE cvterm_id=?;";
197 my $h = $schema->storage->dbh->prepare($q);
198 $h->execute($cvterm_id);
199 $name = $h->fetchrow();
200 return $name;
203 sub _concatenate_cvterm_array {
204 my $schema = shift;
205 my $delimiter = shift;
206 my $format = shift;
207 my $first = shift;
208 my $second = shift;
209 #print STDERR "_concatenate_cvterm_array\n";
210 #print STDERR Dumper $first;
211 #print STDERR Dumper $second;
212 my %first_hash = %$first;
213 foreach my $f (keys %first_hash){
214 my $ids = $first_hash{$f};
215 foreach my $s (@$second){
216 my @component_ids = @$ids;
217 #print STDERR "_iterate\n";
218 my $name = get_trait_from_cvterm_id($schema, $s, $format);
219 my $concatenated_cvterm = $f.$delimiter.$name;
220 push @component_ids, $s;
221 delete $first_hash{$f};
222 $first_hash{$concatenated_cvterm} = \@component_ids;
223 #print STDERR Dumper \%first_hash;
226 return \%first_hash;
228 sub get_traits_from_component_categories {
229 my $self= shift;
230 my $schema = shift;
231 my $allowed_composed_cvs = shift;
232 my $composable_cvterm_delimiter = shift;
233 my $composable_cvterm_format = shift;
234 my $cvterm_id_hash = shift;
235 #print STDERR Dumper $cvterm_id_hash;
236 #print STDERR Dumper $allowed_composed_cvs;
237 #print STDERR Dumper $composable_cvterm_format;
239 my %id_hash = %$cvterm_id_hash;
240 delete @id_hash{ grep { scalar @{$id_hash{$_}} < 1 } keys %id_hash }; #remove cvtypes with no ids
242 my @ordered_id_groups;
243 foreach my $cv_name (@$allowed_composed_cvs){
244 push @ordered_id_groups, $id_hash{$cv_name};
247 my $id_array_count = scalar(@ordered_id_groups);
248 my $concatenated_cvterms;
249 foreach (@{$ordered_id_groups[0]}){
250 my $name = get_trait_from_cvterm_id($schema, $_, $composable_cvterm_format);
251 $concatenated_cvterms->{$name} = [$_];
253 for my $n (0 .. $id_array_count-2){
254 $concatenated_cvterms = _concatenate_cvterm_array($schema, $composable_cvterm_delimiter, $composable_cvterm_format, $concatenated_cvterms, $ordered_id_groups[$n+1]);
257 #print STDERR "possible traits are: ".Dumper($concatenated_cvterms)."\n";
259 my @existing_traits;
260 my @new_traits;
261 foreach my $key (sort keys %$concatenated_cvterms){
262 #my $existing_cvterm_name = $schema->resultset('Cv::Cvterm')->find({ name=>$key });
263 #if ($existing_cvterm_name){
264 #push @existing_traits, [$existing_cvterm_name->cvterm_id(), $key];
265 #next;
267 my $existing_cvterm_id = $self->get_trait_from_exact_components($schema, $concatenated_cvterms->{$key});
268 if ($existing_cvterm_id){
269 my $existing_name = get_trait_from_cvterm_id($schema, $existing_cvterm_id, 'extended');
270 push @existing_traits, [$existing_cvterm_id, $existing_name];
271 next;
273 push @new_traits, [ $concatenated_cvterms->{$key}, $key ];
276 #print STDERR "existing traits are: ".Dumper(/@existing_traits)." and new traits are".Dumper(/@new_traits)."\n";
278 return {
279 existing_traits => \@existing_traits,
280 new_traits => \@new_traits
284 sub get_traits_from_components {
285 my $self= shift;
286 my $schema = shift;
287 my $component_cvterm_ids = shift;
288 my @component_cvterm_ids = @$component_cvterm_ids;
290 my $contains_cvterm_id = $self->get_cvterm_row($schema, 'contains', 'relationship')->cvterm_id();
292 my $q = "SELECT object_id FROM cvterm_relationship WHERE type_id = ? AND subject_id IN (@{[join',', ('?') x @component_cvterm_ids]}) GROUP BY 1";
294 my $h = $schema->storage->dbh->prepare($q);
295 $h->execute($contains_cvterm_id, @component_cvterm_ids);
296 my @trait_cvterm_ids;
297 while(my ($trait_cvterm_id) = $h->fetchrow_array()){
298 push @trait_cvterm_ids, $trait_cvterm_id;
300 return \@trait_cvterm_ids;
303 sub get_components_from_trait {
304 my $self= shift;
305 my $schema = shift;
306 my $trait_cvterm_id = shift;
308 my $contains_cvterm_id = $self->get_cvterm_row($schema, 'contains', 'relationship')->cvterm_id();
309 my $q = "SELECT subject_id FROM cvterm_relationship WHERE object_id = $trait_cvterm_id and type_id = $contains_cvterm_id;";
310 my $h = $schema->storage->dbh->prepare($q);
311 $h->execute();
312 my @component_cvterm_ids;
313 while(my ($component_cvterm_id) = $h->fetchrow_array()){
314 push @component_cvterm_ids, $component_cvterm_id;
316 return \@component_cvterm_ids;
320 =head2 get_cv_names_from_db_name
322 Usage:
323 Desc:
324 Ret:
325 Args:
326 Side Effects:
327 Example:
329 =cut
331 sub get_cv_names_from_db_name {
332 my $self = shift;
333 my $schema = shift;
334 my $db_name = shift;
337 my $q = "select distinct(cv.name) from db join dbxref using(db_id) join cvterm using(dbxref_id) join cv using(cv_id) where db.name=?";
338 my $h = $schema->storage->dbh()->prepare($q);
339 $h->execute($db_name);
341 my @cv_names;
343 while (my $cvn = $h->fetchrow_array()) {
344 push @cv_names, $cvn;
347 return @cv_names;
351 =head2 get_vcf_genotyping_cvterm_id
353 Usage: my $cvterm_id = SGN::Model::Cvterm->get_vcf_genotyping_cvterm_id($schema, {'protocol_id' => $protocol_id});
354 Desc: A vcf genotyping file, associated with a genotyping protocol, used to load to the db could be SNP or PHG data. This method queries for the id of the cvterm describing the genotype data type of the vcf file for a genotyping protocol using a protocol id or genotype id. Querying using the genotype id is useful when interested on a single accession genotype data from a genotyping protocol.
355 Ret: the cvterm id for the vcf (snp or phg) genotyping cvterm
356 Args: bcs schema object, and a hashref of either {'protocol_id' => $protocol_id} or {'genotype_id' => $genotype_id}
357 Side Effects:
358 Example:
360 =cut
361 sub get_vcf_genotyping_cvterm_id {
362 my $self = shift;
363 my $schema = shift;
364 my $search_param = shift;
365 my $protocol_id = $search_param->{protocol_id};
366 my $genotype_id = $search_param->{genotype_id};
368 my $query;
369 my $id;
371 if ($protocol_id) {
372 $query = "SELECT genotypeprop.type_id
373 FROM stock
374 JOIN nd_experiment_stock ON (stock.stock_id=nd_experiment_stock.stock_id)
375 JOIN nd_experiment USING (nd_experiment_id)
376 JOIN nd_experiment_protocol USING (nd_experiment_id)
377 JOIN nd_experiment_project USING(nd_experiment_id)
378 JOIN nd_experiment_genotype USING (nd_experiment_id)
379 JOIN nd_protocol USING (nd_protocol_id)
380 LEFT JOIN nd_protocolprop ON (nd_protocolprop.nd_protocol_id = nd_protocol.nd_protocol_id)
381 JOIN genotype USING (genotype_id)
382 LEFT JOIN genotypeprop USING (genotype_id)
383 LEFT JOIN cvterm ON genotypeprop.type_id = cvterm.cvterm_id
384 WHERE nd_protocol.nd_protocol_id = ?
385 AND cvterm.name ~ 'vcf_\\w+_genotyping'
386 LIMIT 1";
388 $id = $protocol_id;
390 } elsif ($genotype_id) {
391 $query = "SELECT genotypeprop.type_id
392 FROM genotypeprop
393 LEFT JOIN cvterm ON genotypeprop.type_id = cvterm.cvterm_id
394 WHERE genotype_id = ?
395 AND cvterm.name ~ 'vcf_\\w+_genotyping'
396 LIMIT 1";
398 $id = $genotype_id;
399 } else {
400 die "\nSearch for vcf genotyping cvterm id requires either protocol id or genotype id.";
403 my $h = $schema->storage->dbh()->prepare($query);
404 $h->execute($id);
406 my $vcf_genotype_type_id = $h->fetchrow_array();
408 return $vcf_genotype_type_id;