seedlot upload with accession synonyms. seedlot upload works to update existing seedlots
[sgn.git] / lib / SGN / Model / Cvterm.pm
blob150a4b0eae8e02d736f5b5d55206e7e3f1c16679
2 =head1 NAME
4 SGN::Model::Cvterm - a simple model that provides information on cvterms
6 =head1 DESCRIPTION
8 Retrieves cv terms.
10 get_cvterm_object retrieves the term as a CXGN::Chado::Cvterm object.
12 get_cvterm_row retrieves the term as a DBIx::Class row.
14 Both functions take a schema object, a cvterm name and a cv name as arguments.
16 If a term is not in the database, undef is returned.
18 =head1 AUTHOR
20 Lukas Mueller
22 =cut
24 package SGN::Model::Cvterm;
26 use CXGN::Chado::Cvterm;
27 use Data::Dumper;
# get_cvterm_object
#
# Looks up the cv named $cv_name and, when it exists, asks
# CXGN::Chado::Cvterm->new_with_term_name for the term $cvterm_name in it.
#
# Args:    $schema      - schema object; must provide resultset() and
#                         storage->dbh()
#          $cvterm_name - name of the cvterm
#          $cv_name     - name of the cv the term belongs to
# Returns: the value returned by CXGN::Chado::Cvterm->new_with_term_name,
#          or undef (after logging to STDERR) when the cv does not exist.
sub get_cvterm_object {
    my $self        = shift;
    my $schema      = shift;
    my $cvterm_name = shift;
    my $cv_name     = shift;

    my $cv = $schema->resultset('Cv::Cv')->find( { name => $cv_name } );

    if ( !$cv ) {
        print STDERR "CV $cv_name not found. Ignoring.\n";
        return undef;
    }

    # The database handle is taken from the schema, as everywhere else in
    # this module. This package defines no dbc() method, so asking $self
    # for a connection cannot work when the sub is invoked on the class.
    my $term = CXGN::Chado::Cvterm->new_with_term_name(
        $schema->storage->dbh(),
        $cvterm_name,
        $cv->cv_id(),
    );

    return $term;
}
# get_cvterm_row
#
# Finds the cvterm called $name inside the cv called $cv_name by running a
# find() on the 'Cv::Cvterm' resultset joined to 'cv'.
#
# Args:    $schema  - schema object providing resultset()
#          $name    - cvterm name
#          $cv_name - cv name
# Returns: whatever find() returns for that name/cv pair.
sub get_cvterm_row {
    my ( $self, $schema, $name, $cv_name ) = @_;

    my %criteria = (
        'me.name' => $name,
        'cv.name' => $cv_name,
    );
    my %attributes = ( join => 'cv' );

    my $row = $schema->resultset('Cv::Cvterm')->find( \%criteria, \%attributes );

    return $row;
}
# get_cvterm_row_from_trait_name
#
# Resolves a fieldbook-style trait string to a cvterm via its dbxref.
# The string is expected to look like "$trait_name|$dbname:$trait_accession",
# e.g. "plant height|CO_334:0000123". Only the part after the LAST "|" is
# used for the lookup; a string without any "|" is treated as being the
# "$dbname:$trait_accession" part on its own.
#
# Args:    $schema     - schema object providing resultset()
#          $trait_name - the trait string described above
# Returns: whatever find() on 'Cv::Cvterm' returns for the db/accession
#          pair, or undef when the string has no "db:accession" part or the
#          db name is not found.
sub get_cvterm_row_from_trait_name {
    my $self       = shift;
    my $schema     = shift;
    my $trait_name = shift;

    return undef unless defined $trait_name;

    # Substring after the last occurrence of the delimiter. rindex() yields
    # -1 when the delimiter is absent, which makes the offset 0 (whole string).
    my $delim          = "|";
    my $full_accession = substr $trait_name, rindex( $trait_name, $delim ) + length($delim);
    my ( $db_name, $accession ) = split( /:/, $full_accession );

    # Without both halves there is nothing meaningful to look up.
    return undef unless defined $db_name && defined $accession;

    $accession =~ s/\s+$//;
    $accession =~ s/^\s+//;
    $db_name   =~ s/\s+$//;
    $db_name   =~ s/^\s+//;

    # Fetch the db row once and reuse it instead of calling first() twice.
    my $db = $schema->resultset("General::Db")->search( { 'me.name' => $db_name } )->first();
    return undef unless $db;

    my $trait_cvterm = $schema->resultset("Cv::Cvterm")->find(
        {
            'dbxref.db_id'     => $db->db_id(),
            'dbxref.accession' => $accession,
        },
        {
            'join' => 'dbxref',
        }
    );

    return $trait_cvterm;
}
# get_trait_from_exact_components
#
# Finds the cvterm that is the object of a cvterm_relationship for every one
# of the given component ids AND has exactly as many cvterm_relationship
# rows as there are components (one INTERSECT branch per component plus a
# final GROUP BY/HAVING branch on the count).
#
# Args:    $schema               - schema object providing storage->dbh()
#          $component_cvterm_ids - arrayref of component cvterm ids
# Returns: the single matching object_id, or undef when nothing matches.
# Dies:    when more than one object_id matches.
sub get_trait_from_exact_components {
    my $self                 = shift;
    my $schema               = shift;
    my $component_cvterm_ids = shift;

    my @component_ids   = @{ $component_cvterm_ids || [] };
    my $component_count = scalar(@component_ids);

    # One placeholder branch per component; the ids are bound at execute
    # time rather than interpolated into the statement.
    my @intersect_selects =
        ("SELECT object_id FROM cvterm_relationship WHERE subject_id = ?") x $component_count;

    # $component_count is an integer computed here, so it is safe inline.
    push @intersect_selects,
        "SELECT object_id FROM cvterm_relationship GROUP BY 1 HAVING count(object_id) = " . $component_count;

    my $intersect_sql = join ' INTERSECT ', @intersect_selects;
    my $h = $schema->storage->dbh->prepare($intersect_sql);
    $h->execute(@component_ids);

    my @trait_cvterm_ids;
    while ( my ($trait_cvterm_id) = $h->fetchrow_array() ) {
        push @trait_cvterm_ids, $trait_cvterm_id;
    }

    if ( scalar(@trait_cvterm_ids) > 1 ) {
        die "More than one composed trait returned for the given set of exact componenets\n";
    }

    return $trait_cvterm_ids[0];
}
# get_trait_from_cvterm_id
#
# Plain function (not a method): returns the display name of a cvterm.
#
# Args:    $schema    - schema object providing storage->dbh()
#          $cvterm_id - id of the cvterm
#          $format    - 'concise' for just the name, or 'extended' for
#                       name|DB:0000001
# Returns: the first column of the first row of the query, or undef when
#          no row comes back.
# Dies:    when $format is neither 'concise' nor 'extended'. Previously the
#          query variable was an undeclared package global, so an unknown
#          format silently re-ran whatever query an earlier call had left
#          behind.
sub get_trait_from_cvterm_id {
    my $schema    = shift;
    my $cvterm_id = shift;
    my $format    = shift;

    my %query_for = (
        concise  => "SELECT name FROM cvterm WHERE cvterm_id=?;",
        extended => "SELECT (((cvterm.name::text || '|'::text) || db.name::text) || ':'::text) || dbxref.accession::text FROM cvterm JOIN dbxref USING(dbxref_id) JOIN db USING(db_id) WHERE cvterm_id=?;",
    );

    my $q = defined $format ? $query_for{$format} : undef;
    if ( !defined $q ) {
        my $shown = defined $format ? "'$format'" : 'undef';
        die "Unknown trait name format $shown (expected 'concise' or 'extended')\n";
    }

    my $h = $schema->storage->dbh->prepare($q);
    $h->execute($cvterm_id);

    # List assignment takes the first column of the fetched row.
    my ($name) = $h->fetchrow_array();
    return $name;
}
# _concatenate_cvterm_array
#
# Plain function (not a method). Extends every entry of $first with every
# cvterm id in $second: for an entry "name => [ids]" and an id $s, the result
# holds "name . $delimiter . name_of($s) => [ids, $s]". The original entry is
# removed once it has been extended at least once; when $second is empty the
# entries are returned unchanged.
#
# Args:    $schema    - schema object, passed on to get_trait_from_cvterm_id
#          $delimiter - string placed between the concatenated names
#          $format    - name format, passed on to get_trait_from_cvterm_id
#          $first     - hashref: concatenated name => arrayref of cvterm ids
#          $second    - arrayref of cvterm ids to append
# Returns: a new hashref of the same shape; $first itself is not modified.
sub _concatenate_cvterm_array {
    my $schema    = shift;
    my $delimiter = shift;
    my $format    = shift;
    my $first     = shift;
    my $second    = shift;

    my %first_hash = %{ $first  || {} };
    my @second_ids = @{ $second || [] };

    # The name of an id does not depend on which entry it is appended to, so
    # look each one up once instead of once per (entry, id) pair.
    my %name_of;

    # keys() is evaluated once up front, so adding and deleting entries
    # inside the loop does not disturb the iteration.
    foreach my $f ( keys %first_hash ) {
        my $ids = $first_hash{$f};
        foreach my $s (@second_ids) {
            if ( !exists $name_of{$s} ) {
                $name_of{$s} = get_trait_from_cvterm_id( $schema, $s, $format );
            }
            my $concatenated_cvterm = $f . $delimiter . $name_of{$s};
            my @component_ids       = ( @$ids, $s );
            delete $first_hash{$f};
            $first_hash{$concatenated_cvterm} = \@component_ids;
        }
    }

    return \%first_hash;
}
# get_traits_from_component_categories
#
# Builds every combination of component cvterms, taking one id from each
# non-empty category in the order given by $allowed_composed_cvs, and sorts
# the combinations into traits that already exist and traits that would be
# new.
#
# Args:    $schema                      - schema object
#          $allowed_composed_cvs        - arrayref of cv names; fixes the
#                                         order in which names are joined
#          $composable_cvterm_delimiter - string placed between names
#          $composable_cvterm_format    - name format handed to
#                                         get_trait_from_cvterm_id
#          $cvterm_id_hash              - hashref: cv name => arrayref of
#                                         cvterm ids
# Returns: hashref with
#            existing_traits => [ [ $cvterm_id, $extended_name ], ... ]
#            new_traits      => [ [ \@component_ids, $composed_name ], ... ]
# Dies:    propagates the die from get_trait_from_exact_components when a
#          component set matches more than one trait.
sub get_traits_from_component_categories {
    my $self                        = shift;
    my $schema                      = shift;
    my $allowed_composed_cvs        = shift;
    my $composable_cvterm_delimiter = shift;
    my $composable_cvterm_format    = shift;
    my $cvterm_id_hash              = shift;

    my %id_hash = %{ $cvterm_id_hash || {} };

    # Collect only categories that actually have ids, in the allowed order.
    # Pushing a slot for an empty or missing category would make an empty
    # FIRST category wipe out every combination, while an empty later
    # category was merely skipped.
    my @ordered_id_groups;
    foreach my $cv_name (@$allowed_composed_cvs) {
        my $ids = $id_hash{$cv_name};
        next unless $ids && scalar(@$ids) > 0;
        push @ordered_id_groups, $ids;
    }

    my $concatenated_cvterms = {};
    if (@ordered_id_groups) {
        my ( $first_group, @remaining_groups ) = @ordered_id_groups;

        # Seed with the first category: one entry per id.
        foreach my $cvterm_id (@$first_group) {
            my $name = get_trait_from_cvterm_id( $schema, $cvterm_id, $composable_cvterm_format );
            $concatenated_cvterms->{$name} = [$cvterm_id];
        }

        # Fold each further category into the combinations built so far.
        foreach my $group (@remaining_groups) {
            $concatenated_cvterms = _concatenate_cvterm_array(
                $schema,
                $composable_cvterm_delimiter,
                $composable_cvterm_format,
                $concatenated_cvterms,
                $group
            );
        }
    }

    my @existing_traits;
    my @new_traits;
    foreach my $key ( sort keys %$concatenated_cvterms ) {

        # Existing traits are recognised by their exact component set, not
        # by the composed name.
        my $existing_cvterm_id =
            $self->get_trait_from_exact_components( $schema, $concatenated_cvterms->{$key} );
        if ($existing_cvterm_id) {
            my $existing_name = get_trait_from_cvterm_id( $schema, $existing_cvterm_id, 'extended' );
            push @existing_traits, [ $existing_cvterm_id, $existing_name ];
            next;
        }
        push @new_traits, [ $concatenated_cvterms->{$key}, $key ];
    }

    return {
        existing_traits => \@existing_traits,
        new_traits      => \@new_traits
    };
}
# get_traits_from_components
#
# Returns the ids of all cvterms that are the object of a 'contains'
# relationship with at least one of the given component cvterms.
#
# Args:    $schema               - schema object providing storage->dbh()
#          $component_cvterm_ids - arrayref of component cvterm ids
# Returns: arrayref of object_ids (one per distinct object, via GROUP BY);
#          an empty arrayref when no component ids are given.
# Dies:    when the 'contains' term of the 'relationship' cv cannot be found.
sub get_traits_from_components {
    my $self                 = shift;
    my $schema               = shift;
    my $component_cvterm_ids = shift;
    my @component_cvterm_ids = @{ $component_cvterm_ids || [] };

    # An empty list would render as "IN ()", which is not valid SQL.
    return [] unless @component_cvterm_ids;

    my $contains_cvterm = $self->get_cvterm_row( $schema, 'contains', 'relationship' )
        or die "Could not find cvterm 'contains' in cv 'relationship'\n";
    my $contains_cvterm_id = $contains_cvterm->cvterm_id();

    my $placeholders = join ',', ('?') x @component_cvterm_ids;
    my $q = "SELECT object_id FROM cvterm_relationship WHERE type_id = ? AND subject_id IN ($placeholders) GROUP BY 1";

    my $h = $schema->storage->dbh->prepare($q);
    $h->execute( $contains_cvterm_id, @component_cvterm_ids );

    my @trait_cvterm_ids;
    while ( my ($trait_cvterm_id) = $h->fetchrow_array() ) {
        push @trait_cvterm_ids, $trait_cvterm_id;
    }

    return \@trait_cvterm_ids;
}
# get_components_from_trait
#
# Returns the ids of all cvterms that are the subject of a 'contains'
# relationship whose object is the given trait cvterm.
#
# Args:    $schema          - schema object providing storage->dbh()
#          $trait_cvterm_id - id of the composed trait cvterm
# Returns: arrayref of subject_ids (empty when there are none).
# Dies:    when the 'contains' term of the 'relationship' cv cannot be found.
sub get_components_from_trait {
    my $self            = shift;
    my $schema          = shift;
    my $trait_cvterm_id = shift;

    my $contains_cvterm = $self->get_cvterm_row( $schema, 'contains', 'relationship' )
        or die "Could not find cvterm 'contains' in cv 'relationship'\n";
    my $contains_cvterm_id = $contains_cvterm->cvterm_id();

    # Both ids are bound rather than interpolated into the statement.
    my $q = "SELECT subject_id FROM cvterm_relationship WHERE object_id = ? and type_id = ?;";
    my $h = $schema->storage->dbh->prepare($q);
    $h->execute( $trait_cvterm_id, $contains_cvterm_id );

    my @component_cvterm_ids;
    while ( my ($component_cvterm_id) = $h->fetchrow_array() ) {
        push @component_cvterm_ids, $component_cvterm_id;
    }

    return \@component_cvterm_ids;
}