add is_variable accessor.
[sgn.git] / lib / CXGN / Genotype.pm
blob905a0e746d90394beda8b9e53bc3c58a8d7f1d9b
2 package CXGN::Genotype;
4 use Moose;
6 use JSON::Any;
7 use Math::Round qw | :all |;
9 has 'genotypeprop_id' => ( isa => 'Int',
10 is => 'rw',
13 has 'bcs_schema' => (isa => 'Ref',
14 is => 'rw',
17 has 'marker_encoding' => (isa => 'Str',
18 is => 'rw',
19 default => 'DS',
22 has 'id' => ( isa => 'Int',
23 is => 'rw',
26 has 'name' => ( isa => 'Str',
27 is => 'rw',
30 has 'method' => (isa => 'Str',
31 is => 'rw',
34 has 'markerscores' => ( isa => 'HashRef',
35 is => 'rw',
38 has 'rawscores' => (isa => 'HashRef',
39 is => 'rw',
42 has 'markers' => (isa => 'ArrayRef',
43 is => 'rw',
46 #has 'dosages' => (isa => 'HashRef',
47 # is => 'rw',
48 # );
52 sub BUILD {
53 my $self = shift;
55 my $genotypeprop_id = $self->genotypeprop_id();
56 my $bcs_schema = $self->bcs_schema();
58 if ( defined($genotypeprop_id) && defined($bcs_schema)) {
59 print STDERR "Creating CXGN::Genotype object from genotypeprop_id ".$self->genotypeprop_id()." and the schema object\n";
61 my $row = $self->bcs_schema()->resultset("Genetic::Genotypeprop")->find( { genotypeprop_id=> $self->genotypeprop_id() });
63 if ($row) {
64 $self->from_json($row->value());
66 else {
67 die "The CXGN::Genotype object could not be created with the genotypeprop_id ".$self->genotypeprop_id()." and the provided schema.";
69 print STDERR "Done!\n";
73 =head2 from_json
75 Usage: $gt->from_json($json_string)
76 Desc: Use json string to populate the object. Format is identical to
77 jsonb storage format in the database.
78 Ret:
79 Args:
80 Side Effects:
81 Example:
83 =cut
85 sub from_json {
86 my $self = shift;
87 my $json = shift;
89 #print STDERR "JSON = $json\n";
91 my $data = JSON::Any->decode($json);
93 $self->markerscores($data);
95 my @markers = keys(%{$data});
96 $self->markers( \@markers );
100 =head2 to_json
102 Usage: my $json = $gt->to_json()
103 Desc: convert the data in this object to json notation
104 Ret:
105 Args:
106 Side Effects:
107 Example:
109 =cut
112 sub to_json {
113 my $self = shift;
115 my $json = JSON::Any->encode($self->markerscores());
117 return $json;
121 =head2 calculate_consensus_scores
123 Usage: my $score = $gt->calculate_consensus_scores($other_gt);
124 Desc: calculate a consensus score with another genotype
125 returns a hashref containing markers with consensi.
126 Ret:
127 Args:
128 Side Effects:
129 Example:
131 =cut
133 sub calculate_consensus_scores {
134 my $self = shift;
135 my $other_genotype = shift;
137 my $other_scores = $other_genotype->markerscores();
138 my $this_scores = $self->markerscores();
139 my $consensus_scores = {};
140 foreach my $m (@{$self->markers()}) {
141 if (! exists($other_scores->{$m}) ||
142 ! defined($other_scores->{$m}) ||
143 ! $other_scores->{$m} ||
144 $other_scores->{$m} eq "NA") {
145 $consensus_scores->{$m} = $this_scores->{$m};
148 return $consensus_scores;
151 =head2 calculate_distance
153 Usage: my $distance = $gt->calculate_distance($another_genotype)
154 Desc: Calculate the distance to another genotype
155 Ret: A value between 0 and 1, 0 being infinite distance, 1 being
156 identical genotypes.
157 Args:
158 Side Effects:
159 Example:
161 =cut
163 sub calculate_distance {
164 my $self = shift;
165 my $other_genotype = shift;
167 my $total_matches = 0;
168 my $total_mismatches = 0;
169 my $other_genotype_score = $other_genotype->markerscores();
170 my $this_genotype_score = $self->markerscores();
172 foreach my $m (@{$self->markers()}) {
173 if ($self->good_score($other_genotype_score->{$m}->{$self->marker_encoding()}) && $self->good_score($this_genotype_score->{$m}->{$self->marker_encoding() })) {
174 if ($self->scores_are_equal($other_genotype_score->{$m}->{$self->marker_encoding()}, $this_genotype_score->{$m}->{$self->marker_encoding()})) {
175 $total_matches++;
176 #print STDERR "$m: $other_genotype_score->{$m} matches $this_genotype_score->{$m}\n";
178 else {
179 $total_mismatches++;
180 #print STDERR "$m: $other_genotype_score->{$m} no match with $this_genotype_score->{$m}\n";
184 else { #print STDERR "$m has no valid scores\n";
187 return $total_matches / ($total_matches + $total_mismatches);
190 sub read_counts {
191 my $self = shift;
192 my $marker = shift;
194 my $raw = $self->rawscores->{$marker};
195 #print STDERR "RAW: $raw\n";
196 my $counts = (split /\:/, $raw)[1];
198 my ($c1, $c2) = split /\,/, $counts;
200 return ($c1, $c2);
204 sub good_call {
205 my $self = shift;
206 my $marker = shift;
207 my ($c1, $c2) = $self->read_counts($marker);
208 if ( ($c1 + $c2) < 2) {
209 return 0;
211 return 1;
214 =head2 percent_good_calls
216 Usage: my $good_calls = $gt->percent_good_calls();
217 Desc: The number of good calls in this genotype
218 Good call is defined as a numeric value
219 Bad calls are undefined or alphanumeric values
220 (so works only with dosage values for now).
221 Ret:
222 Args:
223 Side Effects:
224 Example:
226 =cut
228 sub percent_good_calls {
229 my $self = shift;
231 my $good_calls = 0;
232 foreach my $m (@{$self->markers()}) {
233 if ($self->good_call($m)) {
234 $good_calls++;
237 return $good_calls / scalar(@{$self->markers()});
240 sub good_score {
241 my $self = shift;
242 my $score = shift;
244 if (!defined($score)) { return 0; }
246 if ($score =~ /0|1\/0|1/) { return 1; }
248 if ($score =~ /^[A-Za-z?]+$/) { return 0; }
250 $score = round($score);
252 if ($score == 0 || $score == 1 || $score ==2 || $score == -1) {
253 return 1;
255 else {
256 return 0;
260 sub scores_are_equal {
261 my $self = shift;
262 my $score1 = shift;
263 my $score2 = shift;
265 if ($self->good_score($score1)
266 && $self->good_score($score2)) {
267 if (round($score1) == round($score2)) {
268 return 1;
271 return 0;
274 =head2 check_parental _genotype_concordance
276 Usage: $concordance = $gt->
277 check_parental_genotype_concordance($female_parent, $male_parent);
278 Desc: the concordance of the parental genotypes with this genotype
279 Ret: a concordance score, between 0 and 1.
280 0 = zero concordance
281 1 = complete concordance
282 only observations on informative markers are returned
283 Args:
284 Side Effects:
285 Example:
287 =cut
289 sub check_parental_genotype_concordance {
290 my $self = shift;
291 my $female_parent_genotype = shift;
292 my $male_parent_genotype = shift;
294 my $self_markers = $self->markerscores();
295 my $mom_markers = $female_parent_genotype->markerscores();
296 my $dad_markers = $male_parent_genotype->markerscores();
298 my $non_informative =0;
299 my $concordant =0;
300 my $non_concordant =0;
301 foreach my $m (keys %$self_markers) {
303 my @matrix; #mom, dad, self, 1=possible 0=impossible
304 $matrix[ 0 ][ 0 ][ 0 ] =1;
305 $matrix[ 0 ][ 0 ][ 1 ] =0;
306 $matrix[ 0 ][ 0 ][ 2 ] =0;
307 $matrix[ 0 ][ 1 ][ 0 ] =1;
308 $matrix[ 0 ][ 1 ][ 1 ] =1;
309 $matrix[ 0 ][ 1 ][ 2 ] =0;
310 $matrix[ 0 ][ 2 ][ 1 ] =1;
311 $matrix[ 0 ][ 2 ][ 0 ] =1;
312 $matrix[ 0 ][ 2 ][ 2 ] =0;
314 $matrix[ 1 ][ 0 ][ 0 ] =1;
315 $matrix[ 1 ][ 0 ][ 1 ] =1;
316 $matrix[ 1 ][ 0 ][ 2 ] =0;
317 $matrix[ 1 ][ 1 ][ 0 ] =-1;
318 $matrix[ 1 ][ 1 ][ 1 ] =-1;
319 $matrix[ 1 ][ 1 ][ 2 ] =0;
320 $matrix[ 1 ][ 2 ][ 0 ] =0;
321 $matrix[ 1 ][ 2 ][ 1 ] =1;
322 $matrix[ 1 ][ 2 ][ 2 ] =1;
324 $matrix[ 2 ][ 0 ][ 0 ] = 0;
325 $matrix[ 2 ][ 0 ][ 1 ] = 1;
326 $matrix[ 2 ][ 0 ][ 2 ] = 0;
327 $matrix[ 2 ][ 1 ][ 0 ] = 1;
328 $matrix[ 2 ][ 1 ][ 1 ] = 1;
329 $matrix[ 2 ][ 1 ][ 2 ] = 1;
330 $matrix[ 2 ][ 2 ][ 0 ] = 0;
331 $matrix[ 2 ][ 2 ][ 1 ] = 0;
332 $matrix[ 2 ][ 2 ][ 2 ] = 1;
335 if (defined($mom_markers->{$m}) && defined($dad_markers->{$m}) && defined($self_markers->{$m})) {
337 my $score = $matrix[ round($mom_markers->{$m})]->[ round($dad_markers->{$m})]->[ round($self_markers->{$m})];
338 if ($score == 1) {
339 $concordant++;
342 elsif ($score == -1) {
343 $non_informative++;
345 else {
347 $non_concordant++;
352 return ($concordant, $non_concordant, $non_informative);
357 sub calculate_encoding_type {
362 sub delete_genotypes_with_protocol_id {
363 my $class = shift;
364 my $schema = shift;
365 my $nd_protocol_id = shift;
367 my $dbh = $schema->storage()->dbh();
369 print STDERR "Deleteing genotype entries... ";
370 my $q1 = "DELETE FROM genotype WHERE genotype_id in (SELECT genotype_id FROM nd_experiment_protocol join nd_experiment_genotype using(nd_experiment_id) JOIN genotype USING(genotype_id) WHERE nd_protocol_id=?)";
371 my $h1 = $dbh->prepare($q1);
372 $h1->execute($nd_protocol_id);
373 print STDERR "Done.\n";
375 print STDERR "Delete nd_experiment_md_files entries... ";
376 my $q8 = "DELETE FROM phenome.nd_experiment_md_files WHERE nd_experiment_id in (SELECT nd_experiment_id FROM nd_experiment_protocol join nd_experiment using(nd_experiment_id) WHERE nd_protocol_id=?)";
377 my $h8 = $dbh->prepare($q8);
378 $h8->execute($nd_protocol_id);
380 print STDERR "Deleting nd_experiment entries... ";
381 my $q2 = "DELETE FROM nd_experiment WHERE nd_experiment_id in (SELECT nd_experiment_id FROM nd_experiment_protocol join nd_experiment using(nd_experiment_id) WHERE nd_protocol_id=?)";
383 my $h2 = $dbh->prepare($q2);
384 $h2 ->execute($nd_protocol_id);
385 print STDERR "Done.\n";
387 print STDERR "Deleting protocol entry... ";
388 my $q3 = "DELETE FROM nd_protocol WHERE nd_protocol_id=?";
389 my $h3 = $dbh->prepare($q3);
390 $h3->execute($nd_protocol_id);
391 print STDERR "Done.\n";
394 __PACKAGE__->meta->make_immutable;