Merge pull request #5248 from solgenomics/topic/batch_update_trials
[sgn.git] / lib / SGN / Controller / solGS / modelAccuracy.pm
blob2a274cd61063f295dc7bfa47dbf1f1056c4bd9af
1 package SGN::Controller::solGS::modelAccuracy;
4 use Moose;
5 use namespace::autoclean;
8 use Carp qw/ carp confess croak /;
9 use File::Slurp qw /write_file read_file/;
10 use JSON;
11 use Math::Round::Var;
12 use Statistics::Descriptive;
15 BEGIN { extends 'Catalyst::Controller' }
17 sub download_validation :Path('/solgs/download/model/validation') Args() {
18 my ($self, $c) = @_;
20 my $args = $c->req->param('arguments');
21 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
23 my $validation_file = $self->download_validation_file($c);
25 my $ret = {'validation_file' => $validation_file};
26 $ret = to_json($ret);
27 $c->res->content_type('application/json');
28 $c->res->body($ret);
33 sub get_model_accuracy_value {
34 my ($self, $c, $model_id, $trait_abbr) = @_;
36 my $cv_stat = $self->cross_validation_stat($c, $model_id, $trait_abbr);
38 my ($accuracy) = grep{ $_->[0] eq 'Mean accuracy'} @$cv_stat;
40 $c->stash->{accuracy_value} = $accuracy->[1];
45 sub get_cross_validations {
46 my ($self, $c, $model_id, $trait_abbr) = @_;
48 $c->stash->{training_pop_id} = $model_id;
49 $c->stash->{trait_abbr} = $trait_abbr;
51 $c->controller('solGS::Files')->validation_file($c);
52 my $file = $c->stash->{validation_file};
54 my $cvs = $c->controller('solGS::Utils')->read_file_data($file);
55 my @raw_cvs = grep { $_->[0] =~ /CV Fold/i } @$cvs;
57 return \@raw_cvs;
61 sub model_accuracy_report {
62 my ($self, $c) = @_;
63 my $file = $c->stash->{validation_file};
65 my $accuracy;
66 if (!-e $file)
68 $accuracy = [["Validation file doesn't exist.", "None"]];
70 elsif (!-s $file)
72 $accuracy = [["There is no cross-validation output report.", "None"]];
74 else
76 my $model_id = $c->stash->{training_pop_id};
77 my $trait_abbr = $c->stash->{trait_abbr};
79 $c->stash->{accuracy_report} = $self->cross_validation_stat($c, $model_id, $trait_abbr);
84 sub append_val_summary_stat {
85 my ($self, $c, $val_file) = @_;
87 my $model_id = $c->stash->{training_pop_id};
88 my $trait_abbr = $c->stash->{trait_abbr};
89 my $summary_stat = $self->cross_validation_stat($c, $model_id, $trait_abbr);
90 my $summary_txt = "\n----summary statistics----\n";
91 $summary_stat = join("\n", map { $_->[0] . "\t" . $_->[1] } @$summary_stat);
92 $summary_txt .= $summary_stat;
94 my $val_txt = read_file($val_file, {binmode=>'utf8'});
95 if ($val_txt !~ /summary statistics/) {
96 write_file($val_file,
97 {append => 1, binmode => 'utf8'},
98 $summary_txt
103 sub cross_validation_stat {
104 my ($self, $c, $model_id, $trait_abbr) = @_;
106 my $cv_data = $self->get_cross_validations($c, $model_id, $trait_abbr);
108 my @data = map {$_->[1] =~ s/\s+//r } @$cv_data;
110 my $stat = Statistics::Descriptive::Full->new();
111 $stat->add_data(@data);
113 my $min = $stat->min;
114 my $max = $stat->max;
115 my $mean = $stat->mean;
116 my $med = $stat->median;
117 my $std = $stat->standard_deviation;
118 my $cv = ($std / $mean) * 100;
119 my $cnt = scalar(@data);
121 my $round = Math::Round::Var->new(0.01);
122 $std = $round->round($std);
123 $mean = $round->round($mean);
124 $cv = $round->round($cv);
126 $cv = $cv . '%';
128 my @desc_stat = (
129 ['No. of K-folds', 10],
130 ['Replications', 2],
131 ['Total cross-validation runs', $cnt],
132 ['Minimum accuracy', $min],
133 ['Maximum accuracy', $max],
134 ['Standard deviation', $std],
135 ['Coefficient of variation', $cv],
136 ['Median accuracy', $med],
137 ['Mean accuracy', $mean]
140 return \@desc_stat;
145 sub create_model_summary {
146 my ($self, $c, $model_id, $trait_id) = @_;
148 my $protocol_id = $c->stash->{genotyping_protocol_id};
150 $c->controller('solGS::Trait')->get_trait_details($c, $trait_id);
151 my $tr_abbr = $c->stash->{trait_abbr};
153 my $path = $c->req->path;
155 my $data_set_type;
156 if ($path =~ /solgs\/traits\/all\/population\//)
159 $data_set_type = 'single_population';
161 elsif ($path =~ /solgs\/models\/combined\/trials\//)
163 $data_set_type = 'combined_populations';
166 my $args = {
167 'trait_id' => $trait_id,
168 'training_pop_id' => $model_id,
169 'genotyping_protocol_id' => $protocol_id,
170 'data_set_type' => $data_set_type
173 my $model_page = $c->controller('solGS::Path')->model_page_url($args);
174 my $trait_page = qq | <a href="$model_page" onclick="solGS.waitPage()">$tr_abbr</a>|;
176 $self->get_model_accuracy_value($c, $model_id, $tr_abbr);
177 my $accuracy_value = $c->stash->{accuracy_value};
179 my $heritability = $c->controller("solGS::gebvPhenoRegression")->get_heritability($c, $model_id, $trait_id);
180 my $additive_variance = $c->controller("solGS::gebvPhenoRegression")->get_additive_variance($c, $model_id, $trait_id);
182 my $model_summary = [$trait_page, $accuracy_value, $additive_variance, $heritability];
184 $c->stash->{model_summary} = $model_summary;
188 sub download_validation_file {
189 my ($self, $c) = @_;
191 $c->controller('solGS::Trait')->get_trait_details($c, $c->stash->{trait_id});
193 my $file = $c->controller('solGS::Files')->validation_file($c);
194 $self->append_val_summary_stat($c, $file);
195 $file = $c->controller('solGS::Files')->copy_to_tempfiles_subdir( $c, $file, 'solgs' );
197 return $file;
201 sub begin : Private {
202 my ($self, $c) = @_;
204 $c->controller('solGS::Files')->get_solgs_dirs($c);
209 ####