Merge pull request #5191 from solgenomics/topic/quality_control
[sgn.git] / lib / SGN / Controller / AJAX / QualityControl.pm
blob51d2024d8bd212085f728c335f135a67bf3566fd
2 package SGN::Controller::AJAX::QualityControl;
4 use Moose;
6 use Data::Dumper;
7 use File::Slurp;
8 use File::Spec qw | catfile |;
9 use File::Path qw(rmtree);
10 use JSON::Any;
11 use File::Basename qw | basename |;
12 use DateTime;
13 use Bio::Chado::Schema;
14 use CXGN::Dataset::File;
15 use CXGN::Phenotypes::File;
16 use CXGN::MixedModels;
17 use SGN::Controller::AJAX::Dataset;
18 use JSON;
21 BEGIN { extends 'Catalyst::Controller::REST' };
23 __PACKAGE__->config(
24 default => 'application/json',
25 stash_key => 'rest',
26 map => { 'application/json' => 'JSON' },
30 sub prepare: Path('/ajax/qualitycontrol/prepare') Args(0) {
31 my $self = shift;
32 my $c = shift;
33 my $dataset_id = $c->req->param('dataset_id');
35 if (! $c->user()) {
36 $c->stash->{rest} = {error=>'You must be logged in first!'};
37 $c->detach;
40 $c->tempfiles_subdir("qualitycontrol");
42 my ($fh, $tempfile) = $c->tempfile(TEMPLATE=>"qualitycontrol/qc_XXXXX");
44 my $people_schema = $c->dbic_schema("CXGN::People::Schema");
45 my $schema = $c->dbic_schema("Bio::Chado::Schema", "sgn_chado");
46 my $temppath = $c->config->{basepath}."/".$tempfile;
48 my $ds = CXGN::Dataset::File->new(people_schema => $people_schema, schema => $schema, sp_dataset_id => $dataset_id, exclude_dataset_outliers => 1, file_name => $temppath, quotes => 0);
49 $ds->retrieve_phenotypes();
50 my $pf = CXGN::Phenotypes::File->new( { file => $temppath."_phenotype.txt" });
52 # my @traits_select = ();
53 my $traits = $pf->traits();
55 my $trait_options = "trait_options";
56 my $trait_html ="";
58 foreach my $trait (@$traits) {
59 if ($trait =~ m/.+\d{7}/){
60 $trait_html .= '<input type="checkbox" class= "trait_box" name="'.$trait_options.'" value="'.$trait.'">'.$trait.'</input> </br>';
65 $c->stash->{rest} = {
66 selected_variable => $trait_html,
67 tempfile => $tempfile."_phenotype.txt",
72 sub extract_trait_data :Path('/ajax/qualitycontrol/grabdata') Args(0) {
73 my $self = shift;
74 my $c = shift;
75 my $dbh = $c->dbc->dbh();
76 my $schema = $c->dbic_schema("Bio::Chado::Schema");
78 my $file = $c->req->param("file");
79 my $trait = $c->req->param("trait");
81 $file = basename($file);
83 my $temppath = File::Spec->catfile($c->config->{basepath}, "static/documents/tempfiles/qualitycontrol/".$file);
84 my $F;
85 if (! open($F, "<", $temppath)) {
86 $c->stash->{rest} = { error => "Can't find data." };
87 return;
90 my $header = <$F>;
91 chomp($header);
93 my @keys = split("\t", $header);
95 my @data = ();
97 while (<$F>) {
98 chomp;
99 my @fields = split "\t";
100 my %line = ();
102 for (my $n = 0; $n < @keys; $n++) {
103 if (exists($fields[$n]) && defined($fields[$n])) {
104 $line{$keys[$n]} = $fields[$n];
107 if (defined $line{$trait} && $line{$trait} ne '') {
108 push @data, \%line;
112 my %unique_names;
113 foreach my $entry (@data) {
114 if (defined $entry->{'studyName'} && $entry->{'studyName'} ne '') {
115 $unique_names{$entry->{'studyName'}} = 1;
119 # Format the unique project names for the SQL query
121 $trait =~ s/\|.*//;
122 my $trait_like = $trait . '%';
124 my $project_names = join(", ", map { "'$_'" } keys %unique_names);
126 my $trait_sql = qq{
127 select project."name" from projectprop
128 join project on project.project_id = projectprop.project_id
129 where projectprop.type_id = (select cvterm_id from cvterm where cvterm."name" = 'validated_phenotype')
130 and project.name in ($project_names)
131 and projectprop.value like '$trait_like'
132 group by project."name";
135 my @validated_projects;
136 eval {
137 # Prepare and execute the query
138 my $sth_trait = $dbh->prepare($trait_sql);
139 $sth_trait->execute();
141 # Collect project names from the query result
142 while (my ($project_name) = $sth_trait->fetchrow_array) {
143 push @validated_projects, $project_name;
147 #print STDERR Dumper \@validated_projects;
148 my $list_projects = join(", ", @validated_projects);
150 if (scalar(@validated_projects) > 0) {
151 my $project_names_str = join(", ", @validated_projects);
152 my $message = "Trait $trait is already validated data for trials: $list_projects";
153 $c->stash->{rest} = { message => $message };
154 } else {
155 $c->stash->{rest} = { data => \@data, trait => $trait};
159 if ($@) {
160 $c->response->body("Failed to search data: $@");
161 return;
165 sub data_restore :Path('/ajax/qualitycontrol/datarestore') Args(0) {
166 my $self = shift;
167 my $c = shift;
168 my $dbh = $c->dbc->dbh();
169 my $schema = $c->dbic_schema("Bio::Chado::Schema");
171 my $file = $c->req->param("file");
172 my $trait = $c->req->param("trait");
174 $file = basename($file);
176 my $temppath = File::Spec->catfile($c->config->{basepath}, "static/documents/tempfiles/qualitycontrol/".$file);
178 my $F;
179 if (! open($F, "<", $temppath)) {
180 $c->stash->{rest} = { error => "Can't find data." };
181 return;
184 my $header = <$F>;
185 chomp($header);
187 my @keys = split("\t", $header);
189 my @data = ();
191 while (<$F>) {
192 chomp;
193 my @fields = split "\t";
194 my %line = ();
196 for (my $n = 0; $n < @keys; $n++) {
197 if (exists($fields[$n]) && defined($fields[$n])) {
198 $line{$keys[$n]} = $fields[$n];
201 if (defined $line{$trait} && $line{$trait} ne '') {
202 push @data, \%line;
206 my %unique_names;
207 foreach my $entry (@data) {
208 if (defined $entry->{'studyName'} && $entry->{'studyName'} ne '') {
209 $unique_names{$entry->{'studyName'}} = 1;
213 # Format the unique project names for the SQL query
214 my $project_names = join(", ", map { "'$_'" } keys %unique_names);
216 $c->stash->{rest} = { data => $project_names, trait => $trait};
219 sub store_outliers : Path('/ajax/qualitycontrol/storeoutliers') Args(0) {
220 my ($self, $c) = @_;
221 my $sp_person_id = $c->user() ? $c->user->get_object()->get_sp_person_id() : undef;
222 my $operator = $c->user()->get_object()->get_first_name()." ".$c->user()->get_object()->get_last_name();
224 my $schema = $c->dbic_schema("Bio::Chado::Schema", undef, $sp_person_id);
226 my @user_roles = $c->user()->roles;
227 my $curator = (grep { $_ eq 'curator' || $_ eq 'breeder' } @user_roles) ? 1 : 0;
229 my $response_data = {
230 is_curator => $curator ? 1 : 0, # 1 if curator, 0 otherwise
234 # Retrieve and decode the outliers from the request
235 my $outliers_string = $c->req->param('outliers');
237 # Now proceed to decode JSON
238 my $outliers_data = decode_json($outliers_string);
240 my %trait_ids;
241 my %study_names;
242 my $trait;
244 my $othertraits_json = $c->req->param('othertraits');
245 my $othertraits = decode_json($othertraits_json);
247 # Remove duplicates using a hash
248 my %unique_traits = map { $_ => 1 } @$othertraits;
249 my @unique_othertraits = keys %unique_traits;
251 # @unique_othertraits = map { s/\|.*//r } @unique_othertraits;
252 # print STDERR Dumper \@unique_othertraits;
254 foreach my $entry (@$outliers_data) {
255 $trait = $entry->{trait}; # Directly use the trait from the entry
256 my $study_name = $entry->{studyName};
257 $study_names{$study_name} = 1 if defined $study_name;
260 my @alltraits = ($trait, @unique_othertraits);
261 foreach my $sel_trait (@alltraits) {
262 $trait_ids{$sel_trait} = SGN::Model::Cvterm->get_cvterm_row_from_trait_name($schema, $sel_trait)->cvterm_id;
266 $trait =~ s/\|.*//;
267 my $trait_operator = $trait."|".$operator;
269 # Convert unique study names to a comma-separated list in SQL format
270 my @unique_study_names = keys %study_names;
271 return $c->response->body('No unique study names found.') unless @unique_study_names;
273 my $study_names_sql = join(", ", map { $schema->storage->dbh->quote($_) } @unique_study_names); # Quote each name
275 # Add validated traits to projectprop
276 my $trial_sql = qq{
277 INSERT INTO projectprop (project_id, type_id, value, rank)
278 SELECT
279 p.project_id,
280 (SELECT cvterm_id FROM cvterm WHERE name = 'validated_phenotype'),
281 '$trait_operator',
282 COALESCE(MAX(pp.rank), 0) + 1 -- Increment rank
283 FROM project p
284 LEFT JOIN projectprop pp
285 ON p.project_id = pp.project_id
286 AND pp.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'validated_phenotype')
287 WHERE p.name in ($study_names_sql)
288 GROUP BY p.project_id;
291 my $experiment_type = SGN::Model::Cvterm->get_cvterm_row($schema, 'phenotyping_experiment', 'experiment_type')->cvterm_id();
293 # Extract plot names from the outliers data
294 my @plot_names = map { $_->{plotName} } @$outliers_data;
295 my @plot_values = map { $_->{value} } @$outliers_data;
297 my %seen;
298 @plot_names = grep { !$seen{$_}++ } @plot_names;
300 my @unique_trait_ids = grep { !$seen{$_}++ } values %trait_ids;
301 my $trait_ids_sql = join(", ", @unique_trait_ids);
304 if (@plot_names && %trait_ids) {
305 # print STDERR Dumper \@plot_names;
306 # Convert plot names and traits into comma-separated lists for SQL
307 my $plot_names_sql = join(", ", map { $schema->storage->dbh()->quote($_) } @plot_names);
309 # Build the SQL query
310 my $outlier_data_sql = "
311 INSERT INTO phenotypeprop (phenotype_id, type_id, value)
312 SELECT phenotype.phenotype_id,
313 (SELECT cvterm_id FROM cvterm WHERE name = 'phenotype_outlier'),
314 phenotype.value
315 FROM phenotype
316 JOIN nd_experiment_phenotype ON nd_experiment_phenotype.phenotype_id = phenotype.phenotype_id
317 JOIN nd_experiment_stock ON nd_experiment_stock.nd_experiment_id = nd_experiment_phenotype.nd_experiment_id
318 WHERE nd_experiment_stock.stock_id IN (
319 SELECT stock.stock_id FROM stock WHERE uniquename IN ($plot_names_sql)
321 AND nd_experiment_stock.type_id = $experiment_type
322 AND phenotype.observable_id IN ($trait_ids_sql)
323 AND NOT EXISTS (
324 SELECT 1 FROM phenotypeprop
325 WHERE phenotypeprop.phenotype_id = phenotype.phenotype_id
326 AND phenotypeprop.type_id = (SELECT cvterm_id FROM cvterm WHERE name = 'phenotype_outlier')
327 );";
329 if($curator == 1){
330 # Execute the SQL query
331 eval {
332 my $sth_trial = $schema->storage->dbh->prepare($trial_sql);
333 $sth_trial->execute();
335 my $sth_outliers = $schema->storage->dbh->prepare($outlier_data_sql);
336 $sth_outliers->execute();
338 $c->stash->{rest} = $response_data;
340 } else {
341 $c->stash->{rest} = $response_data;
344 } else {
345 $c->response->body('No plot names or traits found.');
349 ## celaning tempfiles
350 rmtree(File::Spec->catfile($c->config->{basepath}, "static/documents/tempfiles/qualitycontrol"));
353 sub restore_outliers : Path('/ajax/qualitycontrol/restoreoutliers') Args(0) {
355 my ($self, $c) = @_;
356 my $dbh = $c->dbc->dbh();
357 my $sp_person_id = $c->user() ? $c->user->get_object()->get_sp_person_id() : undef;
358 my $schema = $c->dbic_schema("Bio::Chado::Schema", undef, $sp_person_id);
359 my @user_roles = $c->user()->roles;
361 my $curator = (grep { $_ eq 'curator' } @user_roles) ? 'curator' : undef;
364 # Retrieve and decode the outliers from the request
365 my $outliers_string = $c->req->param('outliers');
366 my $outlier_trials;
367 $outlier_trials = decode_json($outliers_string);
369 # getting trait name
370 my $trait = $c->req->param('trait');
371 $trait =~ s/\|.*//;
373 my $trait_like = $trait . '%';
375 my $response_data = {
376 is_curator => $curator ? 1 : 0, # 1 if curator, 0 otherwise
379 my $trial_clean_sql = qq{
380 DELETE FROM projectprop
381 WHERE projectprop.project_id IN (
382 SELECT projectprop.project_id
383 FROM projectprop
384 JOIN project ON project.project_id = projectprop.project_id
385 WHERE project.name IN ($outlier_trials)
387 AND projectprop.value LIKE '$trait_like'
388 AND projectprop.type_id = (
389 SELECT cvterm_id FROM cvterm WHERE name = 'validated_phenotype'
393 my $outliers_clean_sql = qq{
394 DELETE FROM phenotypeprop
395 WHERE phenotypeprop.phenotype_id IN (
396 SELECT phenotypeprop.phenotype_id
397 FROM phenotypeprop
398 JOIN phenotype ph ON phenotypeprop.phenotype_id = ph.phenotype_id
399 JOIN nd_experiment_phenotype nep ON nep.phenotype_id = phenotypeprop.phenotype_id
400 JOIN nd_experiment_project nes ON nes.nd_experiment_id = nep.nd_experiment_id
401 JOIN project pr ON pr.project_id = nes.project_id
402 WHERE ph.observable_id = (
403 SELECT cvterm_id FROM cvterm WHERE cvterm.name like '$trait_like'
405 AND pr.name IN ($outlier_trials)
411 # Execute the SQL query
412 if ($curator eq 'curator'){
413 eval {
414 my $sth_trial = $dbh->prepare($trial_clean_sql);
415 $sth_trial->execute();
417 my $sth_clean = $dbh->prepare($outliers_clean_sql);
418 $sth_clean->execute();
421 if ($@) {
422 $c->response->body("Failed to store data: $@");
423 return;
424 } else {
425 $c->stash->{rest} = $response_data;
427 } else {
428 $c->stash->{rest} = $response_data;
431 ## celaning tempfiles
432 rmtree(File::Spec->catfile($c->config->{basepath}, "static/documents/tempfiles/qualitycontrol"));