add is_variable accessor.
[sgn.git] / lib / CXGN / Genotype / Protocol.pm
blobcc6766a3cbf8a742d8a2e2a727eab0941c28e449
1 package CXGN::Genotype::Protocol;
3 =head1 NAME
5 CXGN::Genotype::Protocol - an object to handle genotyping protocols (breeding data)
7 To get info for a specific protocol:
9 my $protocol = CXGN::Genotype::Protocol->new({
10 bcs_schema => $schema,
11 nd_protocol_id => $protocol_id
12 });
13 And then use Moose attributes to retrieve markers, refrence name, etc
15 ----------------
17 To get a list of protocols and their info:
18 my $protocol_list = CXGN::Genotype::Protocol::list($schema); #INCLUDES MORE SEARCH PARAMS AND RETURN MARKER INFO
19 my $protocol_list = CXGN::Genotype::Protocol::list_simple($schema); #RETURNS ONLY MARKER COUNT
20 This can take search params in, like protocol_ids, accessions, etc
22 =head1 USAGE
24 =head1 DESCRIPTION
27 =head1 AUTHORS
30 =cut
32 use strict;
33 use warnings;
34 use Moose;
35 use Try::Tiny;
36 use Data::Dumper;
37 use SGN::Model::Cvterm;
38 use CXGN::Trial;
39 use JSON;
41 has 'bcs_schema' => (
42 isa => 'Bio::Chado::Schema',
43 is => 'rw',
44 required => 1,
47 has 'nd_protocol_id' => (
48 isa => 'Int',
49 is => 'rw',
52 has 'protocol_name' => (
53 isa => 'Str',
54 is => 'rw',
57 has 'protocol_description' => (
58 isa => 'Str|Undef',
59 is => 'rw',
62 has 'markers' => (
63 isa => 'HashRef',
64 is => 'rw',
65 lazy => 1,
66 builder => '_retrieve_nd_protocolprop_markers',
69 has 'marker_names' => (
70 isa => 'ArrayRef',
71 is => 'rw'
74 has 'markers_array' => (
75 isa => 'ArrayRef',
76 is => 'rw',
77 lazy => 1,
78 builder => '_retrieve_nd_protocolprop_markers_array',
81 has 'header_information_lines' => (
82 isa => 'ArrayRef',
83 is => 'rw'
86 has 'reference_genome_name' => (
87 isa => 'Str',
88 is => 'rw'
91 has 'species_name' => (
92 isa => 'Str',
93 is => "rw"
96 has 'sample_observation_unit_type_name' => (
97 isa => 'Str',
98 is => 'rw'
101 has 'create_date' => (
102 isa => 'Str',
103 is => 'rw'
106 has 'marker_type' => (
107 isa => 'Str',
108 is => 'rw'
111 has 'marker_info_keys' => (
112 isa => 'ArrayRef[Str]|Undef',
113 is => 'rw'
116 has 'assay_type' => (
117 isa => 'Str',
118 is => 'rw'
121 #Filtering KEYS
123 has 'chromosome_list' => (
124 isa => 'ArrayRef[Int]|ArrayRef[Str]|Undef',
125 is => 'ro',
128 has 'start_position' => (
129 isa => 'Int|Undef',
130 is => 'ro',
133 has 'end_position' => (
134 isa => 'Int|Undef',
135 is => 'ro',
138 has 'marker_name_list' => (
139 isa => 'ArrayRef[Str]|Undef',
140 is => 'ro',
143 sub BUILD {
144 my $self = shift;
145 my $schema = $self->bcs_schema;
146 my $geno_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
147 my $protocol_vcf_details_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
148 my $pcr_marker_protocolprop_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'pcr_marker_details', 'protocol_property')->cvterm_id();
149 my $pcr_marker_protocol_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'pcr_marker_protocol', 'protocol_type')->cvterm_id();
151 my $q = "SELECT nd_protocol.nd_protocol_id, nd_protocol.name, nd_protocolprop.value, nd_protocol.create_date, nd_protocol.description
152 FROM nd_protocol
153 LEFT JOIN nd_protocolprop ON(nd_protocol.nd_protocol_id = nd_protocolprop.nd_protocol_id AND nd_protocolprop.type_id IN (?,?))
154 WHERE nd_protocol.type_id IN (?,?) AND nd_protocol.nd_protocol_id=?;";
155 my $h = $schema->storage->dbh()->prepare($q);
156 $h->execute($protocol_vcf_details_cvterm_id, $pcr_marker_protocolprop_cvterm_id, $geno_cvterm_id, $pcr_marker_protocol_cvterm_id, $self->nd_protocol_id);
157 my ($nd_protocol_id, $nd_protocol_name, $value, $create_date, $description) = $h->fetchrow_array();
159 my $map_details = $value ? decode_json $value : {};
160 my $marker_names = $map_details->{marker_names} || [];
161 my $assay_type = $map_details->{assay_type};
162 if (!defined $assay_type) {
163 $assay_type = 'NA';
165 my $marker_type = $map_details->{marker_type};
166 if (!$marker_type) {
167 $marker_type = 'SNP';
169 my $header_information_lines = $map_details->{header_information_lines} || [];
170 my $reference_genome_name;
171 if ($marker_type eq 'SSR') {
172 $reference_genome_name = 'NA';
173 } else {
174 $reference_genome_name = $map_details->{reference_genome_name} || 'Not set. Please reload these genotypes using new genotype format!';
176 my $species_name = $map_details->{species_name} || 'Not set. Please reload these genotypes using new genotype format!';
177 my $sample_observation_unit_type_name = $map_details->{sample_observation_unit_type_name} || 'Not set. Please reload these genotypes using new genotype format!';
179 $self->marker_names($marker_names);
180 $self->protocol_name($nd_protocol_name);
181 $self->marker_type($marker_type);
182 $self->assay_type($assay_type);
183 if ($header_information_lines) {
184 $self->header_information_lines($header_information_lines);
186 if ($reference_genome_name) {
187 $self->reference_genome_name($reference_genome_name);
189 $self->species_name($species_name);
190 $self->sample_observation_unit_type_name($sample_observation_unit_type_name);
191 if ($create_date) {
192 $self->create_date($create_date);
194 if ($description) {
195 $self->protocol_description($description);
198 my $marker_info_keys = $map_details->{marker_info_keys};
199 $self->marker_info_keys($marker_info_keys);
201 return;
204 sub _retrieve_nd_protocolprop_markers {
205 my $self = shift;
206 my $schema = $self->bcs_schema;
207 my $chromosome_list = $self->chromosome_list;
208 my $start_position = $self->start_position;
209 my $end_position = $self->end_position;
210 my $marker_name_list = $self->marker_name_list;
212 my $geno_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
213 my $protocol_vcf_details_markers_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers', 'protocol_property')->cvterm_id();
215 my $chromosome_where = '';
216 if ($chromosome_list && scalar(@$chromosome_list)>0) {
217 my $chromosome_list_sql = '\'' . join('\', \'', @$chromosome_list) . '\'';
218 $chromosome_where = " AND (s.value->>'chrom')::text IN ($chromosome_list_sql)";
220 my $start_position_where = '';
221 if (defined($start_position)) {
222 $start_position_where = " AND (s.value->>'pos')::int >= $start_position";
224 my $end_position_where = '';
225 if (defined($end_position)) {
226 $end_position_where = " AND (s.value->>'pos')::int <= $end_position";
228 my $marker_name_list_where = '';
229 if ($marker_name_list && scalar(@$marker_name_list)>0) {
230 my $search_vals_sql = '\''.join ('\', \'' , @$marker_name_list).'\'';
231 $marker_name_list_where = "AND (s.value->>'name')::text IN ($search_vals_sql)";
234 my $protocolprop_q = "SELECT nd_protocol_id, s.key, s.value
235 FROM nd_protocolprop, jsonb_each(nd_protocolprop.value) as s
236 WHERE nd_protocol_id = ? and type_id = $protocol_vcf_details_markers_cvterm_id $chromosome_where $start_position_where $end_position_where $marker_name_list_where;";
238 my $h = $schema->storage->dbh()->prepare($protocolprop_q);
239 $h->execute($self->nd_protocol_id);
240 my %markers;
241 while (my ($nd_protocol_id, $marker_name, $value) = $h->fetchrow_array()) {
242 $markers{$marker_name} = decode_json $value;
245 $self->markers(\%markers);
248 sub _retrieve_nd_protocolprop_markers_array {
249 my $self = shift;
250 my $schema = $self->bcs_schema;
251 my $geno_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
252 my $protocol_vcf_details_markers_array_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers_array', 'protocol_property')->cvterm_id();
254 my $q = "SELECT nd_protocol_id, value FROM nd_protocolprop WHERE type_id = $protocol_vcf_details_markers_array_cvterm_id AND nd_protocol_id =?;";
255 my $h = $schema->storage->dbh()->prepare($q);
256 $h->execute($self->nd_protocol_id);
257 my ($nd_protocol_id, $value) = $h->fetchrow_array();
259 my $markers_array = $value ? decode_json $value : [];
260 $self->markers_array($markers_array);
263 #class method
264 sub list {
265 print STDERR "Protocol list search\n";
266 my $schema = shift;
267 my $protocol_list = shift;
268 my $accession_list = shift;
269 my $tissue_sample_list = shift;
270 my $limit = shift;
271 my $offset = shift;
272 my $genotyping_data_project_list = shift;
273 my @where_clause;
275 my $vcf_map_details_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
276 my $vcf_map_details_markers_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers', 'protocol_property')->cvterm_id();
277 my $vcf_map_details_markers_array_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers_array', 'protocol_property')->cvterm_id();
278 my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
279 my $tissue_sample_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'tissue_sample', 'stock_type')->cvterm_id();
280 my $nd_protocol_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
281 my $pcr_marker_details_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'pcr_marker_details', 'protocol_property')->cvterm_id();
282 my $pcr_marker_protocol_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'pcr_marker_protocol', 'protocol_type')->cvterm_id();
284 #push @where_clause, "nd_protocolprop.type_id = $vcf_map_details_cvterm_id";
285 push @where_clause, "nd_protocol.type_id IN ($nd_protocol_type_id, $pcr_marker_protocol_cvterm_id)";
286 if ($protocol_list && scalar(@$protocol_list)>0) {
287 my $protocol_sql = join ("," , @$protocol_list);
288 push @where_clause, "nd_protocol.nd_protocol_id in ($protocol_sql)";
290 if ($genotyping_data_project_list && scalar(@$genotyping_data_project_list)>0) {
291 my $sql = join ("," , @$genotyping_data_project_list);
292 push @where_clause, "project.project_id in ($sql)";
294 if ($accession_list && scalar(@$accession_list)>0) {
295 my $accession_sql = join ("," , @$accession_list);
296 push @where_clause, "stock.stock_id in ($accession_sql)";
297 push @where_clause, "stock.type_id = $accession_cvterm_id";
299 if ($tissue_sample_list && scalar(@$tissue_sample_list)>0) {
300 my $stock_sql = join ("," , @$tissue_sample_list);
301 push @where_clause, "stock.stock_id in ($stock_sql)";
302 push @where_clause, "stock.type_id = $tissue_sample_cvterm_id";
305 my $offset_clause = '';
306 my $limit_clause = '';
307 if ($limit){
308 $limit_clause = " LIMIT $limit ";
310 if ($offset){
311 $offset_clause = " OFFSET $offset ";
313 my $where_clause = scalar(@where_clause) > 0 ? " WHERE " . (join (" AND " , @where_clause)) : '';
315 my $q = "SELECT nd_protocol.nd_protocol_id, nd_protocol.name, nd_protocol.description, nd_protocol.create_date, nd_protocolprop.value, project.project_id, project.name, count(nd_protocol.nd_protocol_id) OVER() AS full_count, nd_protocolprop.value->>'marker_type'
316 FROM stock
317 JOIN cvterm AS stock_cvterm ON(stock.type_id = stock_cvterm.cvterm_id)
318 JOIN nd_experiment_stock USING(stock_id)
319 JOIN nd_experiment USING(nd_experiment_id)
320 JOIN nd_experiment_protocol USING(nd_experiment_id)
321 JOIN nd_experiment_project USING(nd_experiment_id)
322 JOIN nd_protocol USING(nd_protocol_id)
323 LEFT JOIN nd_protocolprop ON(nd_protocolprop.nd_protocol_id = nd_protocol.nd_protocol_id AND nd_protocolprop.type_id IN (?,?))
324 JOIN project USING(project_id)
325 $where_clause
326 GROUP BY (nd_protocol.nd_protocol_id, nd_protocol.name, nd_protocol.description, nd_protocol.create_date, nd_protocolprop.value, project.project_id, project.name)
327 ORDER BY nd_protocol.nd_protocol_id ASC
328 $limit_clause
329 $offset_clause;";
331 #print STDERR Dumper $q;
332 my $h = $schema->storage->dbh()->prepare($q);
333 $h->execute($vcf_map_details_cvterm_id, $pcr_marker_details_cvterm_id);
335 my @results;
336 while (my ($protocol_id, $protocol_name, $protocol_description, $create_date, $protocolprop_json, $project_id, $project_name, $sample_count, $marker_type) = $h->fetchrow_array()) {
337 my $protocol = $protocolprop_json ? decode_json $protocolprop_json : {};
338 my $marker_names = $protocol->{marker_names} || [];
339 my $header_information_lines = $protocol->{header_information_lines} || [];
340 my $species_name = $protocol->{species_name} || 'Not set. Please reload these genotypes using new genotype format!';
341 my $sample_observation_unit_type_name = $protocol->{sample_observation_unit_type_name} || 'Not set. Please reload these genotypes using new genotype format!';
342 my $reference_genome_name = $protocol->{reference_genome_name};
343 $create_date = $create_date || 'Not set. Please reload these genotypes using new genotype format!';
344 if (!$marker_type) {
345 $marker_type = 'SNP';
346 if (!$reference_genome_name) {
347 $reference_genome_name = 'Not set. Please reload these genotypes using new genotype format!';
350 push @results, {
351 protocol_id => $protocol_id,
352 protocol_name => $protocol_name,
353 protocol_description => $protocol_description,
354 marker_names => $marker_names,
355 header_information_lines => $header_information_lines,
356 reference_genome_name => $reference_genome_name,
357 species_name => $species_name,
358 sample_observation_unit_type_name => $sample_observation_unit_type_name,
359 project_name => $project_name,
360 project_id => $project_id,
361 create_date => $create_date,
362 observation_unit_count => $sample_count,
363 marker_count => scalar(@$marker_names),
364 marker_type => $marker_type
367 # print STDERR "PROTOCOL LIST =".Dumper(\@results);
368 return \@results;
371 #class method
372 sub list_simple {
373 print STDERR "Protocol list simple search\n";
374 my $schema = shift;
375 my $protocol_list = shift;
376 my @where_clause;
378 my $vcf_map_details_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
379 my $vcf_map_details_markers_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers', 'protocol_property')->cvterm_id();
380 my $vcf_map_details_markers_array_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers_array', 'protocol_property')->cvterm_id();
381 my $nd_protocol_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
382 my $pcr_marker_protocol_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'pcr_marker_protocol', 'protocol_type')->cvterm_id();
383 my $pcr_marker_details_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'pcr_marker_details', 'protocol_property')->cvterm_id();
385 if ($protocol_list && scalar(@$protocol_list)>0) {
386 my $protocol_sql = join ("," , @$protocol_list);
387 push @where_clause, "nd_protocol.nd_protocol_id in ($protocol_sql)";
388 } else {
389 push @where_clause, "nd_protocol.type_id IN ($nd_protocol_type_id, $pcr_marker_protocol_type_id)"
391 my $where_clause = scalar(@where_clause) > 0 ? " WHERE " . (join (" AND " , @where_clause)) : '';
393 my $q = "SELECT nd_protocol.nd_protocol_id, nd_protocol.name, nd_protocol.description, nd_protocol.create_date, nd_protocolprop.value->>'header_information_lines', nd_protocolprop.value->>'reference_genome_name', nd_protocolprop.value->>'species_name', nd_protocolprop.value->>'sample_observation_unit_type_name', jsonb_array_length(nd_protocolprop.value->'marker_names'), nd_protocolprop.value->>'marker_type'
394 FROM nd_protocol
395 LEFT JOIN nd_protocolprop ON(nd_protocolprop.nd_protocol_id = nd_protocol.nd_protocol_id) AND nd_protocolprop.type_id IN (?,?)
396 $where_clause
397 ORDER BY nd_protocol.nd_protocol_id ASC;";
399 my $h = $schema->storage->dbh()->prepare($q);
400 $h->execute($vcf_map_details_cvterm_id, $pcr_marker_details_type_id);
402 my @results;
403 while (my ($protocol_id, $protocol_name, $protocol_description, $create_date, $header_information_lines, $reference_genome_name, $species_name, $sample_type_name, $marker_count, $marker_type) = $h->fetchrow_array()) {
404 my $header_information_lines = $header_information_lines ? decode_json $header_information_lines : [];
405 my $species_name = $species_name || 'Not set. Please reload these genotypes using new genotype format!';
406 my $sample_observation_unit_type_name = $sample_type_name || 'Not set. Please reload these genotypes using new genotype format!';
407 my $protocol_description = $protocol_description || 'Not set. Please reload these genotypes using new genotype format!';
408 $create_date = $create_date || 'Not set. Please reload these genotypes using new genotype format!';
409 if (!$marker_type) {
410 $marker_type = 'SNP';
411 $reference_genome_name = $reference_genome_name || 'Not set. Please reload these genotypes using new genotype format!';
413 push @results, {
414 protocol_id => $protocol_id,
415 protocol_name => $protocol_name,
416 protocol_description => $protocol_description,
417 marker_count => $marker_count,
418 header_information_lines => $header_information_lines,
419 reference_genome_name => $reference_genome_name,
420 species_name => $species_name,
421 sample_observation_unit_type_name => $sample_observation_unit_type_name,
422 create_date => $create_date,
423 marker_type => $marker_type
426 # print STDERR "SIMPLE LIST =".Dumper (\@results)."\n";
427 return \@results;
431 sub set_name {
432 my $self = shift;
433 my $name = shift;
434 my $protocol_id = $self->nd_protocol_id;
435 my $protocol_row = $self->bcs_schema->resultset("NaturalDiversity::NdProtocol")->find({
436 nd_protocol_id => $protocol_id,
439 if ($protocol_row) {
440 $protocol_row->name($name);
441 $protocol_row->update();
446 sub set_description {
447 my $self = shift;
448 my $description = shift;
449 my $protocol_id = $self->nd_protocol_id;
450 my $protocol_row = $self->bcs_schema->resultset("NaturalDiversity::NdProtocol")->find({
451 nd_protocol_id => $protocol_id,
454 if ($protocol_row) {
455 $protocol_row->description($description);
456 $protocol_row->update();