convert list validate functions to run asynchronously
[sgn.git] / db / 00150 / CreateMaterializedMarkerviewUpdated2.pm
blob87dfb6af52823a32a49d5b0abe0ee488f7f5a227
1 #!/usr/bin/env perl
4 =head1 NAME
6 CreateMaterializedMarkerviewUpdated2.pm
8 =head1 SYNOPSIS
10 mx-run CreateMaterializedMarkerviewUpdated2 [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
17 This patch adds a public.create_materialized_markerview(boolean) function which can be used to build
18 and refresh the unified marker materialized view. The function uses all of the reference genomes / species
19 from the currently stored genotype data in the nd_protocolprop table to build the query for the marker
20 materialized view. If new genotype data is added, this function should be used to rebuild the marker
21 materialized view rather than simply refreshing it in case there are new references or species in the data.
23 Update: does not cast the marker position to int, in case it is not defined or not an int...
25 Update 2: it does cast the marker position to int, but handles cases when the position is not defined, so that
26 way the query properly sorts positions numerically and is more efficient
28 =head1 AUTHOR
30 David Waring <djw64@cornell.edu>
32 =head1 COPYRIGHT & LICENSE
34 Copyright 2010 Boyce Thompson Institute for Plant Research
36 This program is free software; you can redistribute it and/or modify
37 it under the same terms as Perl itself.
39 =cut
42 package CreateMaterializedMarkerviewUpdated2;
44 use Moose;
45 use Bio::Chado::Schema;
46 use Try::Tiny;
47 extends 'CXGN::Metadata::Dbpatch';
50 has '+description' => ( default => <<'' );
51 This patch adds the create_materialized_markerview function to build and populate the materialized_markerview mat view
53 has '+prereq' => (
54 default => sub {
55 [],
60 sub patch {
61 my $self=shift;
63 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
65 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
67 print STDOUT "\nExecuting the SQL commands.\n";
69 $self->dbh->do(<<EOSQL);
71 -- Remove existing matview and function
72 DROP MATERIALIZED VIEW IF EXISTS public.materialized_markerview;
73 DROP FUNCTION IF EXISTS public.create_materialized_markerview(boolean);
75 -- Create the function to build the materialized markerview
76 CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
77 CREATE OR REPLACE FUNCTION public.create_materialized_markerview(refresh boolean)
78 RETURNS boolean
79 LANGUAGE plpgsql
80 AS \$function\$
81 DECLARE
82 maprow RECORD;
83 querystr TEXT;
84 queries TEXT[];
85 emptyquery TEXT;
86 matviewquery TEXT;
87 BEGIN
89 -- Remove exsiting materialized view, if it exists
90 DROP MATERIALIZED VIEW IF EXISTS public.materialized_markerview;
92 -- Get the unique species / reference genome combinations from the nd_protocolprop table
93 FOR maprow IN (
94 SELECT value->>'species_name' AS species,
95 concat(
96 substring(split_part(value->>'species_name', ' ', 1), 1, 1),
97 substring(split_part(value->>'species_name', ' ', 2), 1, 1)
98 ) AS species_abbreviation,
99 value->>'reference_genome_name' AS reference_genome,
100 replace(replace(value->>'reference_genome_name', '_', ''), ' ', '') AS reference_genome_cleaned
101 FROM nd_protocolprop
102 WHERE type_id = (SELECT cvterm_id FROM public.cvterm WHERE name = 'vcf_map_details')
103 GROUP BY species, reference_genome
106 -- Loop through each unique combination of species / reference genome and build the marker query
107 LOOP
108 querystr := 'SELECT nd_protocolprop.nd_protocol_id, ''' || maprow.species || ''' AS species_name, ''' || maprow.reference_genome || ''' AS reference_genome_name, s.value->>''name'' AS marker_name, s.value->>''chrom'' AS chrom, cast(coalesce(nullif(s.value->>''pos'',''''),NULL) as numeric) AS pos, s.value->>''ref'' AS ref, s.value->>''alt'' AS alt, CASE WHEN s.value->>''alt'' < s.value->>''ref'' THEN concat(''' || maprow.species_abbreviation || ''', ''' || maprow.reference_genome_cleaned || ''', ''_'', REGEXP_REPLACE(s.value->>''chrom'', ''^chr?'', ''''), ''_'', s.value->>''pos'', ''_'', s.value->>''alt'', ''_'', s.value->>''ref'') ELSE concat(''' || maprow.species_abbreviation || ''', ''' || maprow.reference_genome_cleaned || ''', ''_'', REGEXP_REPLACE(s.value->>''chrom'', ''^chr?'', ''''), ''_'', s.value->>''pos'', ''_'', s.value->>''ref'', ''_'', s.value->>''alt'') END AS variant_name FROM nd_protocolprop, LATERAL jsonb_each(nd_protocolprop.value) s(key, value) WHERE type_id = (SELECT cvterm_id FROM public.cvterm WHERE name = ''vcf_map_details_markers'') AND nd_protocol_id IN (SELECT nd_protocol_id FROM nd_protocolprop WHERE value->>''species_name'' = ''' || maprow.species || ''' and value->>''reference_genome_name'' = ''' || maprow.reference_genome || ''' AND type_id = (SELECT cvterm_id FROM public.cvterm WHERE name = ''vcf_map_details''))';
109 queries := array_append(queries, querystr);
111 END LOOP;
113 -- Add an empty query in case there is no existing marker data
114 emptyquery := 'SELECT column1::int AS nd_protocol_id, column2::text AS species_name, column3::text AS reference_genome_name, column4::text AS marker_name, column5::text AS chrom, column6::numeric AS pos, column7::text AS ref, column8::text AS alt, column9::text AS variant_name FROM (values (null,null,null,null,null,null,null,null,null)) AS x WHERE false';
115 queries := array_append(queries, emptyquery);
117 -- Combine queries with a UNION
118 matviewquery := array_to_string(queries, ' UNION ');
120 -- Build the materialized view
121 EXECUTE 'CREATE MATERIALIZED VIEW public.materialized_markerview AS (' || matviewquery || ') WITH NO DATA';
122 ALTER MATERIALIZED VIEW public.materialized_markerview OWNER TO web_usr;
124 -- Add indexes
125 CREATE INDEX materialized_markerview_idx1 ON public.materialized_markerview(nd_protocol_id);
126 CREATE INDEX materialized_markerview_idx2 ON public.materialized_markerview(species_name);
127 CREATE INDEX materialized_markerview_idx3 ON public.materialized_markerview(reference_genome_name);
128 CREATE INDEX materialized_markerview_idx4 ON public.materialized_markerview(marker_name);
129 CREATE INDEX materialized_markerview_idx5 ON public.materialized_markerview(UPPER(marker_name));
130 CREATE INDEX materialized_markerview_idx6 ON public.materialized_markerview(chrom);
131 CREATE INDEX materialized_markerview_idx7 ON public.materialized_markerview(pos);
132 CREATE INDEX materialized_markerview_idx8 ON public.materialized_markerview(variant_name);
133 CREATE INDEX materialized_markerview_idx9 ON public.materialized_markerview(UPPER(variant_name));
134 CREATE INDEX materialized_markerview_idx10 ON public.materialized_markerview USING GIN(marker_name gin_trgm_ops);
135 CREATE INDEX materialized_markerview_idx11 ON public.materialized_markerview USING GIN(variant_name gin_trgm_ops);
137 -- Refresh materialzied view, if requested with function argument
138 IF \$1 THEN
139 EXECUTE 'REFRESH MATERIALIZED VIEW public.materialized_markerview';
140 END IF;
142 -- Return true if the materialized view is refreshed
143 RETURN \$1;
146 \$function\$;
148 -- Build a populated materialized view
149 SELECT public.create_materialized_markerview(true);
151 -- Change ownership of matview to web_usr (so it can be rebuilt when needed)
152 ALTER MATERIALIZED VIEW public.materialized_markerview OWNER TO web_usr;
155 EOSQL
157 print "You're done!\n";
161 ####
162 1; #
163 ####