add trial design store subclasses.
[sgn.git] / lib / CXGN / Bulk / CloneEST.pm
blob1f0c7e242f4dd48f58bd74e93157defa7b700b3d
1 # Clone EST download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # Of Clones of type EST.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
18 =head1 NAME
20 /CXGN/Bulk/CloneEST.pm
21 (A subclass of Bulk)
23 =head1 DESCRIPTION
25 This perl script is used on the bulk download page. The script collects
26 identifiers submitted by the user and returns information based on the
27 Clone EST Ids entered. It then determines the information the user is
28 searching for (Clone name, SGN_C, SGN_T, SGN_E, SGN_U, Build Number,
29 Chipname, SGN_S, TUS, Maunal Annotation, Autiomatic Annotation,
30 and Estemated Sequence) and preforms the appropriate querying of the
31 database. The results of the database query are formated and presented
32 to the user on a separate page. Options of viewing or downloading
33 in text or fasta are available.
35 =cut
37 use strict;
38 use warnings;
39 use CXGN::Bulk;
41 package CXGN::Bulk::CloneEST;
42 use base "CXGN::Bulk";
44 =head2 process_parameters
46 Desc: sub process_parameters
47 Args: none
48 Ret : 1 if the parameters were OK, 0 if not
50 Modifies some of the parameters received set in get_parameters. Preparing
51 data for the database query.
53 =cut
55 sub process_parameters {
56 my $self = shift;
58 # @output_list defines the identity on order of all fields that can be output
59 my @output_list = ( 'clone_name', 'SGN_C', 'SGN_T', 'SGN_E', 'SGN_U',
60 'build_nr', 'chipname','SGN_S', 'TUS',
61 'manual_annotation', 'automatic_annotation',
62 'evalue');
64 my %links = (clone_name =>
65 "/search/est.pl?request_type=10&search=Search&request_id=",
66 SGN_U => "/search/unigene.pl?unigene_id=",
69 $self->{links} = \%links;
70 my @output_fields = ();
72 $self->debug("Type of identifier: ".($self->{idType})."");
74 # @output_fields is the sub-set of fields that will actually be output.
75 foreach my $o (@output_list)
77 if (my $value = $self->{$o})
79 if ($value eq "on")
81 push @output_fields, $o;
86 if ($self->{sequence} eq "on")
88 push @output_fields, $self->{seq_type};
91 $self->{output_list} = \@output_list;
92 $self->{output_fields} = \@output_fields;
94 my @ids = $self->check_ids();
95 if (!@ids) {
96 print STDERR "No legal ids found. Returning...\n";
97 return 0;
99 $self->debug("IDs to be processed:");
100 foreach my $i (@ids)
102 $i =~ s/^(\w+?)(\d+)(\w+\d+)/$1\-$2\-$3/;
103 $self->debug($i);
105 my $has_valid_id = 0;
106 foreach my $i(@ids){
107 if ($i ne ""){
108 $has_valid_id = 1;
111 if(!$has_valid_id){
112 return 0;
114 $self->{ids} = \@ids;
116 return 1; #params were OK if we got here
119 =head2 process_ids
121 Desc: sub process_ids
122 Args: default;
123 Ret : data from database printed to a file;
125 Queries database using Persistent (see perldoc Persistent) and
126 object oriented perl to obtain data on Bulk Objects using formatted
127 IDs.
129 =cut
131 sub process_ids {
132 my $self = shift;
133 my $db = $self->{db};
134 my @output_fields = @{$self->{output_fields}};
135 my @return_data = ();
136 my @notfound = ();
137 my ($dump_fh, $notfound_fh) = $self->create_dumpfile();
138 # start querying the database
139 my $current_time= time() - $self->{query_start_time};
140 $self->debug("Time point 6: $current_time");
142 my $in_ids = 'IN ('.join(',',(map {$db->quote($_)} @{$self->{ids}})).')'; #makes fragment of SQL query
143 my $query = get_query($in_ids, $self->{build_id});
145 #warn "using query \n",$query;
147 my $sth = $db->prepare($query);
149 $self->{query_start_time} = time();
150 $sth->execute();
151 $current_time = time() - $self->{query_start_time};
153 # execute the query and get the data.
154 while (my $row = $sth->fetchrow_hashref())
156 # crop est_seq if qc_report data is available
158 if ( defined($row->{start}) && defined($row->{length}) )
160 my $start = $row->{start};
161 my $length = $row->{length};
162 $row->{"est_seq"}=substr($row->{est_seq}, $start, $length);
165 $row->{sgn_u}="SGN-U$row->{sgn_u}" if defined($row->{sgn_u});
166 $row->{sgn_c}="SGN-C$row->{sgn_c}" if defined($row->{sgn_c});
167 $row->{sgn_t}="SGN-T$row->{sgn_t} ($row->{direction})" if defined($row->{sgn_t});
169 @return_data = map ($row->{lc($_)}, @{$self -> {output_fields}});
170 # the pesky manual annotation field contains carriage returns!!!
171 foreach my $r (@return_data) {
172 $r =~ s/\n//g if $r;
174 print $dump_fh (join "\t", @return_data)."\n";
176 close($notfound_fh);
177 close($dump_fh);
179 $self->{query_time} = time() - $self -> {query_start_time};
182 =head2 get_query
184 Desc:
185 Args: default;
186 Ret : data from database printed to a file;
188 Queries database using SQL to obtain data on Bulk Objects using formatted
189 IDs.
191 =cut
193 sub get_query
195 my ($in_ids, $build_id) = @_;
196 #person might have picked a specific build they're interested in
197 my $build_condition = ($build_id eq 'all') ? '' : <<EOSQL;
198 AND (unigene_build.unigene_build_id = $build_id
199 OR unigene_build.unigene_build_id IS NULL)
200 EOSQL
202 return <<EOSQL
203 SELECT clone.clone_name,
204 clone.clone_id as SGN_C,
205 seqread.read_id as SGN_T,
206 seqread.direction as direction,
207 est.est_id as SGN_E,
208 unigene_build.build_nr as build_nr,
209 unigene.unigene_id as SGN_U,
210 microarray.chip_name as chipname,
211 microarray.spot_id as SGN_S,
212 microarray.content_specific_tag as TUS,
213 est.seq as est_seq,
214 qc_report.hqi_start as start,
215 qc_report.hqi_length as length,
216 (SELECT array_to_string(array(SELECT '"' || m.annotation_text || '"'
217 || ' -- ' || a.first_name || ' ' || a.last_name
218 FROM manual_annotations as m
219 JOIN sgn_people.sp_person as a
220 ON(m.author_id=a.sp_person_id)
221 WHERE m.annotation_target_id = clone.clone_id
222 AND (m.annotation_target_type_id=1
223 OR m.annotation_target_type_id IS NULL)
224 LIMIT 5
226 ' AND ')
227 ) AS manual_annotation,
228 (SELECT array_to_string(array(SELECT 'MATCHED '
229 || dl.defline
230 || ' (evalue:'
231 || bh.evalue
232 || ')'
233 FROM blast_annotations as ba
234 JOIN blast_hits as bh USING(blast_annotation_id)
235 JOIN blast_defline as dl USING(defline_id)
236 WHERE ba.apply_id=unigene.unigene_id
237 AND ba.blast_target_id=1
238 AND ba.apply_type=15
239 LIMIT 5
241 ' AND ')
242 ) AS automatic_annotation,
243 est.status
244 FROM clone
245 LEFT JOIN seqread ON (clone.clone_id=seqread.clone_id)
246 LEFT JOIN est ON (seqread.read_id=est.read_id)
247 LEFT JOIN unigene_member ON (unigene_member.est_id=est.est_id)
248 LEFT JOIN unigene ON (unigene_member.unigene_id=unigene.unigene_id)
249 LEFT JOIN unigene_build ON (unigene.unigene_build_id=unigene_build.unigene_build_id)
250 LEFT JOIN unigene_consensi ON (unigene.consensi_id=unigene_consensi.consensi_id)
251 LEFT JOIN qc_report ON (est.est_id=qc_report.est_id)
252 LEFT JOIN microarray ON (clone.clone_id=microarray.clone_id)
253 WHERE clone.clone_name $in_ids
254 AND (est.status=0 OR est.status IS NULL)
255 AND (est.flags=0 OR est.flags IS NULL)
256 $build_condition
257 ORDER BY clone.clone_id
258 EOSQL