remove erroneously-commited next i had as part of my FISH loader debugging
[sgn.git] / lib / CXGN / Bulk / UnigeneMemberInfo.pm
blob3da118f5483121410b36a6b74fb06e1816e499b3
1 # Unigene Member Info download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # Of Unigene Member Info.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
18 =head1 NAME
20 /CXGN/Bulk/UnigeneMemberInfo.pm
21 (A subclass of Bulk)
23 =head1 DESCRIPTION
25 This perl script is used on the bulk download page. The script collects
26 identifiers submitted by the user and returns information based on the
27 Unigene ID's for Unigene entered. It then determines the information the
28 user is searching for (SGN_U, Build Number, Automatic Annotation and
29 Unigene Sequence) and preforms the appropriate querying of the
30 database. The results of the database query are formated and presented
31 to the user on a separate page. Options of viewing or downloading
32 in text or fasta are available.
34 =cut
36 use strict;
37 use warnings;
38 use CXGN::Bulk;
40 package CXGN::Bulk::UnigeneMemberInfo;
41 use base "CXGN::Bulk";
43 sub new
45 my $class = shift;
46 my $self = $class->SUPER::new(@_);
47 return $self;
50 =head2 process_parameters
52 Desc:
53 Args: none
54 Ret : 1 if the parameters were OK, 0 if not
56 Modifies some of the parameters received set in get_parameters. Preparing
57 data for the database query.
59 =cut
61 sub process_parameters
63 my $self = shift;
65 # @output_list defines the identity on order of all fields that can be output
66 #'SGN_U' would be here but the Unigene ID page has SGN_U twice so we need to differentiate between SGN_U for Unigene Info and SGN_U for Member Info
67 my @output_list = ('build_nr', 'SGN_E', 'SGN_T',
68 'SGN_C', 'clone_name', 'SGN_S', 'TUS',
69 'chipname', 'manual_annotation');
70 my %links = (clone_name =>
71 "/search/est.pl?request_type=10&search=Search&request_id=",
72 SGN_U => "/search/unigene.pl?unigene_id=",
75 # adjust some parameters for the unigene download
76 # the unigene download page doesn't have the same extra sequence checkbox
77 # so we have to set it here if either est_seq, protein_seq, or unigene_seq have been chosen.
78 # we also disable automatic_annotation with the unigene_member query.
80 $self -> {idType}="unigene_member";
81 $self->{automatic_annotation} = "";
82 if ($self -> {est_seq} )
84 $self -> {seq_type} = "est_seq";
85 $self->{sequence} = "on";
88 $self->{links} = \%links;
89 my @output_fields = ();
91 $self->debug("Type of identifier: ".($self->{idType})."");
93 # @output_fields is the sub-set of fields that will actually be output.
95 #first check condition for SGN_U
96 if(my $value = $self->{SGN_U_M})
98 if ($value eq "on")
100 push @output_fields, 'SGN_U';
104 for my $o (@output_list)
106 if (my $value = $self->{$o})
108 if ($value eq "on")
110 push @output_fields, $o;
115 if ($self->{sequence} eq "on") { push @output_fields, $self->{seq_type}; }
117 $self->{output_list} = \@output_list;
118 $self->{output_fields} = \@output_fields;
120 my @ids = $self->check_ids();
121 if (@ids == ()) {return 0;}
122 $self->debug("IDs to be processed:");
123 foreach my $i (@ids)
125 $i =~ s/^.*?(\d+).*?$/$1/;
126 if(!($i =~ m/\d+/))
128 $i = "";
130 $self->debug($i);
132 my $has_valid_id = 0;
133 foreach my $i(@ids)
135 if ($i ne "")
137 $has_valid_id = 1;
140 if(!$has_valid_id)
142 return 0;
144 $self->{ids} = \@ids;
146 return 1; #params were OK if we got here
149 =head2 proces_ids
151 Desc: sub process_[idType]_ids
152 Args: default;
153 Ret : data from database printed to a file;
155 Queries database using Persistent (see perldoc Persistent) and
156 object oriented perl to obtain data on Bulk Objects using formatted
157 IDs.
159 =cut
161 sub process_ids
163 my $self = shift;
164 my $db = $self->{db};
165 my @output_fields = @{$self -> {output_fields}};
166 my @return_data = ();
167 my @notfound = ();
168 my ($dump_fh, $notfound_fh) = $self -> create_dumpfile();
169 # start querying the database
170 my $current_time= time() - $self -> {query_start_time};
171 $self->debug("Time point 6: $current_time");
173 my $in_ids = 'IN ('.join(',',(map {$db->quote($_)} @{$self->{ids}})).')'; #makes fragment of SQL query
174 my $query = get_query($in_ids);
176 #warn "using query \n",$query;
178 my $sth = $db -> prepare($query);
180 $self -> {query_start_time} = time();
181 $sth -> execute();
182 $current_time = time() - $self->{query_start_time};
184 # execute the query and get the data.
185 while (my $row = $sth -> fetchrow_hashref()) {
186 # crop est_seq if qc_report data is available
188 if ( defined($row->{start}) && defined($row->{length}) ) {
189 my $start = $row->{start};
190 my $length = $row->{length};
191 $row->{"est_seq"}=substr($row->{est_seq}, $start, $length);
194 $row->{sgn_u}="SGN-U$row->{sgn_u}" if defined($row->{sgn_u});
195 $row->{sgn_c}="SGN-C$row->{sgn_c}" if defined($row->{sgn_c});
196 $row->{sgn_t}="SGN-T$row->{sgn_t} ($row->{direction})" if defined($row->{sgn_t});
198 @return_data = map ($row->{lc($_)}, @{$self -> {output_fields}});
199 # the pesky manual annotation field contains carriage returns!!!
200 foreach my $r (@return_data) {
201 $r =~ s/\n//g;
203 print $dump_fh (join "\t", @return_data)."\n";
205 close($notfound_fh);
206 close($dump_fh);
208 $self->{query_time} = time() - $self -> {query_start_time}
212 =head2 get_query
214 Desc:
215 Args: default;
216 Ret : data from database printed to a file;
218 Queries database using SQL to obtain data on Bulk Objects using formatted
219 IDs.
221 =cut
224 sub get_query
226 my $in_ids = shift;
227 return <<EOSQL
228 SELECT clone.clone_name,
229 clone.clone_id as SGN_C,
230 seqread.read_id as SGN_T,
231 seqread.direction as direction,
232 est.est_id as SGN_E,
233 (unigene_build.build_nr) as build_nr,
234 unigene.unigene_id as SGN_U,
235 microarray.chip_name as chipname,
236 microarray.spot_id as SGN_S,
237 microarray.content_specific_tag as TUS,
238 est.seq as est_seq,
239 qc_report.hqi_start as start,
240 qc_report.hqi_length as length,
241 unigene_consensi.seq as unigene_seq,
242 (SELECT array_to_string(array(SELECT '"' || m.annotation_text || '"'
243 || ' -- ' || a.first_name || ' ' || a.last_name
244 FROM manual_annotations as m
245 JOIN sgn_people.sp_person as a
246 ON(m.author_id=a.sp_person_id)
247 WHERE m.annotation_target_id = clone.clone_id
248 AND (m.annotation_target_type_id=1
249 OR m.annotation_target_type_id IS NULL)
250 LIMIT 5
252 ' AND ')
253 ) as manual_annotation,
254 (SELECT array_to_string(array(SELECT 'MATCHED '
255 || dl.defline
256 || ' (evalue:'
257 || bh.evalue
258 || ')'
259 FROM blast_annotations as ba
260 JOIN blast_hits as bh USING(blast_annotation_id)
261 JOIN blast_defline as dl USING(defline_id)
262 WHERE ba.apply_id=unigene.unigene_id
263 AND ba.blast_target_id=1
264 AND ba.apply_type=15
265 LIMIT 5
267 ' AND ')
268 ) as automatic_annotation,
269 est.status
270 FROM unigene
271 LEFT JOIN unigene_member ON (unigene_member.unigene_id=unigene.unigene_id)
272 LEFT JOIN est ON (est.est_id=unigene_member.est_id)
273 LEFT JOIN seqread ON (seqread.read_id=est.read_id)
274 LEFT JOIN clone ON (clone.clone_id=seqread.clone_id)
275 LEFT JOIN microarray ON (clone.clone_id=microarray.clone_id)
276 LEFT JOIN unigene_build on (unigene_build.unigene_build_id=unigene.unigene_build_id)
277 LEFT JOIN unigene_consensi ON (unigene.consensi_id=unigene_consensi.consensi_id)
278 LEFT JOIN qc_report ON (est.est_id=qc_report.est_id)
279 WHERE unigene.unigene_id $in_ids
280 AND (est.status=0 OR est.status IS NULL)
281 AND (est.flags=0 OR est.flags IS NULL)
282 ORDER BY unigene.unigene_id
283 EOSQL