1 # Unigene Member Info download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # Of Unigene Member Info.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
20 /CXGN/Bulk/UnigeneMemberInfo.pm
25 This perl script is used on the bulk download page. The script collects
26 identifiers submitted by the user and returns information based on the
27 Unigene ID's for Unigene entered. It then determines the information the
28 user is searching for (SGN_U, Build Number, Automatic Annotation and
29 Unigene Sequence) and preforms the appropriate querying of the
30 database. The results of the database query are formated and presented
31 to the user on a separate page. Options of viewing or downloading
32 in text or fasta are available.
40 package CXGN
::Bulk
::UnigeneMemberInfo
;
41 use base
"CXGN::Bulk";
46 my $self = $class->SUPER::new
(@_);
50 =head2 process_parameters
54 Ret : 1 if the parameters were OK, 0 if not
56 Modifies some of the parameters received set in get_parameters. Preparing
57 data for the database query.
61 sub process_parameters
65 # @output_list defines the identity on order of all fields that can be output
66 #'SGN_U' would be here but the Unigene ID page has SGN_U twice so we need to differentiate between SGN_U for Unigene Info and SGN_U for Member Info
67 my @output_list = ('build_nr', 'SGN_E', 'SGN_T',
68 'SGN_C', 'clone_name', 'SGN_S', 'TUS',
69 'chipname', 'manual_annotation');
70 my %links = (clone_name
=>
71 "/search/est.pl?request_type=10&search=Search&request_id=",
72 SGN_U
=> "/search/unigene.pl?unigene_id=",
75 # adjust some parameters for the unigene download
76 # the unigene download page doesn't have the same extra sequence checkbox
77 # so we have to set it here if either est_seq, protein_seq, or unigene_seq have been chosen.
78 # we also disable automatic_annotation with the unigene_member query.
80 $self -> {idType
}="unigene_member";
81 $self->{automatic_annotation
} = "";
82 if ($self -> {est_seq
} )
84 $self -> {seq_type
} = "est_seq";
85 $self->{sequence
} = "on";
88 $self->{links
} = \
%links;
89 my @output_fields = ();
91 $self->debug("Type of identifier: ".($self->{idType
})."");
93 # @output_fields is the sub-set of fields that will actually be output.
95 #first check condition for SGN_U
96 if(my $value = $self->{SGN_U_M
})
100 push @output_fields, 'SGN_U';
104 for my $o (@output_list)
106 if (my $value = $self->{$o})
110 push @output_fields, $o;
115 if ($self->{sequence
} eq "on") { push @output_fields, $self->{seq_type
}; }
117 $self->{output_list
} = \
@output_list;
118 $self->{output_fields
} = \
@output_fields;
120 my @ids = $self->check_ids();
121 if (@ids == ()) {return 0;}
122 $self->debug("IDs to be processed:");
125 $i =~ s/^.*?(\d+).*?$/$1/;
132 my $has_valid_id = 0;
144 $self->{ids
} = \
@ids;
146 return 1; #params were OK if we got here
151 Desc: sub process_[idType]_ids
153 Ret : data from database printed to a file;
155 Queries database using Persistent (see perldoc Persistent) and
156 object oriented perl to obtain data on Bulk Objects using formatted
164 my $db = $self->{db
};
165 my @output_fields = @
{$self -> {output_fields
}};
166 my @return_data = ();
168 my ($dump_fh, $notfound_fh) = $self -> create_dumpfile
();
169 # start querying the database
170 my $current_time= time() - $self -> {query_start_time
};
171 $self->debug("Time point 6: $current_time");
173 my $in_ids = 'IN ('.join(',',(map {$db->quote($_)} @
{$self->{ids
}})).')'; #makes fragment of SQL query
174 my $query = get_query
($in_ids);
176 #warn "using query \n",$query;
178 my $sth = $db -> prepare
($query);
180 $self -> {query_start_time
} = time();
182 $current_time = time() - $self->{query_start_time
};
184 # execute the query and get the data.
185 while (my $row = $sth -> fetchrow_hashref
()) {
186 # crop est_seq if qc_report data is available
188 if ( defined($row->{start
}) && defined($row->{length}) ) {
189 my $start = $row->{start
};
190 my $length = $row->{length};
191 $row->{"est_seq"}=substr($row->{est_seq
}, $start, $length);
194 $row->{sgn_u
}="SGN-U$row->{sgn_u}" if defined($row->{sgn_u
});
195 $row->{sgn_c
}="SGN-C$row->{sgn_c}" if defined($row->{sgn_c
});
196 $row->{sgn_t
}="SGN-T$row->{sgn_t} ($row->{direction})" if defined($row->{sgn_t
});
198 @return_data = map ($row->{lc($_)}, @
{$self -> {output_fields
}});
199 # the pesky manual annotation field contains carriage returns!!!
200 foreach my $r (@return_data) {
203 print $dump_fh (join "\t", @return_data)."\n";
208 $self->{query_time
} = time() - $self -> {query_start_time
}
216 Ret : data from database printed to a file;
218 Queries database using SQL to obtain data on Bulk Objects using formatted
228 SELECT clone.clone_name,
229 clone.clone_id as SGN_C,
230 seqread.read_id as SGN_T,
231 seqread.direction as direction,
233 (unigene_build.build_nr) as build_nr,
234 unigene.unigene_id as SGN_U,
235 microarray.chip_name as chipname,
236 microarray.spot_id as SGN_S,
237 microarray.content_specific_tag as TUS,
239 qc_report.hqi_start as start,
240 qc_report.hqi_length as length,
241 unigene_consensi.seq as unigene_seq,
242 (SELECT array_to_string(array(SELECT '"' || m.annotation_text || '"'
243 || ' -- ' || a.first_name || ' ' || a.last_name
244 FROM manual_annotations as m
245 JOIN sgn_people.sp_person as a
246 ON(m.author_id=a.sp_person_id)
247 WHERE m.annotation_target_id = clone.clone_id
248 AND (m.annotation_target_type_id=1
249 OR m.annotation_target_type_id IS NULL)
253 ) as manual_annotation,
254 (SELECT array_to_string(array(SELECT 'MATCHED '
259 FROM blast_annotations as ba
260 JOIN blast_hits as bh USING(blast_annotation_id)
261 JOIN blast_defline as dl USING(defline_id)
262 WHERE ba.apply_id=unigene.unigene_id
263 AND ba.blast_target_id=1
268 ) as automatic_annotation,
271 LEFT JOIN unigene_member ON (unigene_member.unigene_id=unigene.unigene_id)
272 LEFT JOIN est ON (est.est_id=unigene_member.est_id)
273 LEFT JOIN seqread ON (seqread.read_id=est.read_id)
274 LEFT JOIN clone ON (clone.clone_id=seqread.clone_id)
275 LEFT JOIN microarray ON (clone.clone_id=microarray.clone_id)
276 LEFT JOIN unigene_build on (unigene_build.unigene_build_id=unigene.unigene_build_id)
277 LEFT JOIN unigene_consensi ON (unigene.consensi_id=unigene_consensi.consensi_id)
278 LEFT JOIN qc_report ON (est.est_id=qc_report.est_id)
279 WHERE unigene.unigene_id $in_ids
280 AND (est.status=0 OR est.status IS NULL)
281 AND (est.flags=0 OR est.flags IS NULL)
282 ORDER BY unigene.unigene_id