retrieve map information through Cview objects.
[sgn.git] / lib / CXGN / Bulk / BACEndRaw.pm
blob7b3823e4af35bb1d9c355c0525eebe560b9c4049
1 # bulk BAC End Raw download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # Of BAC Ends of type Raw.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
18 =head1 NAME
20 /CXGN/Bulk/BACEndRaw.pm
21 (A subclass of Bulk)
23 =head1 DESCRIPTION
25 This Perl module is used on the bulk download page. It collects
26 identifiers submitted by the user and returns information based on the
27 BAC End Raw IDs entered. It then determines the information the user is
28 searching for (BAC ID, Clone Type, Organism Name, Accession Name,
29 Library Name, Estimated Length, GenBank Accession, BAC End Sequence,
30 and Qual Value Sequence) and performs the appropriate querying of the
31 database. The results of the database query are formatted and presented
32 to the user on a separate page. Options of viewing or downloading
33 in text or FASTA are available.
35 =cut
37 use strict;
38 use warnings;
39 use CXGN::Bulk;
40 use CXGN::DB::DBICFactory;
41 use CXGN::Genomic::CloneNameParser;
42 use CXGN::Genomic::Chromat;
43 use CXGN::Genomic::GSS;
45 package CXGN::Bulk::BACEndRaw;
46 use base "CXGN::Bulk";
# Constructor. Adds nothing of its own: every argument is forwarded to the
# parent class constructor and the object it builds is returned unchanged.
sub new {
    my ($class, @ctor_args) = @_;

    # Scalar assignment keeps the parent constructor in scalar context.
    my $object = $class->SUPER::new(@ctor_args);

    return $object;
}
=head2 process_parameters

 Desc: Works out which output fields were switched on and which
       identifiers were submitted, and stores both on the object so that
       process_ids can use them.
 Args: none
 Ret : 1 if the parameters were OK, 0 if not

Sets C<< $self->{output_list} >> (every field that can be output, in output
order) and C<< $self->{output_fields} >> (the subset whose value on the
object is the string "on"). C<< $self->{ids} >> is only set when
C<check_ids> returned at least one non-empty identifier; otherwise 0 is
returned and C<< $self->{ids} >> is left untouched.

=cut

sub process_parameters {
    my $self = shift;

    # @output_list defines the identity and order of all fields that can
    # be output.
    my @output_list = (
        'bac_id',            'clone_type',
        'org_name',          'accession_name',
        'library_name',      'estimated_length',
        'genbank_accession', 'overgo_matches',
        'bac_end_sequence',  'qual_value_seq',
    );

    # idType may not have been set; avoid concatenating undef into the
    # debug message.
    my $id_type = defined $self->{idType} ? $self->{idType} : '';
    $self->debug("Type of identifier: $id_type");

    # @output_fields is the sub-set of fields that will actually be
    # output: those whose flag on the object is exactly "on".
    my @output_fields =
        grep { defined $self->{$_} && $self->{$_} eq 'on' } @output_list;

    $self->{output_list}   = \@output_list;
    $self->{output_fields} = \@output_fields;

    # An array in boolean context is true only when it is non-empty, so
    # no comparison against an empty list is needed.
    my @ids = $self->check_ids();
    return 0 unless @ids;

    $self->debug("IDs to be processed:");
    $self->debug($_) for @ids;

    # At least one identifier must be non-empty for the query to make sense.
    my $has_valid_id = grep { defined($_) && $_ ne '' } @ids;
    return 0 unless $has_valid_id;

    $self->{ids} = \@ids;

    return 1;    # params were OK if we got here
}
=head2 process_ids

 Desc: Looks up every identifier in $self->{ids} and prints one
       tab-separated line per identifier found to the dump file.
 Args: none
 Ret : the elapsed query time in seconds (also stored in
       $self->{query_time})

Each identifier is parsed with CXGN::Genomic::CloneNameParser, then its
chromat (CXGN::Genomic::Chromat) and GSS (CXGN::Genomic::GSS) records are
looked up. An identifier that fails any of these three steps is printed to
the not-found file as ">ID". For all others, the values of the fields listed
in $self->{output_fields} are printed, in that order, to the dump file.

On return $self->{foundcount}, $self->{notfoundcount},
$self->{query_start_time} and $self->{query_time} are set.

=cut

sub process_ids {
    my $self = shift;
    $self->{query_start_time} = time();

    my $chado         = CXGN::DB::DBICFactory->open_schema('Bio::Chado::Schema');
    my @output_fields = @{ $self->{output_fields} };
    my ($dump_fh, $notfound_fh) = $self->create_dumpfile();

    my $foundcount    = 0;
    my $notfoundcount = 0;

    # NOTE(review): assumes debug() appends its own newline, as the calls
    # in process_parameters suggest - confirm against CXGN::Bulk.
    $self->debug("OUTPUT FIELDS: " . join("\t", @output_fields));

    # iterate through identifiers
    foreach my $id (@{ $self->{ids} }) {

        # parse name; the parser returns a false value if parsing did
        # not succeed
        my $bac_end_parser = CXGN::Genomic::CloneNameParser->new();
        my $parsed_bac_end = $bac_end_parser->BAC_end_external_id($id);
        unless ($parsed_bac_end) {
            print $notfound_fh ">$id\n";
            $notfoundcount++;
            next;
        }

        # look up the chromat
        my $chromat =
            CXGN::Genomic::Chromat->retrieve($parsed_bac_end->{chromat_id});
        unless ($chromat) {
            print $notfound_fh ">$id\n";
            $notfoundcount++;
            next;
        }

        my $clone = $chromat->clone_object;
        my $lib   = $clone->library_object;

        # only the first GSS matching this chromat and version is used
        my ($gss) = CXGN::Genomic::GSS->search(
            chromat_id => $chromat->chromat_id,
            version    => $parsed_bac_end->{version},
        );
        unless ($gss) {
            print $notfound_fh ">$id\n";
            $notfoundcount++;
            next;
        }

        # get organism name and accession
        my (undef, $oname, $cname) = $lib->accession_name();

        # Fetched once per row and reused for both the debug message and
        # the output value.
        my $genbank_accession = $clone->genbank_accession($chado);
        $self->debug("GENBANK ACCESSION:" . ref($genbank_accession));

        my %field_vals = (
            "bac_id"            => $chromat->clone_read_external_identifier(),
            "clone_type"        => $parsed_bac_end->{clonetype},
            "org_name"          => $oname,
            "accession_name"    => $cname,
            "library_name"      => $lib->name(),
            "estimated_length"  => $clone->estimated_length(),
            "genbank_accession" => $genbank_accession,
            "overgo_matches"    => "overgo",    # placeholder; will be added soon
            "bac_end_sequence"  => $gss->seq,   # raw sequence
            "qual_value_seq"    => $gss->qual,  # quality values
        );

        # Keep only the selected fields, in the selected order. Undefined
        # values become empty columns instead of triggering warnings.
        my @data_array =
            map { defined $field_vals{$_} ? $field_vals{$_} : '' }
            @output_fields;

        # print query results to dumpfile
        my $linecolumns = join("\t", @data_array);
        print $dump_fh $linecolumns, "\n";
        $self->debug("LINE: $linecolumns");

        $foundcount++;
    }

    close($dump_fh);
    close($notfound_fh);

    $self->{foundcount}    = $foundcount;
    $self->{notfoundcount} = $notfoundcount;
    $self->{query_time}    = time() - $self->{query_start_time};

    # The original ended on this assignment, so its value was the
    # implicit return value; it is returned explicitly here.
    return $self->{query_time};
}