Merge branch 'master' into topic/analyze_phenotypes_page
[sgn.git] / lib / CXGN / Bulk / BACEndTrim.pm
blob4e5dee7f9371d5662dd096dde3f1944fc3877c16
1 # Bulk BAC End Trim download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # of BAC ends of type Trimmed.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
18 =head1 NAME
20 /CXGN/Bulk/BACEndTrim.pm
21 (A subclass of Bulk)
23 =head1 DESCRIPTION
25 This Perl script is used on the bulk download page. The script collects
26 identifiers submitted by the user and returns information based on the
27 trimmed BAC end IDs entered. It then determines the information the user is
28 searching for (BAC IDs, Clone Type, Organism Name, Accession Name,
29 Library Name, Estimated Length, GenBank Accession, BAC End Sequence,
30 and Quality Value Sequence) and performs the appropriate querying of the
31 database. The results of the database query are formatted and presented
32 to the user on a separate page. Options for viewing or downloading
33 in text or FASTA format are available.
35 =cut
38 package CXGN::Bulk::BACEndTrim;
39 use strict;
40 use warnings;
41 use CXGN::Bulk;
42 use CXGN::Genomic::CloneNameParser;
43 use CXGN::Genomic::Chromat;
44 use CXGN::Genomic::GSS;
46 use CXGN::DB::DBICFactory;
48 use base "CXGN::Bulk";
# new
#
# Constructor. Adds nothing of its own: every argument is forwarded
# unchanged to the parent class constructor and the resulting object is
# handed back to the caller.
sub new {
    my ($class, @ctor_args) = @_;

    my $object = $class->SUPER::new(@ctor_args);

    return $object;
}
57 =head2 process_parameters
59 Desc: Prepares the submitted parameters for the database query
60 Args: none
61 Ret : 1 if the parameters were OK, 0 if not
63 Modifies some of the parameters set in get_parameters, preparing
64 the data for the database query.
66 =cut
# process_parameters
#
# Prepares the request parameters for the database query.
#
# Reads  : $self->{idType} (debug output only), the per-field values stored
#          on $self under each output field name (a field is selected when
#          its value is the string "on"), and the identifier list returned
#          by $self->check_ids().
# Writes : $self->{output_list}, $self->{output_fields} and, on success,
#          $self->{ids}.
# Args   : none
# Ret    : 1 if the parameters were OK, 0 if check_ids() returned nothing
#          or every identifier was empty.
sub process_parameters {
    my $self = shift;

    # @output_list defines the identity and order of all fields that can
    # be output.
    my @output_list = qw(
        bac_id
        clone_type
        org_name
        accession_name
        library_name
        estimated_length
        genbank_accession
        overgo_matches
        bac_end_sequence
        qual_value_seq
    );

    # Guard against an unset idType so the debug line cannot warn.
    my $id_type = defined $self->{idType} ? $self->{idType} : '';
    $self->debug("Type of identifier: $id_type");

    # @output_fields is the subset of fields that will actually be output.
    my @output_fields =
        grep { defined $self->{$_} && $self->{$_} eq 'on' } @output_list;

    $self->{output_list}   = \@output_list;
    $self->{output_fields} = \@output_fields;

    # An array in boolean context is true iff it is non-empty; the previous
    # comparison against "()" relied on undef numifying to 0 and warned.
    my @ids = $self->check_ids();
    return 0 unless @ids;

    $self->debug("IDs to be processed:");
    $self->debug($_) for @ids;

    # At least one identifier has to be a non-empty string.
    my $valid_id_count = grep { defined $_ && $_ ne '' } @ids;
    return 0 unless $valid_id_count;

    $self->{ids} = \@ids;

    return 1;    # params were OK if we got here
}
122 =head2 process_ids
124 Desc: sub process_[idType]_ids
125 Args: default;
126 Ret : data from database printed to a file;
128 Queries database using Persistent (see perldoc Persistent) and
129 object oriented perl to obtain data on Bulk Objects using formatted
130 IDs.
132 =cut
# process_ids
#
# Resolves every identifier in $self->{ids} to a BAC end read and writes the
# selected fields for it, tab-separated, one line per identifier, to the dump
# filehandle obtained from $self->create_dumpfile(). Identifiers that cannot
# be parsed, or for which no chromat or GSS record is found, are written as
# ">id" lines to the not-found filehandle instead.
#
# Reads  : $self->{ids}, $self->{output_fields}
# Writes : $self->{query_start_time}, $self->{foundcount},
#          $self->{notfoundcount}, $self->{query_time}
# Args   : none
# Ret    : the elapsed query time in seconds (also stored in
#          $self->{query_time})
sub process_ids {
    my $self = shift;
    $self->{query_start_time} = time();

    my $chado         = CXGN::DB::DBICFactory->open_schema('Bio::Chado::Schema');
    my @output_fields = @{ $self->{output_fields} };
    my ($dump_fh, $notfound_fh) = $self->create_dumpfile();

    # These counters used to be initialised but never incremented, so the
    # stored foundcount/notfoundcount were always 0.
    my $foundcount    = 0;
    my $notfoundcount = 0;

    # Records one identifier that could not be resolved.
    my $report_not_found = sub {
        my ($missing_id) = @_;
        print $notfound_fh ">$missing_id\n";
        $notfoundcount++;
        return;
    };

    # iterate through identifiers
    ID:
    foreach my $id (@{ $self->{ids} }) {

        # parse name
        my $bac_end_parser = CXGN::Genomic::CloneNameParser->new();
        my $parsed_bac_end = $bac_end_parser->BAC_end_external_id($id);

        # parsing returns a false value if it did not succeed
        unless ($parsed_bac_end) {
            $report_not_found->($id);
            next ID;
        }

        # look up the chromat
        my $chromat =
            CXGN::Genomic::Chromat->retrieve($parsed_bac_end->{chromat_id});
        unless ($chromat) {
            $report_not_found->($id);
            next ID;
        }

        my $clone = $chromat->clone_object;
        my $lib   = $clone->library_object;
        my ($gss) = CXGN::Genomic::GSS->search(
            chromat_id => $chromat->chromat_id,
            version    => $parsed_bac_end->{version},
        );
        unless ($gss) {
            $report_not_found->($id);
            next ID;
        }

        # get organism name and accession
        my (undef, $oname, $cname) = $lib->accession_name();

        # trimmed sequence and quality values
        my $qualvalue = $gss->trimmed_qual;
        my $bacseq    = $gss->trimmed_seq;

        my $bac_id            = $chromat->clone_read_external_identifier();
        my $clone_type        = $parsed_bac_end->{clonetype};
        my $library_name      = $lib->name();
        my $estimated_length  = $clone->estimated_length();
        my $genbank_accession = $clone->genbank_accession($chado);

        # Placeholder: the original code notes that real overgo matches
        # "will be added soon".
        my $overgo = "overgo";

        my %field_vals = (
            "bac_id"            => $bac_id,
            "clone_type"        => $clone_type,
            "org_name"          => $oname,
            "accession_name"    => $cname,
            "library_name"      => $library_name,
            "estimated_length"  => $estimated_length,
            "genbank_accession" => $genbank_accession,
            "overgo_matches"    => $overgo,
            "bac_end_sequence"  => $bacseq,
            "qual_value_seq"    => $qualvalue,
        );

        # Keep only the selected fields, in selection order. Undefined
        # values become empty strings: the output is the same as before but
        # join() no longer warns.
        my @data_array =
            map { defined $_ ? $_ : '' } @field_vals{@output_fields};

        # print query results to dumpfile
        print $dump_fh join("\t", @data_array) . "\n";
        $foundcount++;
    }

    # Buffered write errors only surface at close, so report them.
    close($dump_fh)     or warn "Could not close bulk dump file: $!";
    close($notfound_fh) or warn "Could not close bulk not-found file: $!";

    $self->{foundcount}    = $foundcount;
    $self->{notfoundcount} = $notfoundcount;
    $self->{query_time}    = time() - $self->{query_start_time};

    return $self->{query_time};
}