1 # bulk BAC End Raw download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # Of BAC Ends of type Raw.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
20 /CXGN/Bulk/BACEndRaw.pm
25 This perl script is used on the bulk download page. The script collects
26 identifiers submitted by the user and returns information based on the
27 BAC End Raw Ids entered. It then determines the information the user is
28 searching for (Bac Id, Clone Type, Organism Name, Accession Name,
29 Library Name, Estimated Length, Genbank Accession, Bac End Sequence,
30 and Qual Value Sequence) and performs the appropriate querying of the
31 database. The results of the database query are formatted and presented
32 to the user on a separate page. Options for viewing or downloading
33 the results as text or FASTA are available.
40 use CXGN
::DB
::DBICFactory
;
41 use CXGN
::Genomic
::CloneNameParser
;
42 use CXGN
::Genomic
::Chromat
;
43 use CXGN
::Genomic
::GSS
;
45 package CXGN
::Bulk
::BACEndRaw
;
46 use base
"CXGN::Bulk";
51 my $self = $class->SUPER::new
(@_);
55 =head2 process_parameters
59 Ret : 1 if the parameters were OK, 0 if not
61 Modifies some of the parameters set in get_parameters, preparing
62 the data for the database query.
66 sub process_parameters
70 # @output_list defines the identity and order of all fields that can be output
72 my @output_list = ('bac_id', 'clone_type', 'org_name',
73 'accession_name', 'library_name', 'estimated_length',
74 'genbank_accession', 'overgo_matches',
75 'bac_end_sequence', 'qual_value_seq');
77 my @output_fields = ();
79 $self->debug("Type of identifier: ".($self->{idType
})."");
81 # @output_fields is the sub-set of fields that will actually be output.
82 for my $o (@output_list)
84 if (my $value = $self->{$o})
88 push @output_fields, $o;
93 $self->{output_list
} = \
@output_list;
94 $self->{output_fields
} = \
@output_fields;
96 my @ids = $self->check_ids();
97 if (@ids == ()) {return 0;}
98 $self->debug("IDs to be processed:");
103 my $has_valid_id = 0;
115 $self->{ids
} = \
@ids;
117 return 1; #params were OK if we got here
122 Desc: sub process_ids
124 Ret : data from database printed to a file;
126 Queries database using Persistent (see perldoc Persistent) and
127 object oriented perl to obtain data on Bulk Objects using formatted
135 $self->{query_start_time
} = time();
136 my $dbh = $self->{db
};
137 my $chado = CXGN
::DB
::DBICFactory
->open_schema('Bio::Chado::Schema');
138 my @output_fields = @
{$self->{output_fields
}};
140 my @return_data = ();
141 my ($dump_fh, $notfound_fh) = $self->create_dumpfile();
144 my $current_time= time() - $self->{query_start_time
};
149 # iterate through identifiers
150 foreach my $id (@
{$self->{ids
}}) {
152 my $bac_end_parser = CXGN
::Genomic
::CloneNameParser
->new(); # parse name
153 my $parsed_bac_end = $bac_end_parser->BAC_end_external_id ($id);
155 # parsed clone returns undef if parsing did not succeed
156 unless ($parsed_bac_end) {
157 print $notfound_fh (">$id\n");
162 my $chromat = CXGN
::Genomic
::Chromat
->retrieve($parsed_bac_end->{chromat_id
});
164 print $notfound_fh (">$id\n");
168 my $clone = $chromat->clone_object;
169 my $lib = $clone->library_object;
170 my ($gss) = CXGN
::Genomic
::GSS
->search(chromat_id
=> $chromat->chromat_id,
171 version
=> $parsed_bac_end->{version
},
174 print $notfound_fh ">$id\n";
178 # get organism name and accession
179 my (undef, $oname, $cname) = $lib->accession_name();
181 # raw seq and qual value
182 my $bacseq = $gss->seq;
183 my $qualvalue = $gss->qual;
186 print STDERR
"GENBANK ACCESSION:". ref($clone->genbank_accession($chado)) ."\n";
187 # # check which parameters were selected
188 # my @use_flags = @{$self}{qw/ bac_id
201 my $bac_id = $chromat->clone_read_external_identifier();
202 my $clone_type = $parsed_bac_end->{clonetype
};
203 my $library_name = $lib->name();
204 my $estimated_length = $clone->estimated_length();
205 my $genbank_accession = $clone->genbank_accession($chado);
206 my $overgo = "overgo";
208 my %field_vals = ( "bac_id" => $bac_id,
209 "clone_type" => $clone_type,
210 "org_name" => $oname,
211 "accession_name" => $cname,
212 "library_name" => $library_name,
213 "estimated_length" => $estimated_length,
214 "genbank_accession" => $genbank_accession ,
215 "overgo_matches" => $overgo,
216 "bac_end_sequence" => $bacseq,
217 "qual_value_seq" => $qualvalue,
219 #warn 'made field vals ',join(', ',@field_vals);
223 print STDERR
"OUTPUT FIELDS: ". (join "\t", @output_fields)."\n\n";
224 foreach my $selected_field (@output_fields) {
225 print STDERR
"PUSHING $selected_field = $field_vals{$selected_field}\n";
226 push @data_array, $field_vals{$selected_field};
229 # my @field_vals = map { $_ || '' } ($chromat->clone_read_external_identifier,
230 # $parsed_bac_end->{clonetype},
234 # $clone->estimated_length,
235 # $clone->genbank_accession,
240 # #warn 'made field vals ',join(', ',@field_vals);
241 # my @data_array = map { my $val = shift @field_vals;
244 # warn "information from query: $oname, $cname,\n";
246 # print query results to dumpfile
247 my $linecolumns = join("\t", @data_array)."\n";
248 print $dump_fh $linecolumns ;
249 print STDERR
"LINE: ". $linecolumns;
253 $current_time = time() - $self->{query_start_time
};
256 $self->{foundcount
}= $foundcount;
257 $self->{notfoundcount
}= $notfoundcount;
258 $current_time = time() - $self->{query_start_time
};
259 $self->{query_time
} = time() - $self->{query_start_time
};