1 # Bulk BAC End Trim download script for SGN database
2 # Lukas Mueller, August 12, 2003
4 # This bulk download option handles the query
5 # of BAC ends of type Trimmed.
6 # Many of its methods are in the Bulk object.
8 # Modified July 15, 2005
9 # Modified more August 11, 2005
10 # Summer Intern Caroline N. Nyenke
12 # Modified July 7, 2006
13 # Summer Intern Emily Hart
15 # Modified July 3rd, 2007
16 # Alexander Naydich and Matthew Crumb
20 /CXGN/Bulk/BACEndTrim.pm
25 This perl script is used on the bulk download page. The script collects
26 identifiers submitted by the user and returns information based on the
27 trimmed BAC end IDs entered. It then determines the information the user is
28 searching for (BAC IDs, Clone Type, Organism Name, Accession Name,
29 Library Name, Estimated Length, Genbank Accession, BAC End Sequence,
30 and Qual Value Sequence) and performs the appropriate querying of the
31 database. The results of the database query are formatted and presented
32 to the user on a separate page. Options of viewing or downloading
33 in text or fasta are available.
38 package CXGN
::Bulk
::BACEndTrim
;
42 use CXGN
::Genomic
::CloneNameParser
;
43 use CXGN
::Genomic
::Chromat
;
44 use CXGN
::Genomic
::GSS
;
46 use CXGN
::DB
::DBICFactory
;
48 use base
"CXGN::Bulk";
53 my $self = $class->SUPER::new
(@_);
57 =head2 process_parameters
61 Ret : 1 if the parameters were OK, 0 if not
63 Modifies some of the parameters set in get_parameters, preparing
64 data for the database query.
# process_parameters: works out which output columns were requested and
# which identifiers are to be queried, and stores both on $self.
# Returns 0 when check_ids() yields no identifiers; the final statement
# returns 1.
68 sub process_parameters
72 # @output_list defines the identity and order of all fields that can be output
74 my @output_list = ('bac_id', 'clone_type', 'org_name',
75 'accession_name', 'library_name', 'estimated_length',
76 'genbank_accession', 'overgo_matches',
77 'bac_end_sequence', 'qual_value_seq');
79 my @output_fields = ();
81 $self->debug("Type of identifier: ".($self->{idType
})."");
83 # @output_fields is the sub-set of fields that will actually be output.
# A field is selected when the same-named key on $self holds a true value.
84 foreach my $o (@output_list)
86 if (my $value = $self->{$o})
90 push @output_fields, $o;
# Keep both the full field list and the selected subset on the object,
# as array references.
95 $self->{output_list
} = \
@output_list;
96 $self->{output_fields
} = \
@output_fields;
98 my @ids = $self->check_ids();
# NOTE(review): this compares the element count of @ids numerically against
# an empty list; `unless (@ids)` is the conventional spelling -- confirm
# the two are equivalent here before simplifying.
99 if (@ids == ()) {return 0;}
100 $self->debug("IDs to be processed:");
# NOTE(review): $has_valid_id is not read anywhere in the visible lines;
# its use is presumably in code not shown here -- verify.
105 my $has_valid_id = 0;
# Store the identifier list on the object as an array reference.
117 $self->{ids
} = \
@ids;
119 return 1; #params were OK if we got here
124 Desc: sub process_[idType]_ids
126 Ret : data from database printed to a file;
128 Queries database using Persistent (see perldoc Persistent) and
129 object oriented perl to obtain data on Bulk Objects using formatted
# Body of the identifier-processing routine: for every id stored in
# $self->{ids}, resolve the BAC end, gather the requested fields and write
# one tab-separated row to the dump file; ids whose name fails to parse are
# written to the not-found file as ">id" lines.
# Record the start time; the elapsed-time computations below are relative to it.
137 $self->{query_start_time
} = time();
# NOTE(review): $dbh is not referenced in the visible part of this routine --
# confirm whether it is still needed.
138 my $dbh = $self->{db
};
# Schema handle that is later passed to $clone->genbank_accession().
139 my $chado = CXGN
::DB
::DBICFactory
->open_schema('Bio::Chado::Schema');
# Local copy of the selected field names; it drives the column order of
# every output row.
140 my @output_fields = @
{$self->{output_fields
}};
# NOTE(review): @return_data is not used in the visible lines -- verify.
142 my @return_data = ();
# $dump_fh receives the result rows, $notfound_fh the ">id" lines.
143 my ($dump_fh, $notfound_fh) = $self->create_dumpfile();
146 my $current_time= time() - $self->{query_start_time
};
151 # iterate through identifiers
152 foreach my $id (@
{$self->{ids
}}) {
154 my $bac_end_parser = CXGN
::Genomic
::CloneNameParser
->new(); # parse name
155 my $parsed_bac_end = $bac_end_parser->BAC_end_external_id ($id);
157 # parsed clone returns undef if parsing did not succeed
158 unless ($parsed_bac_end) {
159 print $notfound_fh (">$id\n");
# Look up the chromatogram record named by the parsed identifier.
164 my $chromat = CXGN
::Genomic
::Chromat
->retrieve($parsed_bac_end->{chromat_id
});
166 print $notfound_fh (">$id\n");
# Follow the chromat to its clone, and the clone to its library.
170 my $clone = $chromat->clone_object;
171 my $lib = $clone->library_object;
# Search for the GSS record of this chromat at the parsed version; only the
# first element of the returned list is kept.
172 my ($gss) = CXGN
::Genomic
::GSS
->search(chromat_id
=> $chromat->chromat_id,
173 version
=> $parsed_bac_end->{version
},
176 print $notfound_fh ">$id\n";
180 # get organism name and accession
# The first element returned by accession_name() is discarded.
181 my (undef, $oname, $cname) = $lib->accession_name();
184 # trim seq and qual value
185 my $qualvalue = $gss->trimmed_qual;
186 my $bacseq = $gss->trimmed_seq;
188 # # check which parameters were selected
189 # my @use_flags = @{$self}{qw/ bac_id
202 my $bac_id = $chromat->clone_read_external_identifier();
203 my $clone_type = $parsed_bac_end->{clonetype
};
204 my $library_name = $lib->name();
205 my $estimated_length = $clone->estimated_length();
206 my $genbank_accession = $clone->genbank_accession($chado);
# The overgo_matches column is filled with this literal string, not with
# looked-up data.
207 my $overgo = "overgo";
# Map every possible output field name to the value gathered above.
209 my %field_vals = ( "bac_id" => $bac_id,
210 "clone_type" => $clone_type,
211 "org_name" => $oname,
212 "accession_name" => $cname,
213 "library_name" => $library_name,
214 "estimated_length" => $estimated_length,
215 "genbank_accession" => $genbank_accession ,
216 "overgo_matches" => $overgo,
217 "bac_end_sequence" => $bacseq,
218 "qual_value_seq" => $qualvalue,
221 #warn 'made field vals ',join(', ',@field_vals);
# Collect the values of the selected fields, in @output_fields order.
224 foreach my $selected_field (@output_fields) {
225 push @data_array, $field_vals{$selected_field};
228 # my @data_array = map { my $val = shift @field_vals;
231 # warn "information from query: $oname, $cname,\n";
233 # print query results to dumpfile
234 print $dump_fh join("\t", @data_array)."\n";
# NOTE(review): $current_time is recomputed here and below but never read in
# the visible lines -- verify whether it is used elsewhere.
238 $current_time = time() - $self->{query_start_time
};
# Store the counters on the object; $foundcount and $notfoundcount are
# maintained in lines not shown here.
241 $self->{foundcount
}= $foundcount;
242 $self->{notfoundcount
}= $notfoundcount;
243 $current_time = time() - $self->{query_start_time
};
# Total elapsed seconds since query_start_time was set.
244 $self->{query_time
} = time() - $self->{query_start_time
};