warn that patch 00068/AddMissingStockAndTrialProps is so far only designed for some dbs
[sgn.git] / lib / CXGN / Bulk / BAC.pm
blobaaff8d3333bbc5562867c581fa17a6e3117044a2
1 package CXGN::Bulk::BAC;
2 use strict;
3 use warnings;
5 use CXGN::DB::DBICFactory;
6 use CXGN::Genomic::Clone;
7 use CXGN::Genomic::CloneIdentifiers;
8 use CXGN::Genomic::Library;
9 use CXGN::Tools::List qw/any/;
11 use base "CXGN::Bulk";
# Constructor.
#
# Hands every argument to the parent class constructor, then writes the
# "ids_string" entry of the first argument (treated as a hash reference) to
# the debug log.
#
# Returns the object built by the parent constructor.
sub new {
    my ($class, @ctor_args) = @_;

    my $self = $class->SUPER::new(@ctor_args);

    #debug start#
    my $params = $ctor_args[0];
    $self->debug("ID String from BAC.pm constructor");
    $self->debug("ids_string is " . $params->{ids_string});
    #debug end#

    return $self;
}
# Names of the columns this bulk query knows how to report.
#
# The order matters: process_ids assigns the per-clone values to these names
# positionally, and the same names are used as keys on the object to decide
# which columns were requested.
our @field_list = (
    'chr_clone_name',
    'cornell_clone_name',
    'arizona_clone_name',
    'clone_type',
    'org_name',
    'accession_name',
    'library_name',
    'estimated_length',
    'genbank_accession',
);
# Validates and normalises the request parameters stored on the object.
#
# Reads:
#   $self->{ids}         - whitespace-separated identifier string
#   $self->{<field>}     - one entry per name in @field_list; a field is
#                          selected when its value is the string 'on'
#
# Writes (only on the paths that reach them):
#   $self->{output_fields} - array ref of the selected field names
#   $self->{ids}           - array ref of the individual identifiers
#
# Returns 1 when the parameters are acceptable, 0 otherwise (missing or
# oversized id string, no word characters, more than 10_000 ids, or no
# usable ids).
sub process_parameters {
    my $self = shift;

    my $raw_ids = $self->{ids};

    # An absent id string is simply invalid; checking definedness first
    # avoids "uninitialized value" warnings from length() and the match.
    return 0 unless defined $raw_ids;
    return 0
        unless length($raw_ids) <= 1_000_000 && $raw_ids =~ /\w/;

    # Unselected fields are usually undefined, so guard the string
    # comparison to keep the warnings log clean.
    $self->{output_fields} =
        [ grep { defined $self->{$_} && $self->{$_} eq 'on' } @field_list ];

    # clean up data retrieved
    $raw_ids =~ s/[\n\s\r]+/ /g;

    # grep on truthiness drops the empty leading field that split produces
    # for input starting with whitespace (it also drops a literal "0").
    my @ids = grep { $_ } split /\s+/, $raw_ids;

    return 0 if @ids > 10_000;    #limit to 10_000 ids to process
    return 0 unless any(@ids);

    $self->{ids} = \@ids;

    return 1;    #params were OK if we got here
}
# Looks up every identifier in $self->{ids} and writes the results to the
# dump files obtained from $self->create_dumpfile().
#
# For each id:
#   * if CXGN::Genomic::Clone->retrieve_from_clone_name finds nothing, the id
#     is written to the not-found file as ">id" and counted as not found;
#   * otherwise one tab-separated line holding the requested columns is
#     written to the dump file and the id is counted as found.
#
# Side effects on the object:
#   $self->{query_start_time}, $self->{query_time} - timing information
#   $self->{foundcount}, $self->{notfoundcount}    - per-outcome id counts
#
# No explicit return value; the final statement is the debug call, as before.
sub process_ids {
    my $self = shift;

    $self->{query_start_time} = time();

    my ($dump_fh, $notfound_fh) = $self->create_dumpfile();

    my $current_time = time() - $self->{query_start_time};
    $self->debug("Time point 1: $current_time");

    my $foundcount    = 0;
    my $notfoundcount = 0;

    my $chado = CXGN::DB::DBICFactory->open_schema('Bio::Chado::Schema');

    # The set of requested columns does not change while iterating, so work
    # it out once instead of once per identifier.
    my @dump_fields = grep { $self->{$_} } @field_list;

    # iterate through identifiers
    foreach my $id (@{ $self->{ids} }) {

        my $clone = CXGN::Genomic::Clone->retrieve_from_clone_name($id);

        #ask rob if parser should choke when given zero
        unless ($clone) {
            $notfoundcount++;
            print {$notfound_fh} ">$id\n";
            next;
        }
        $foundcount++;

        my $lib = $clone->library_object;

        # get organism name and accession
        my (undef, $oname, $cname) = $lib->accession_name();

        # Values are matched to @field_list by position, so the order of
        # this list must stay in step with @field_list.
        # NOTE(review): the unguarded method calls are evaluated in list
        # context; if any of them can return an empty list the later columns
        # would shift left - confirm against CXGN::Genomic::Clone.
        my %data;
        @data{@field_list} = (
            $clone->clone_name_with_chromosome || '',
            $clone->cornell_clone_name         || '',
            $clone->clone_name,
            $clone->clone_type_object->name,
            $oname,
            $cname,
            $lib->name,
            $clone->estimated_length,
            $clone->genbank_accession($chado),
        );

        # Undefined values are written as empty strings (which is what join
        # would produce anyway) without emitting a warning for each one.
        print {$dump_fh}
            join("\t", map { defined $_ ? $_ : '' } @data{@dump_fields}) . "\n";
    }

    $current_time = time() - $self->{query_start_time};
    $self->debug("Time point 2: $current_time");

    # Buffered write errors only surface at close, so report them instead of
    # discarding them silently.
    close $dump_fh     or warn "could not close dump file: $!";
    close $notfound_fh or warn "could not close not-found file: $!";

    $self->{foundcount}    = $foundcount;
    $self->{notfoundcount} = $notfoundcount;

    $current_time = time() - $self->{query_start_time};
    $self->{query_time} = time() - $self->{query_start_time};
    $self->debug("Time point 3: $current_time");
}