8 The CXGN::Bulk package is a superclass of all the different Bulk packages
9 used by the download.pl script. It is never instantiated itself, but contains
10 many methods its subclasses share.
20 use File
::Spec
::Functions
;
23 use File
::Slurp qw
/slurp/;
28 Args: hash reference containing parameters from user
31 All subclasses of Bulk share the same constructor. The constructor takes a reference to a hash
32 which contains the parameters as an argument and creates a new variable and value for every
33 key-value pair in the hash reference. It creates several other variables for linking to the
34 database and printing a web page.
42 foreach ( keys %$params ) {
43 $self->{$_} = $params->{$_};
45 $self->{tempdir
} or confess
"must provide a tempdir argument to bulk constructor";
46 -d
$self->{tempdir
} or confess
"no such directory $self->{tempdir}";
47 $self->{content
} = ""; # the content of the page
48 $self->{db
} = $self->{dbc
}
49 or confess
"must provide a dbc argument (database handle) to bulk constructor";
52 $self->{data
} = \
@empty_array;
57 =head2 create_dumpfile
59 Desc: sub create_dumpfile
61 Ret : file handle for dumpfile and notfound file
63 Creates a dumpfile and a notfound file and returns file handles for them.
70 my @output_fields = @
{ $self->{output_fields
} };
73 if ( !exists( $self->{dump_fh
} )
74 || !exists( $self->{notfound_fh
} ) )
77 # generate tmp file names and open files for writing
78 $self->{dumpfile
} = File
::Temp
->new( TEMPLATE
=> catfile
( $self->{tempdir
}, "bulk-XXXXXX" ), UNLINK
=> 0 )->filename;
79 $self->{dumpfile
} =~ s!$self->{tempdir}/!!;
81 $self->debug( "FILENAME: " . $self->{dumpfile
} );
82 my $filepath = catfile
($self->{tempdir
}, $self->{dumpfile
});
83 open( $self->{dump_fh
}, '>', $filepath )
84 || die "Can't open $filepath";
85 $self->{notfoundfile
} = $self->{dumpfile
} . ".notfound";
86 open( $self->{notfound_fh
}, '>', "$self->{tempdir}/$self->{notfoundfile}" )
87 || die "Can't open $self -> {notfoundfile}";
90 @output_fields = @
{ $self->{output_fields
} };
91 my $dump_fh = $self->{dump_fh
};
92 print $dump_fh join( "\t", @output_fields ) . "\n";
95 # warn "dumpfile: $filepath \n";
96 return $self->{dump_fh
}, $self->{notfound_fh
};
102 my $cache_root = catfile
($self->{tempdir
}, $self->{dumpfile
} . ".summary");
104 my $lines_read = $self->get_file_lines( catfile
($self->{tempdir
}, $self->{dumpfile
}) ) - 1;
106 my $notfoundcount = $self->get_file_lines( catfile
($self->{tempdir
}, $self->{notfoundfile
} ));
107 my $total = $lines_read;
108 my $file = $self->{dumpfile
};
109 my $notfoundfile = $self->{notfoundfile
};
110 my $idType = $self->{idType
};
111 my $query_time = $self->{query_time
};
112 my $seq_type = $self->{seq_type
};
113 my $est_seq = $self->{est_seq
};
114 my $unigene_seq = $self->{unigene_seq
};
116 (stat( catfile
($self->{tempdir
}, $self->{dumpfile
})))[7] / 1000;
117 my $missing_ids = @
{ $self->{ids
} } - $notfoundcount - $lines_read;
118 my $missing_ids_msg = "";
119 my $file_lines = 0; #fix
120 #$self->getFileLines( $self->{tempdir} . "/" . $self->{dumpfile} );
122 my $numlines = scalar( @
{ $self->{ids
} } );
123 if ( $missing_ids > 0 ) {
124 $missing_ids_msg = "$missing_ids ids were not retrieved from the database because they were not part of the corresponding unigene set or because there were duplicate entries in the submitted id list.";
127 my $fastalink = " Fasta ";
128 my $fastadownload = " Fasta ";
129 my $fastamessage = "Note: Fasta option is not available because you didn't
130 choose a sequence to download.<br />";
131 if ( join( " ", @
{ $self->{output_fields
} } ) =~ /seq/i ) {
133 "<a href=\"/tools/bulk/display?outputType=Fasta&dumpfile=$file&unigene_seq=$unigene_seq&est_seq=$est_seq\">Fasta</a>";
135 "<a href=\"/tools/bulk/display?outputType=Fasta&dumpfile=$file&unigene_seq=$unigene_seq&est_seq=$est_seq&download=1\">Fasta</a>";
139 my $cache = Cache
::File
->new( cache_root
=> $cache_root );
142 fastalink
=> $fastalink,
143 fastadownload
=> $fastadownload,
144 fastamessage
=> $fastamessage,
145 missing_ids_msg
=> $missing_ids_msg,
148 query_time
=> $query_time,
149 filesize
=> $filesize,
150 numlines
=> $numlines, # number of lines in query
151 lines_read
=> $lines_read, # number of lines in file
153 seq_type
=> $seq_type,
155 unigene_seq
=> $unigene_seq,
158 foreach my $k (keys(%$summary_data)) {
159 $cache->set($k, $summary_data->{$k});
166 Desc: sub error_message
170 This method is called by download.pl if the Bulk object has a problem with the
171 input parameters. It prints a page that contains an error message.
179 my $html = $self ->{content
} . "\n";
181 <h3>Bulk download error</h3>
182 I could not process your input. Possible reasons for this include:
184 <li>You may not have entered any identifiers for download</li>
185 <li>You may not have entered any identifiers that exist</li>
186 <li>Your identifiers may not be one-per-line, with no quotation marks, etc (see input example)</li>
187 <li>The input set may be larger than the limit of 10,000 identifiers</li>
189 Please check your input and use your browser\'s "back" button to go back and try again!
195 =head2 get_file_lines
197 Desc: sub get_file_lines
198 Args: file; example. $self -> get_file_lines($self->{tempdir});
201 Counts file lines (used on temp directories).
208 open my $f, $file or die "$! opening $file";
218 Args: default; example. $bulk -> clean_up();
221 Drops unigene, blast, and submitted_ids temp tables (i.e. cleans up after
228 $self->{db
}->disconnect();
234 Args: string; example. $self -> debug("input_ok: Input is NOT OK!");
237 Function for printing adds break and new line to messages.
244 #print messages if debug flag is set
246 if ( $self->{debug
} ) {
248 <h4>Bulk download summary</h4>
258 Ret : @ids, array of IDs from input
260 A common method between all types of Bulk objects, it checks that there are
261 less than 10,000 IDs from input and puts them all into an array, returning
262 the array of ids if everything went well, otherwise 0.
270 #do some simple parameter checking
272 print STDERR
"PROCESSING IDS: $self->{ids}. ($self->{idType})\n";
274 return @ids if ( $self->{idType
} eq "" );
275 return @ids if ( $self->{ids
} !~ /\w/ );
277 #make sure the input string isn't too big
278 return @ids if length( $self->{ids
} ) > 1_000_000
;
281 # clean up data retrieved
282 my $ids = $self->{ids
};
285 $ids =~ s/\s+/ /g; # compress multiple returns into one
286 $ids =~ s/\r+/ /g; # convert carriage returns to space
287 @ids = split /\s+/, $ids;
288 @ids = () if @ids > 10_000
; #limit to 10_000 ids to process
292 =head2 process_parameters
294 Desc: sub process_parameters
296 Ret : 1 if the parameters were OK, 0 if not
298 Modifies some of the parameters received set in get_parameters. Preparing
299 data for the database query.
303 sub process_parameters
{
309 Desc: sub process_ids
311 Ret : data from database printed to a file;
313 Queries database using Persistent (see perldoc Persistent) and
314 object oriented perl to obtain data on Bulk Objects using formatted
323 =head2 accessors get_dumpfile, set_dumpfile
335 return $self->{dumpfile
};
340 $self->{dumpfile
} = shift;
343 =head2 accessors get_notfoundfile, set_notfoundfile
353 sub get_notfoundfile
{
355 return $self->{notfoundfile
};
358 sub set_notfoundfile
{
360 $self->{notfoundfile
} = shift;
363 =head2 accessors get_tempdir, set_tempdir
375 return $self->{tempdir
};
380 $self->{tempdir
} = shift;