Bio::Tools::CodonTable and Bio::Tools::IUPAC: prepare with dzil.
[bioperl-live.git] / lib / Bio / Root / Utilities.pm
blobed4bb71226533931fd3e05b3da2da46ea4e04953
1 package Bio::Root::Utilities;
3 use strict;
4 use Bio::Root::IO;
5 use Bio::Root::Exception;
6 use base qw(Bio::Root::Root Exporter);
8 =head1 NAME
10 Bio::Root::Utilities - general-purpose utilities
12 =head1 SYNOPSIS
14 =head2 Object Creation
16 # Using the supplied singleton object:
17 use Bio::Root::Utilities qw(:obj);
18 $Util->some_method();
20 # Create an object manually:
21 use Bio::Root::Utilities;
22 my $util = Bio::Root::Utilities->new();
23 $util->some_method();
25 $date_stamp = $Util->date_format('yyy-mm-dd');
27 $clean = $Util->untaint($dirty);
29 $compressed = $Util->compress('/home/me/myfile.txt')
31 my ($mean, $stdev) = $Util->mean_stdev( @data );
33 $Util->authority("me@example.com");
34 $Util->mail_authority("Something you should know about...");
36 ...and a host of other methods. See below.
38 =head1 DESCRIPTION
40 Provides general-purpose utilities of potential interest to any Perl script.
42 The C<:obj> tag is a convenience that imports a $Util symbol into your
43 namespace representing a Bio::Root::Utilities object. This saves you
44 from creating your own Bio::Root::Utilities object via
45 C<Bio::Root::Utilities-E<gt>new()> or from prefixing all method calls with
46 C<Bio::Root::Utilities>, though feel free to do these things if desired.
47 Since there should normally not be a need for a script to have more
48 than one Bio::Root::Utilities object, this module thus comes with it's
49 own singleton.
51 =head1 INSTALLATION
53 This module is included with the central Bioperl distribution:
55 http://www.bioperl.org/wiki/Getting_BioPerl
56 ftp://bio.perl.org/pub/DIST
58 Follow the installation instructions included in the README file.
60 =head1 DEPENDENCIES
62 Inherits from L<Bio::Root::Root>, and uses L<Bio::Root::IO>
63 and L<Bio::Root::Exception>.
65 Relies on external executables for file compression/uncompression
66 and sending mail. No paths to these are hard coded but are located
67 as needed.
69 =head1 SEE ALSO
71 http://bioperl.org - Bioperl Project Homepage
73 =head1 ACKNOWLEDGEMENTS
75 This module was originally developed under the auspices of the
76 Saccharomyces Genome Database: http://www.yeastgenome.org/
78 =head1 AUTHOR Steve Chervitz
80 =cut
82 use vars qw(@EXPORT_OK %EXPORT_TAGS);
83 @EXPORT_OK = qw($Util);
84 %EXPORT_TAGS = ( obj => [qw($Util)],
85 std => [qw($Util)],);
87 use vars qw($ID $Util $GNU_PATH $TIMEOUT_SECS
88 @COMPRESSION_UTILS @UNCOMPRESSION_UTILS
89 $DEFAULT_NEWLINE $NEWLINE $AUTHORITY
90 @MONTHS @DAYS $BASE_YEAR $DEFAULT_CENTURY
93 $ID = 'Bio::Root::Utilities';
94 # Number of seconds to wait before timing out when reading input (taste_file())
95 $TIMEOUT_SECS = 30;
96 $NEWLINE = $ENV{'NEWLINE'} || undef;
97 $BASE_YEAR = 1900; # perl's localtime() assumes this for it's year data.
98 # TODO: update this every hundred years. Y2K-sensitive code.
99 $DEFAULT_CENTURY = $BASE_YEAR + 100;
100 @MONTHS = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
101 @DAYS = qw(Sun Mon Tue Wed Thu Fri Sat);
102 # Sets the preference for compression utilities to be used by compress().
103 # The first executable in this list to be found in the current PATH will be used,
104 # unless overridden in the call to that function. See docs for details.
105 @COMPRESSION_UTILS = qw(gzip bzip2 zip compress);
106 @UNCOMPRESSION_UTILS = qw(gunzip gzip bunzip2 unzip uncompress);
108 # Default person to receive feedback from users and possibly automatic error messages.
109 $AUTHORITY = '';
111 # Note: $GNU_PATH is now deprecated, shouldn't be needed since now this module
112 # will automatically locate the compression utility in the current PATH.
113 # Retaining $GNU_PATH for backward compatibility.
115 # $GNU_PATH points to the directory containing the gzip and gunzip
116 # executables. It may be required for executing gzip/gunzip
117 # in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
118 # Customize $GNU_PATH for your site if the compress() or
119 # uncompress() functions are generating exceptions.
120 $GNU_PATH = '';
121 #$GNU_PATH = '/tools/gnu/bin/';
123 $DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason)
125 ## Static UTIL object.
126 $Util = Bio::Root::Root->new();
129 =head2 date_format
131 Title : date_format
132 Usage : $Util->date_format( [FMT], [DATE])
133 Purpose : -- Get a string containing the formatted date or time
134 : taken when this routine is invoked.
135 : -- Provides a way to avoid using `date`.
136 : -- Provides an interface to localtime().
137 : -- Interconverts some date formats.
139 : (For additional functionality, use Date::Manip or
140 : Date::DateCalc available from CPAN).
141 Example : $Util->date_format();
142 : $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
143 Returns : String (unless 'list' is provided as argument, see below)
145 : 'yyyy-mm-dd' = 1996-05-03 # default format.
146 : 'yyyy-dd-mm' = 1996-03-05
147 : 'yyyy-mmm-dd' = 1996-May-03
148 : 'd-m-y' = 3-May-1996
149 : 'd m y' = 3 May 1996
150 : 'dmy' = 3may96
151 : 'mdy' = May 3, 1996
152 : 'ymd' = 96may3
153 : 'md' = may3
154 : 'year' = 1996
155 : 'hms' = 23:01:59 # when not converting a format, 'hms' can be
156 : # tacked on to any of the above options
157 : # to add the time stamp: eg 'dmyhms'
158 : 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
159 : 'list' = the contents of localtime(time) in an array.
160 Argument : (all are optional)
161 : FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
162 : mdy | ymd | md | d-m-y | hms | hm
163 : ('hms' may be appended to any of these to
164 : add a time stamp)
166 : DATE = String containing date to be converted.
167 : Acceptable input formats:
168 : 12/1/97 (for 1 December 1997)
169 : 1997-12-01
170 : 1997-Dec-01
171 Throws :
172 Comments : If you don't care about formatting or using backticks, you can
173 : always use: $date = `date`;
175 : For more features, use Date::Manip.pm, (which I should
176 : probably switch to...)
178 See Also : L<file_date()|file_date>, L<month2num()|month2num>
180 =cut
182 #---------------'
183 sub date_format {
184 #---------------
185 my $self = shift;
186 my $option = shift;
187 my $date = shift; # optional date to be converted.
189 my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
191 $option ||= 'yyyy-mm-dd';
193 my ($month_txt, $day_txt, $month_num, $fullYear);
194 my ($converting, @date);
196 # Load a supplied date for conversion:
197 if(defined($date) && ($date =~ /[\D-]+/)) {
198 $converting = 1;
199 if( $date =~ m{/}) {
200 ($mon,$mday,$year) = split(m{/}, $date);
201 } elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
202 ($year,$mon,$mday) = ($1, $2, $3);
203 } elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
204 ($year,$mon,$mday) = ($1, $2, $3);
205 $mon = $self->month2num($2);
206 } else {
207 print STDERR "\n*** Unsupported input date format: $date\n";
209 if(length($year) == 4) {
210 $fullYear = $year;
211 $year = substr $year, 2;
212 } else {
213 # Heuristics to guess what century was intended when a 2-digit year is given
214 # If number is over 50, assume it's for prev century; under 50 = default century.
215 # TODO: keep an eye on this Y2K-sensitive code
216 if ($year > 50) {
217 $fullYear = $DEFAULT_CENTURY + $year - 100;
218 } else {
219 $fullYear = $DEFAULT_CENTURY + $year;
222 $mon -= 1;
223 } else {
224 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date =
225 localtime(($date ? $date : time()));
226 return @date if $option =~ /list/i;
227 $fullYear = $BASE_YEAR+$year;
229 $month_txt = $MONTHS[$mon];
230 $day_txt = $DAYS[$wday] if defined $wday;
231 $month_num = $mon+1;
233 # print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
235 if( $option =~ /yyyy-mm-dd/i ) {
236 $date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday;
237 } elsif( $option =~ /yyyy-dd-mm/i ) {
238 $date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num;
239 } elsif( $option =~ /yyyy-mmm-dd/i ) {
240 $date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday;
241 } elsif( $option =~ /full|unix/i ) {
242 $date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear;
243 } elsif( $option =~ /mdy/i ) {
244 $date = "$month_txt $mday, $fullYear";
245 } elsif( $option =~ /ymd/i ) {
246 $date = $year."\l$month_txt$mday";
247 } elsif( $option =~ /dmy/i ) {
248 $date = $mday."\l$month_txt$year";
249 } elsif( $option =~ /md/i ) {
250 $date = "\l$month_txt$mday";
251 } elsif( $option =~ /d-m-y/i ) {
252 $date = "$mday-$month_txt-$fullYear";
253 } elsif( $option =~ /d m y/i ) {
254 $date = "$mday $month_txt $fullYear";
255 } elsif( $option =~ /year/i ) {
256 $date = $fullYear;
257 } elsif( $option =~ /dmy/i ) {
258 $date = $mday.'-'.$month_txt.'-'.$fullYear;
259 } elsif($option and $option !~ /hms/i) {
260 print STDERR "\n*** Unrecognized date format request: $option\n";
263 if( $option =~ /hms/i and not $converting) {
264 $date .= " $hour:$min:$sec" if $date;
265 $date ||= "$hour:$min:$sec";
268 return $date || join(" ", @date);
272 =head2 month2num
274 Title : month2num
275 Purpose : Converts a string containing a name of a month to integer
276 : representing the number of the month in the year.
277 Example : $Util->month2num("march"); # returns 3
278 Argument : The string argument must contain at least the first
279 : three characters of the month's name. Case insensitive.
280 Throws : Exception if the conversion fails.
282 =cut
284 #--------------'
285 sub month2num {
286 #--------------
287 my ($self, $str) = @_;
289 # Get string in proper format for conversion.
290 $str = substr($str, 0, 3);
291 for my $month (0..$#MONTHS) {
292 return $month+1 if $str =~ /$MONTHS[$month]/i;
294 $self->throw("Invalid month name: $str");
297 =head2 num2month
299 Title : num2month
300 Purpose : Does the opposite of month2num.
301 : Converts a number into a string containing a name of a month.
302 Example : $Util->num2month(3); # returns 'Mar'
303 Throws : Exception if supplied number is out of range.
305 =cut
307 #-------------
308 sub num2month {
309 #-------------
310 my ($self, $num) = @_;
312 $self->throw("Month out of range: $num") if $num < 1 or $num > 12;
313 return $MONTHS[$num-1];
316 =head2 compress
318 Title : compress
319 Usage : $Util->compress(full-path-filename);
320 : $Util->compress(<named parameters>);
321 Purpose : Compress a file.
322 Example : $Util->compress("/usr/people/me/data.txt");
323 : $Util->compress(-file=>"/usr/people/me/data.txt",
324 : -tmp=>1,
325 : -outfile=>"/usr/people/share/data.txt.gz",
326 : -exe=>"/usr/local/bin/fancyzip");
327 Returns : String containing full, absolute path to compressed file
328 Argument : Named parameters (case-insensitive):
329 : -FILE => String (name of file to be compressed, full path).
330 : If the supplied filename ends with '.gz' or '.Z',
331 : that extension will be removed before attempting to compress.
332 : Optional:
333 : -TMP => boolean. If true, (or if user is not the owner of the file)
334 : the file is compressed to a temp file. If false, file may be
335 : clobbered with the compressed version (if using a utility like
336 : gzip, which is the default)
337 : -OUTFILE => String (name of the output compressed file, full path).
338 : -EXE => Name of executable for compression utility to use.
339 : Will supersede those in @COMPRESSION_UTILS defined by
340 : this module. If the absolute path to the executable is not provided,
341 : it will be searched in the PATH env variable.
342 Throws : Exception if file cannot be compressed.
343 : If user is not owner of the file, generates a warning and compresses to
344 : a tmp file. To avoid this warning, use the -o file test operator
345 : and call this function with -TMP=>1.
346 Comments : Attempts to compress using utilities defined in the @COMPRESSION_UTILS
347 : defined by this module, in the order defined. The first utility that is
348 : found to be executable will be used. Any utility defined in optional -EXE param
349 : will be tested for executability first.
350 : To minimize security risks, the -EXE parameter value is untained using
351 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
353 See Also : L<uncompress()|uncompress>
355 =cut
357 #------------'
358 sub compress {
359 #------------
360 my ($self, @args) = @_;
361 # This method formerly didn't use named params and expected fileName, tmp
362 # in that order. This should be backward compatibile.
363 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
364 my ($file, $get, $fmt);
366 # in case the supplied name already has a compressed extension
367 if($fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName =~ s/$1$//; };
368 $self->debug("compressing file $fileName");
370 my @util_to_use = @COMPRESSION_UTILS;
372 if (defined $exe){
373 $exe = $self->untaint($exe, 1);
374 unshift @util_to_use, $exe;
377 my @checked = @util_to_use;
378 $exe ||= '';
379 while (not -x $exe and scalar(@util_to_use)) {
380 $exe = $self->find_exe(shift @util_to_use);
383 unless (-x $exe) {
384 $self->throw("Can't find compression utility. Looked for @checked");
387 my ($compressed, @cmd, $handle);
389 if(defined($outfile) or $tmp or not -o $fileName) {
390 if (defined $outfile) {
391 $compressed = $outfile;
392 } else {
393 # obtain a temporary file name (not using the handle)
394 # and insert some special text to flag it as a bioperl-based temp file
395 my $io = Bio::Root::IO->new();
396 ($handle, $compressed) = $io->tempfile();
397 $compressed .= '.tmp.bioperl.gz';
400 # Use double quotes if executable path have empty spaces
401 if ($exe =~ m/ /) {
402 $exe = "\"$exe\"";
405 if ($exe =~ /gzip|bzip2|compress/) {
406 @cmd = ("$exe -f < \"$fileName\" > \"$compressed\"");
407 } elsif ($exe eq 'zip') {
408 @cmd = ("$exe -r \"$fileName.zip\" \"$fileName\"");
410 not $tmp and
411 $self->warn("Not owner of file $fileName. Compressing to temp file $compressed.");
412 $tmp = 1;
413 } else {
414 # Need to compute the compressed name based on exe since we're returning it.
415 $compressed = $fileName;
416 if ($exe =~ /gzip/) {
417 $compressed .= '.gz';
418 } elsif ($exe =~ /bzip2/) {
419 $compressed .= '.bz2';
420 } elsif ($exe =~ /zip/) {
421 $compressed .= '.zip';
422 } elsif ($exe =~ /compress/) {
423 $compressed .= '.Z';
425 if ($exe =~ /gzip|bzip2|compress/) {
426 @cmd = ($exe, '-f', $fileName);
427 } elsif ($exe eq 'zip') {
428 @cmd = ($exe, '-r', "$compressed", $fileName);
432 if(system(@cmd) != 0) {
433 $self->throw( -class => 'Bio::Root::SystemException',
434 -text => "Failed to compress file $fileName using $exe: $!");
437 return $compressed;
440 =head2 uncompress
442 Title : uncompress
443 Usage : $Util->uncompress(full-path-filename);
444 : $Util->uncompress(<named parameters>);
445 Purpose : Uncompress a file.
446 Example : $Util->uncompress("/usr/people/me/data.txt");
447 : $Util->uncompress(-file=>"/usr/people/me/data.txt.gz",
448 : -tmp=>1,
449 : -outfile=>"/usr/people/share/data.txt",
450 : -exe=>"/usr/local/bin/fancyzip");
451 Returns : String containing full, absolute path to uncompressed file
452 Argument : Named parameters (case-insensitive):
453 : -FILE => String (name of file to be uncompressed, full path).
454 : If the supplied filename ends with '.gz' or '.Z',
455 : that extension will be removed before attempting to uncompress.
456 : Optional:
457 : -TMP => boolean. If true, (or if user is not the owner of the file)
458 : the file is uncompressed to a temp file. If false, file may be
459 : clobbered with the uncompressed version (if using a utility like
460 : gzip, which is the default)
461 : -OUTFILE => String (name of the output uncompressed file, full path).
462 : -EXE => Name of executable for uncompression utility to use.
463 : Will supersede those in @UNCOMPRESSION_UTILS defined by
464 : this module. If the absolute path to the executable is not provided,
465 : it will be searched in the PATH env variable.
466 Throws : Exception if file cannot be uncompressed.
467 : If user is not owner of the file, generates a warning and uncompresses to
468 : a tmp file. To avoid this warning, use the -o file test operator
469 : and call this function with -TMP=>1.
470 Comments : Attempts to uncompress using utilities defined in the @UNCOMPRESSION_UTILS
471 : defined by this module, in the order defined. The first utility that is
472 : found to be executable will be used. Any utility defined in optional -EXE param
473 : will be tested for executability first.
474 : To minimize security risks, the -EXE parameter value is untained using
475 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
477 See Also : L<compress()|compress>
479 =cut
481 #------------'
482 sub uncompress {
483 #------------
484 my ($self, @args) = @_;
485 # This method formerly didn't use named params and expected fileName, tmp
486 # in that order. This should be backward compatibile.
487 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
488 my ($file, $get, $fmt);
490 # in case the supplied name lacks a compressed extension
491 if(not $fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName .= $1; };
492 $self->debug("uncompressing file $fileName");
494 my @util_to_use = @UNCOMPRESSION_UTILS;
496 if (defined $exe){
497 $exe = $self->untaint($exe, 1);
498 unshift @util_to_use, $exe;
501 $exe ||= '';
502 while (not -x $exe and scalar(@util_to_use)) {
503 $exe = $self->find_exe(shift @util_to_use);
506 unless (-x $exe) {
507 $self->throw("Can't find compression utility. Looked for @util_to_use");
510 my ($uncompressed, @cmd, $handle);
512 $uncompressed = $fileName;
513 $uncompressed =~ s/\.\w+$//;
515 if(defined($outfile) or $tmp or not -o $fileName) {
516 if (defined $outfile) {
517 $uncompressed = $outfile;
518 } else {
519 # obtain a temporary file name (not using the handle)
520 my $io = Bio::Root::IO->new();
521 ($handle, $uncompressed) = $io->tempfile();
522 # insert some special text to flag it as a bioperl-based temp file
523 $uncompressed .= '.tmp.bioperl';
526 # Use double quotes if executable path have empty spaces
527 if ($exe =~ m/ /) {
528 $exe = "\"$exe\"";
531 if ($exe =~ /gunzip|bunzip2|uncompress/) {
532 @cmd = ("$exe -f < \"$fileName\" > \"$uncompressed\"");
533 } elsif ($exe =~ /gzip/) {
534 @cmd = ("$exe -df < \"$fileName\" > \"$uncompressed\"");
535 } elsif ($exe eq 'unzip') {
536 @cmd = ("$exe -p \"$fileName\" > \"$uncompressed\"");
538 not $tmp and
539 $self->warn("Not owner of file $fileName. Uncompressing to temp file $uncompressed.");
540 $tmp = 1;
541 } else {
542 if ($exe =~ /gunzip|bunzip2|uncompress/) {
543 @cmd = ($exe, '-f', $fileName);
544 } elsif ($exe =~ /gzip/) {
545 @cmd = ($exe, '-df', $fileName);
546 } elsif ($exe eq 'zip') {
547 @cmd = ($exe, $fileName);
551 if(system(@cmd) != 0) {
552 $self->throw( -class => 'Bio::Root::SystemException',
553 -text => "Failed to uncompress file $fileName using $exe: $!");
556 return $uncompressed;
560 =head2 file_date
562 Title : file_date
563 Usage : $Util->file_date( filename [,date_format])
564 Purpose : Obtains the date of a given file.
565 : Provides flexible formatting via date_format().
566 Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
567 Argument : filename = string, full path name for file
568 : date_format = string, desired format for date (see date_format()).
569 : Default = yyyy-mm-dd
570 Thows : Exception if no file is provided or does not exist.
571 Comments : Uses the mtime field as obtained by stat().
573 =cut
575 #--------------
576 sub file_date {
577 #--------------
578 my ($self, $file, $fmt) = @_;
580 $self->throw("No such file: $file") if not $file or not -e $file;
582 $fmt ||= 'yyyy-mm-dd';
584 my @file_data = stat($file);
585 return $self->date_format($fmt, $file_data[9]); # mtime field
589 =head2 untaint
591 Title : untaint
592 Purpose : To remove nasty shell characters from untrusted data
593 : and allow a script to run with the -T switch.
594 : Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
595 : Accept only the first block of contiguous characters:
596 : Default allowed chars = "-\w.', ()"
597 : If $relax is true = "-\w.', ()\/=%:^<>*"
598 Usage : $Util->untaint($value, $relax)
599 Returns : String containing the untained data.
600 Argument: $value = string
601 : $relax = boolean
602 Comments:
603 This general untaint() function may not be appropriate for every situation.
604 To allow only a more restricted subset of special characters
605 (for example, untainting a regular expression), then using a custom
606 untainting mechanism would permit more control.
608 Note that special trusted vars (like $0) require untainting.
610 =cut
612 #------------`
613 sub untaint {
614 #------------
615 my($self,$value,$relax) = @_;
616 $relax ||= 0;
617 my $untainted;
619 $self->debug("\nUNTAINT: $value\n");
621 unless (defined $value and $value ne '') {
622 return $value;
625 if( $relax ) {
626 $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
627 $untainted = $1
628 # } elsif( $relax == 2 ) { # Could have several degrees of relax.
629 # $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
630 # $untainted = $1
631 } else {
632 $value =~ /([-\w.\', ()]+)/;
633 $untainted = $1
636 $self->debug("UNTAINTED: $untainted\n");
638 $untainted;
642 =head2 mean_stdev
644 Title : mean_stdev
645 Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
646 Purpose : Calculates the mean and standard deviation given a list of numbers.
647 Returns : 2-element list (mean, stdev)
648 Argument : list of numbers (ints or floats)
649 Thows : n/a
651 =cut
653 #---------------
654 sub mean_stdev {
655 #---------------
656 my ($self, @data) = @_;
657 return (undef, undef) if not @data; # case of empty @data list
658 my $mean = 0;
659 my $N = 0;
660 foreach my $num (@data) {
661 $mean += $num;
662 $N++
664 $mean /= $N;
665 my $sum_diff_sqd = 0;
666 foreach my $num (@data) {
667 $sum_diff_sqd += ($mean - $num) * ($mean - $num);
669 # if only one element in @data list, unbiased stdev is undefined
670 my $stdev = $N <= 1 ? undef : sqrt( $sum_diff_sqd / ($N-1) );
671 return ($mean, $stdev);
675 =head2 count_files
677 Title : count_files
678 Purpose : Counts the number of files/directories within a given directory.
679 : Also reports the number of text and binary files in the dir
680 : as well as names of these files and directories.
681 Usage : count_files(\%data)
682 : $data{-DIR} is the directory to be analyzed. Default is ./
683 : $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
684 Argument : Hash reference (empty)
685 Returns : n/a;
686 : Modifies the hash ref passed in as the sole argument.
687 : $$href{-TOTAL} scalar
688 : $$href{-NUM_TEXT_FILES} scalar
689 : $$href{-NUM_BINARY_FILES} scalar
690 : $$href{-NUM_DIRS} scalar
691 : $$href{-T_FILE_NAMES} array ref
692 : $$href{-B_FILE_NAMES} array ref
693 : $$href{-DIRNAMES} array ref
695 =cut
697 #----------------
698 sub count_files {
699 #----------------
700 my $self = shift;
701 my $href = shift; # Reference to an empty hash.
702 my( $name, @fileLine);
703 my $dir = $$href{-DIR} || './'; # THIS IS UNIX SPECIFIC? FIXME/TODO
704 my $print = $$href{-PRINT} || 0;
706 ### Make sure $dir ends with /
707 $dir !~ m{/$} and do{ $dir .= '/'; $$href{-DIR} = $dir; };
709 open ( my $PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!");
711 ### Initialize the hash data.
712 $$href{-TOTAL} = 0;
713 $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} = $$href{-NUM_DIRS} = 0;
714 $$href{-T_FILE_NAMES} = [];
715 $$href{-B_FILE_NAMES} = [];
716 $$href{-DIR_NAMES} = [];
717 while( my $line = <$PIPE> ) {
718 chomp();
719 $$href{-TOTAL}++;
720 if( -T $dir.$line ) {
721 $$href{-NUM_TEXT_FILES}++;
722 push @{$$href{-T_FILE_NAMES}}, $line; }
723 if( -B $dir.$line and not -d $dir.$line) {
724 $$href{-NUM_BINARY_FILES}++;
725 push @{$$href{-B_FILE_NAMES}}, $line; }
726 if( -d $dir.$line ) {
727 $$href{-NUM_DIRS}++;
728 push @{$$href{-DIR_NAMES}}, $line; }
730 close $PIPE;
732 if( $print) {
733 printf( "\n%4d %s\n", $$href{-TOTAL}, "total files+dirs in $dir");
734 printf( "%4d %s\n", $$href{-NUM_TEXT_FILES}, "text files");
735 printf( "%4d %s\n", $$href{-NUM_BINARY_FILES}, "binary files");
736 printf( "%4d %s\n", $$href{-NUM_DIRS}, "directories");
741 =head2 file_info
743 Title : file_info
744 Purpose : Obtains a variety of date for a given file.
745 : Provides an interface to Perl's stat().
746 Status : Under development. Not ready. Don't use!
748 =cut
750 #--------------
751 sub file_info {
752 #--------------
753 my ($self, %param) = @_;
754 my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
755 $get ||= 'all';
756 $fmt ||= 'yyyy-mm-dd';
758 my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
759 $atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
761 if($get =~ /date/i) {
762 ## I can get the elapsed time since the file was modified but
763 ## it's not so straightforward to get the date in a nice format...
764 ## Think about using a standard CPAN module for this, like
765 ## Date::Manip or Date::DateCalc.
767 my $date = $mtime;
768 my $elsec = time - $mtime;
769 printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN>;
770 my $days = sprintf "%.0f", $elsec/(3600*24);
771 } elsif($get eq 'all') {
772 return stat $file;
776 =head2 delete
778 Title : delete
779 Purpose :
781 =cut
783 #------------
784 sub delete {
785 #------------
786 my $self = shift;
787 my $fileName = shift;
788 if(not -e $fileName) {
789 $self->throw("Could not delete file '$fileName': Does not exist.");
790 } elsif(not -o $fileName) {
791 $self->throw("Could not delete file '$fileName': Not owner.");
793 my $ulval = unlink($fileName) > 0
794 or $self->throw("Failed to delete file '$fileName': $!");
798 =head2 create_filehandle
800 Usage : $object->create_filehandle(<named parameters>);
801 Purpose : Create a FileHandle object from a file or STDIN.
802 : Mainly used as a helper method by read() and get_newline().
803 Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
804 Argument : Named parameters (case-insensitive):
805 : (all optional)
806 : -CLIENT => object reference for the object submitting
807 : the request. Default = $Util.
808 : -FILE => string (full path to file) or a reference
809 : to a FileHandle object or typeglob. This is an
810 : optional parameter (if not defined, STDIN is used).
811 Returns : Reference to a FileHandle object.
812 Throws : Exception if cannot open a supplied file or if supplied with a
813 : reference that is not a FileHandle ref.
814 Comments : If given a FileHandle reference, this method simply returns it.
815 : This method assumes the user wants to read ascii data. So, if
816 : the file is binary, it will be treated as a compressed (gzipped)
817 : file and access it using gzip -ce. The problem here is that not
818 : all binary files are necessarily compressed. Therefore,
819 : this method should probably have a -mode parameter to
820 : specify ascii or binary.
822 See Also : L<get_newline()|get_newline>
824 =cut
826 #---------------------
827 sub create_filehandle {
828 #---------------------
829 my($self, @param) = @_;
830 my($client, $file, $handle) =
831 $self->_rearrange([qw( CLIENT FILE HANDLE )], @param);
833 if(not ref $client) { $client = $self; }
834 $file ||= $handle;
835 if( $client->can('file')) {
836 $file = $client->file($file);
839 my $FH;
840 my ($handle_ref);
842 if($handle_ref = ref($file)) {
843 if($handle_ref eq 'FileHandle') {
844 $FH = $file;
845 $client->{'_input_type'} = "FileHandle";
846 } elsif($handle_ref eq 'GLOB') {
847 $FH = $file;
848 $client->{'_input_type'} = "Glob";
849 } else {
850 $self->throw(-class => 'Bio::Root::IOException',
851 -text => "Could not read file '$file': Not a FileHandle or GLOB ref.");
853 $self->verbose > 0 and printf STDERR "$ID: reading data from FileHandle\n";
855 } elsif($file) {
856 $client->{'_input_type'} = "FileHandle for $file";
858 # Use gzip -cd to access compressed data.
859 if( -B $file ) {
860 $client->{'_input_type'} .= " (compressed)";
861 my $gzip = $self->find_exe('gzip');
862 $file = "$gzip -cd $file |"
865 require FileHandle;
866 $FH = FileHandle->new();
867 open ($FH, $file) || $self->throw(-class=>'Bio::Root::FileOpenException',
868 -text =>"Could not access data file '$file': $!");
869 $self->verbose > 0 and printf STDERR "$ID: reading data from file '$file'\n";
871 } else {
872 # Read from STDIN.
873 $FH = \*STDIN;
874 $self->verbose > 0 and printf STDERR "$ID: reading data from STDIN\n";
875 $client->{'_input_type'} = "STDIN";
878 return $FH;
881 =head2 get_newline
883 Usage : $object->get_newline(<named parameters>);
884 Purpose : Determine the character(s) used for newlines in a given file or
885 : input stream. Delegates to Bio::Root::Utilities::get_newline()
886 Example : $data = $object->get_newline(-CLIENT => $anObj,
887 : -FILE =>'usr/people/me/data.txt')
888 Argument : Same arguemnts as for create_filehandle().
889 Returns : Reference to a FileHandle object.
890 Throws : Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
892 See Also : L<taste_file()|taste_file>, L<create_filehandle()|create_filehandle>
894 =cut
896 #-----------------
897 sub get_newline {
898 #-----------------
899 my($self, @param) = @_;
901 return $NEWLINE if defined $NEWLINE;
903 my($client ) =
904 $self->_rearrange([qw( CLIENT )], @param);
906 my $FH = $self->create_filehandle(@param);
908 if(not ref $client) { $client = $self; }
910 if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) {
911 # Can't taste from STDIN since we can't seek 0 on it.
912 # Are other non special Glob refs seek-able?
913 # Attempt to guess newline based on platform.
914 # Not robust since we could be reading Unix files on a Mac, e.g.
915 if(defined $ENV{'MACPERL'}) {
916 $NEWLINE = "\015"; # \r
917 } else {
918 $NEWLINE = "\012"; # \n
920 } else {
921 $NEWLINE = $self->taste_file($FH);
924 close ($FH) unless ($client->{'_input_type'} eq 'STDIN' ||
925 $client->{'_input_type'} eq 'FileHandle' ||
926 $client->{'_input_type'} eq 'Glob' );
928 delete $client->{'_input_type'};
930 return $NEWLINE || $DEFAULT_NEWLINE;
934 =head2 taste_file
936 Usage : $object->taste_file( <FileHandle> );
937 : Mainly a utility method for get_newline().
938 Purpose : Sample a filehandle to determine the character(s) used for a newline.
939 Example : $char = $Util->taste_file($FH)
940 Argument : Reference to a FileHandle object.
941 Returns : String containing an octal represenation of the newline character string.
942 : Unix = "\012" ("\n")
943 : Win32 = "\012\015" ("\r\n")
944 : Mac = "\015" ("\r")
945 Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
946 : Exception if argument is not FileHandle object reference.
947 : Warning if cannot determine neewline char(s).
948 Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
950 See Also : L<get_newline()|get_newline>
952 =cut
954 #---------------
955 sub taste_file {
956 #---------------
957 my ($self, $FH) = @_;
958 my $BUFSIZ = 256; # Number of bytes read from the file handle.
959 my ($buffer, $octal, $str, $irs, $i);
961 ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref");
963 $buffer = '';
965 # this is a quick hack to check for availability of alarm(); just copied
966 # from Bio/Root/IOManager.pm HL 02/19/01
967 my $alarm_available = 1;
968 eval {
969 alarm(0);
971 if($@) {
972 # alarm() not available (ActiveState perl for win32 doesn't have it.
973 # See jitterbug PR#98)
974 $alarm_available = 0;
976 $SIG{ALRM} = sub { die "Timed out!"; };
977 my $result;
978 eval {
979 $alarm_available && alarm( $TIMEOUT_SECS );
980 $result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file
981 $alarm_available && alarm(0);
983 if($@ =~ /Timed out!/) {
984 $self->throw( "Timed out while waiting for input.",
985 "Timeout period = $TIMEOUT_SECS seconds.\n"
986 ."For longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Utilities.pm.");
988 } elsif(not $result) {
989 my $err = $@;
990 $self->throw("read taste failed to read from FileHandle.", $err);
992 } elsif($@ =~ /\S/) {
993 my $err = $@;
994 $self->throw("Unexpected error during read: $err");
997 seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle.");
999 my @chars = split(//, $buffer);
1000 my $flavor;
1002 for ($i = 0; $i <$BUFSIZ; $i++) {
1003 if (($chars[$i] eq "\012")) {
1004 unless ($chars[$i-1] eq "\015") {
1005 $flavor='Unix';
1006 $octal = "\012";
1007 $str = '\n';
1008 $irs = "^J";
1009 last;
1011 } elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) {
1012 $flavor='DOS';
1013 $octal = "\015\012";
1014 $str = '\r\n';
1015 $irs = "^M^J";
1016 last;
1017 } elsif (($chars[$i] eq "\015")) {
1018 $flavor='Mac';
1019 $octal = "\015";
1020 $str = '\r';
1021 $irs = "^M";
1022 last;
1025 if (not $octal) {
1026 $self->warn("Could not determine newline char. Using '\012'");
1027 $octal = "\012";
1028 } else {
1029 #print STDERR "FLAVOR=$flavor, NEWLINE CHAR = $irs\n";
1031 return($octal);
1034 =head2 file_flavor
1036 Usage : $object->file_flavor( <filename> );
1037 Purpose : Returns the 'flavor' of a given file (unix, dos, mac)
1038 Example : print "$file has flavor: ", $Util->file_flavor($file);
1039 Argument : filename = string, full path name for file
1040 Returns : String describing flavor of file and handy info about line endings.
1041 : One of these is returned:
1042 : unix (\n or 012 or ^J)
1043 : dos (\r\n or 015,012 or ^M^J)
1044 : mac (\r or 015 or ^M)
1045 : unknown
1046 Throws : Exception if argument is not a file
1047 : Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
1049 See Also : L<get_newline()|get_newline>, L<taste_file()|taste_file>
1051 =cut
1053 #---------------
1054 sub file_flavor {
1055 #---------------
1056 my ($self, $file) = @_;
1057 my %flavors=("\012" =>'unix (\n or 012 or ^J)',
1058 "\015\012" =>'dos (\r\n or 015,012 or ^M^J)',
1059 "\015" =>'mac (\r or 015 or ^M)'
1062 -f $file or $self->throw("Could not determine flavor: arg '$file' is either non existant or is not a file.\n");
1063 my $octal = $self->get_newline($file);
1064 my $flavor = $flavors{$octal} || "unknown";
1065 return $flavor;
1068 ######################################
1069 ##### Mail Functions ########
1070 ######################################
1072 =head2 mail_authority
1074 Title : mail_authority
1075 Usage : $Util->mail_authority( $message )
1076 Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
1078 See Also : L<send_mail()|send_mail>
1080 =cut
1082 #---------------
1083 sub mail_authority {
1084 #---------------
1085 my( $self, $message ) = @_;
1086 my $script = $self->untaint($0,1);
1088 my $email = $self->{'_auth_email'} || $AUTHORITY;
1089 if (defined $email) {
1090 $self->send_mail( -TO=>$AUTHORITY, -SUBJ=>$script, -MSG=>$message);
1091 } else {
1092 $self->throw("Can't email authority. No email defined.");
1096 =head2 authority
1098 Title : authority
1099 Usage : $Util->authority('admin@example.com');
1100 Purpose : Set/get the email address that should be notified by mail_authority()
1102 See Also : L<mail_authority()|mail_authority>
1104 =cut
1106 #-------------
1107 sub authority {
1108 #-------------
1109 my( $self, $email ) = @_;
1110 $self->{'_auth_email'} = $email if defined $email;
1111 return $self->{'_auth_email'};
1115 =head2 send_mail
1117 Title : send_mail
1118 Usage : $Util->send_mail( named_parameters )
1119 Purpose : Provides an interface to mail or sendmail, if available
1120 Returns : n/a
1121 Argument : Named parameters: (case-insensitive)
1122 : -TO => e-mail address to send to
1123 : -SUBJ => subject for message (optional)
1124 : -MSG => message to be sent (optional)
1125 : -CC => cc: e-mail address (optional)
1126 Thows : Exception if TO: address appears bad or is missing.
1127 : Exception if mail cannot be sent.
1128 Comments : Based on TomC's tip at:
1129 : http://www.perl.com/CPAN/doc/FMTEYEWTK/safe_shellings
1131 : Using default 'From:' information.
1132 : sendmail options used:
1133 : -t: ignore the address given on the command line and
1134 : get To:address from the e-mail header.
1135 : -oi: prevents send_mail from ending the message if it
1136 : finds a period at the start of a line.
1138 See Also : L<mail_authority()|mail_authority>
1140 =cut
1143 #-------------
1144 sub send_mail {
1145 #-------------
1146 my( $self, @param) = @_;
1147 my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param);
1149 $self->throw("Invalid or missing e-mail address: $recipient")
1150 if not $recipient =~ /\S+\@\S+/;
1152 $subj ||= 'empty subject'; $message ||= '';
1154 # Best to use mail rather than sendmail. Permissions on sendmail in
1155 # linux distros have been significantly locked down in recent years,
1156 # due to the perception that it is insecure.
1157 my ($exe, $ccinfo);
1158 if ($exe = $self->find_exe('mail')) {
1159 if (defined $cc) {
1160 $ccinfo = "-c $cc";
1162 $self->debug("send_mail: $exe -s '$subj' $ccinfo $recipient\n");
1163 open (MAIL, "| $exe -s '$subj' $ccinfo $recipient") ||
1164 $self->throw("Can't send email: mail cannot fork: $!");
1165 print MAIL <<QQ_EOFM_QQ;
1166 $message
1167 QQ_EOFM_QQ
1168 $? and $self->warn("mail didn't exit nicely: $?");
1169 close(MAIL);
1170 } elsif ($exe = $self->find_exe('sendmail')) {
1171 open (SENDMAIL, "| $exe -oi -t") ||
1172 $self->throw("Can't send email: sendmail cannot fork: $!");
1173 print SENDMAIL <<QQ_EOFSM_QQ;
1174 To: $recipient
1175 Subject: $subj
1176 Cc: $cc
1178 $message
1180 QQ_EOFSM_QQ
1181 $? and $self->warn("sendmail didn't exit nicely: $?");
1183 close(SENDMAIL);
1184 } else {
1185 $self->throw("Can't find executable for mail or sendmail.");
1190 =head2 find_exe
1192 Title : find_exe
1193 Usage : $Util->find_exe(name);
1194 Purpose : Locate an executable (for use in a system() call, e.g.))
1195 Example : $Util->find_exe("gzip");
1196 Returns : String containing executable that passes the -x test.
1197 Returns undef if an executable of the supplied name cannot be found.
1198 Argument : Name of executable to be found.
1199 : Can be a full path. If supplied name is not executable, an executable
1200 : of that name will be searched in all directories in the currently
1201 : defined PATH environment variable.
1202 Throws : No exceptions, but issues a warning if multiple paths are found
1203 : for a given name. The first one is used.
1204 Comments : TODO: Confirm functionality on all bioperl-supported platforms.
1205 May get tripped up by variation in path separator character used
1206 for splitting ENV{PATH}.
1207 See Also :
1209 =cut
1211 #------------
1212 sub find_exe {
1213 #------------
1214 my ($self, $name) = @_;
1215 my @bindirs;
1216 if ($^O =~ m/mswin/i) {
1217 @bindirs = split ';', $ENV{'PATH'};
1218 # Add usual executable extension if missing or -x won't work
1219 $name.= '.exe' if ($name !~ m/\.exe$/i);
1221 else {
1222 @bindirs = split ':', $ENV{'PATH'};
1224 my $exe = $name;
1225 unless (-x $exe) {
1226 undef $exe;
1227 my @exes;
1228 foreach my $d (@bindirs) {
1229 # Note: Windows also understand '/' as folder separator,
1230 # so there is no need to use a conditional with '\'
1231 push(@exes, "$d/$name") if -x "$d/$name";
1233 if (scalar @exes) {
1234 $exe = $exes[0];
1235 if (defined $exes[1]) {
1236 $self->warn("find_exe: Multiple paths to '$name' found. Using $exe.");
1240 return $exe;
1244 ######################################
1245 ### Interactive Functions #####
1246 ######################################
1249 =head2 yes_reply
1251 Title : yes_reply()
1252 Usage : $Util->yes_reply( [query_string]);
1253 Purpose : To test an STDIN input value for affirmation.
1254 Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
1255 : $Util->yes_reply('Continue') || die;
1256 Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
1257 Argument: query_string = string to be used to prompt user (optional)
1258 : If not provided, 'Yes or no' will be used.
1259 : Question mark is automatically appended.
1261 =cut
1263 #-------------
1264 sub yes_reply {
1265 #-------------
1266 my $self = shift;
1267 my $query = shift;
1268 my $reply;
1269 $query ||= 'Yes or no';
1270 print "\n$query? (y/n) [n] ";
1271 chomp( $reply = <STDIN> );
1272 $reply =~ /^y/i;
1277 =head2 request_data
1279 Title : request_data()
1280 Usage : $Util->request_data( [value_name]);
1281 Purpose : To request data from a user to be entered via keyboard (STDIN).
1282 Example : $name = $Util->request_data('Name');
1283 : # User will see: % Enter Name:
1284 Returns : String, (data entered from keyboard, sans terminal newline.)
1285 Argument: value_name = string to be used to prompt user.
1286 : If not provided, 'data' will be used, (not very helpful).
1287 : Question mark is automatically appended.
1289 =cut
1291 #----------------
1292 sub request_data {
1293 #----------------
1294 my $self = shift;
1295 my $data = shift || 'data';
1296 print "Enter $data: ";
1297 # Remove the terminal newline char.
1298 chomp($data = <STDIN>);
1299 $data;
1302 =head2 quit_reply
1304 Title : quit_reply
1305 Usage :
1306 Purpose :
1308 =cut
1310 sub quit_reply {
1311 # Not much used since you can use request_data()
1312 # and test for an empty string.
1313 my $self = shift;
1314 my $reply;
1315 chop( $reply = <STDIN> );
1316 $reply =~ /^q.*/i;
1320 =head2 verify_version
1322 Purpose : Checks the version of Perl used to invoke the script.
1323 : Aborts program if version is less than the given argument.
1324 Usage : verify_version('5.000')
1326 =cut
1328 #------------------
1329 sub verify_version {
1330 #------------------
1331 my $self = shift;
1332 my $reqVersion = shift;
1334 $] < $reqVersion and do {
1335 printf STDERR ( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion);
1336 printf STDERR ( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" );
1337 exit(1);
1343 __END__