1 package Bio
::Root
::Utilities
;
5 use Bio
::Root
::Exception
;
6 use base
qw(Bio::Root::Root Exporter);
10 Bio::Root::Utilities - general-purpose utilities
14 =head2 Object Creation
16 # Using the supplied singleton object:
17 use Bio::Root::Utilities qw(:obj);
20 # Create an object manually:
21 use Bio::Root::Utilities;
22 my $util = Bio::Root::Utilities->new();
25 $date_stamp = $Util->date_format('yyy-mm-dd');
27 $clean = $Util->untaint($dirty);
29 $compressed = $Util->compress('/home/me/myfile.txt')
31 my ($mean, $stdev) = $Util->mean_stdev( @data );
33 $Util->authority("me@example.com");
34 $Util->mail_authority("Something you should know about...");
36 ...and a host of other methods. See below.
40 Provides general-purpose utilities of potential interest to any Perl script.
42 The C<:obj> tag is a convenience that imports a $Util symbol into your
43 namespace representing a Bio::Root::Utilities object. This saves you
44 from creating your own Bio::Root::Utilities object via
45 C<Bio::Root::Utilities-E<gt>new()> or from prefixing all method calls with
46 C<Bio::Root::Utilities>, though feel free to do these things if desired.
47 Since there should normally not be a need for a script to have more
48 than one Bio::Root::Utilities object, this module thus comes with it's
53 This module is included with the central Bioperl distribution:
55 http://www.bioperl.org/wiki/Getting_BioPerl
56 ftp://bio.perl.org/pub/DIST
58 Follow the installation instructions included in the README file.
62 Inherits from L<Bio::Root::Root>, and uses L<Bio::Root::IO>
63 and L<Bio::Root::Exception>.
65 Relies on external executables for file compression/uncompression
66 and sending mail. No paths to these are hard coded but are located
71 http://bioperl.org - Bioperl Project Homepage
73 =head1 ACKNOWLEDGEMENTS
75 This module was originally developed under the auspices of the
76 Saccharomyces Genome Database: http://www.yeastgenome.org/
78 =head1 AUTHOR Steve Chervitz
82 use vars
qw(@EXPORT_OK %EXPORT_TAGS);
83 @EXPORT_OK = qw($Util);
84 %EXPORT_TAGS = ( obj => [qw($Util)],
87 use vars qw($ID $Util $GNU_PATH $TIMEOUT_SECS
88 @COMPRESSION_UTILS @UNCOMPRESSION_UTILS
89 $DEFAULT_NEWLINE $NEWLINE $AUTHORITY
90 @MONTHS @DAYS $BASE_YEAR $DEFAULT_CENTURY
93 $ID = 'Bio::Root::Utilities';
94 # Number of seconds to wait before timing out when reading input (taste_file())
96 $NEWLINE = $ENV{'NEWLINE'} || undef;
97 $BASE_YEAR = 1900; # perl's localtime() assumes this for it's year data.
98 # TODO: update this every hundred years. Y2K-sensitive code.
99 $DEFAULT_CENTURY = $BASE_YEAR + 100;
100 @MONTHS = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
101 @DAYS = qw(Sun Mon Tue Wed Thu Fri Sat);
102 # Sets the preference for compression utilities to be used by compress().
103 # The first executable in this list to be found in the current PATH will be used,
104 # unless overridden in the call to that function. See docs for details.
105 @COMPRESSION_UTILS = qw(gzip bzip2 zip compress);
106 @UNCOMPRESSION_UTILS = qw(gunzip gzip bunzip2 unzip uncompress);
108 # Default person to receive feedback from users and possibly automatic error messages.
111 # Note: $GNU_PATH is now deprecated, shouldn't be needed since now this module
112 # will automatically locate the compression utility in the current PATH.
113 # Retaining $GNU_PATH for backward compatibility.
115 # $GNU_PATH points to the directory containing the gzip and gunzip
116 # executables. It may be required for executing gzip/gunzip
117 # in some situations (e.g., when $ENV{PATH} doesn't contain this dir.
118 # Customize $GNU_PATH for your site if the compress() or
119 # uncompress() functions are generating exceptions.
121 #$GNU_PATH = '/tools/gnu/bin/';
123 $DEFAULT_NEWLINE = "\012"; # \n (used if get_newline() fails for some reason)
125 ## Static UTIL object.
126 $Util = Bio
::Root
::Root
->new();
132 Usage : $Util->date_format( [FMT], [DATE])
133 Purpose : -- Get a string containing the formatted date or time
134 : taken when this routine is invoked.
135 : -- Provides a way to avoid using `date`.
136 : -- Provides an interface to localtime().
137 : -- Interconverts some date formats.
139 : (For additional functionality, use Date::Manip or
140 : Date::DateCalc available from CPAN).
141 Example : $Util->date_format();
142 : $date = $Util->date_format('yyyy-mmm-dd', '11/22/92');
143 Returns : String (unless 'list' is provided as argument, see below)
145 : 'yyyy-mm-dd' = 1996-05-03 # default format.
146 : 'yyyy-dd-mm' = 1996-03-05
147 : 'yyyy-mmm-dd' = 1996-May-03
148 : 'd-m-y' = 3-May-1996
149 : 'd m y' = 3 May 1996
151 : 'mdy' = May 3, 1996
155 : 'hms' = 23:01:59 # when not converting a format, 'hms' can be
156 : # tacked on to any of the above options
157 : # to add the time stamp: eg 'dmyhms'
158 : 'full' | 'unix' = UNIX-style date: Tue May 5 22:00:00 1998
159 : 'list' = the contents of localtime(time) in an array.
160 Argument : (all are optional)
161 : FMT = yyyy-mm-dd | yyyy-dd-mm | yyyy-mmm-dd |
162 : mdy | ymd | md | d-m-y | hms | hm
163 : ('hms' may be appended to any of these to
166 : DATE = String containing date to be converted.
167 : Acceptable input formats:
168 : 12/1/97 (for 1 December 1997)
172 Comments : If you don't care about formatting or using backticks, you can
173 : always use: $date = `date`;
175 : For more features, use Date::Manip.pm, (which I should
176 : probably switch to...)
178 See Also : L<file_date()|file_date>, L<month2num()|month2num>
187 my $date = shift; # optional date to be converted.
189 my($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst);
191 $option ||= 'yyyy-mm-dd';
193 my ($month_txt, $day_txt, $month_num, $fullYear);
194 my ($converting, @date);
196 # Load a supplied date for conversion:
197 if(defined($date) && ($date =~ /[\D-]+/)) {
200 ($mon,$mday,$year) = split(m{/}, $date);
201 } elsif($date =~ /(\d{4})-(\d{1,2})-(\d{1,2})/) {
202 ($year,$mon,$mday) = ($1, $2, $3);
203 } elsif($date =~ /(\d{4})-(\w{3,})-(\d{1,2})/) {
204 ($year,$mon,$mday) = ($1, $2, $3);
205 $mon = $self->month2num($2);
207 print STDERR
"\n*** Unsupported input date format: $date\n";
209 if(length($year) == 4) {
211 $year = substr $year, 2;
213 # Heuristics to guess what century was intended when a 2-digit year is given
214 # If number is over 50, assume it's for prev century; under 50 = default century.
215 # TODO: keep an eye on this Y2K-sensitive code
217 $fullYear = $DEFAULT_CENTURY + $year - 100;
219 $fullYear = $DEFAULT_CENTURY + $year;
224 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = @date =
225 localtime(($date ?
$date : time()));
226 return @date if $option =~ /list/i;
227 $fullYear = $BASE_YEAR+$year;
229 $month_txt = $MONTHS[$mon];
230 $day_txt = $DAYS[$wday] if defined $wday;
233 # print "sec: $sec, min: $min, hour: $hour, month: $mon, m-day: $mday, year: $year\nwday: $wday, yday: $yday, dst: $isdst";<STDIN>;
235 if( $option =~ /yyyy-mm-dd/i ) {
236 $date = sprintf "%4d-%02d-%02d",$fullYear,$month_num,$mday;
237 } elsif( $option =~ /yyyy-dd-mm/i ) {
238 $date = sprintf "%4d-%02d-%02d",$fullYear,$mday,$month_num;
239 } elsif( $option =~ /yyyy-mmm-dd/i ) {
240 $date = sprintf "%4d-%3s-%02d",$fullYear,$month_txt,$mday;
241 } elsif( $option =~ /full|unix/i ) {
242 $date = sprintf "%3s %3s %2d %02d:%02d:%02d %d",$day_txt, $month_txt, $mday, $hour, $min, $sec, $fullYear;
243 } elsif( $option =~ /mdy/i ) {
244 $date = "$month_txt $mday, $fullYear";
245 } elsif( $option =~ /ymd/i ) {
246 $date = $year."\l$month_txt$mday";
247 } elsif( $option =~ /dmy/i ) {
248 $date = $mday."\l$month_txt$year";
249 } elsif( $option =~ /md/i ) {
250 $date = "\l$month_txt$mday";
251 } elsif( $option =~ /d-m-y/i ) {
252 $date = "$mday-$month_txt-$fullYear";
253 } elsif( $option =~ /d m y/i ) {
254 $date = "$mday $month_txt $fullYear";
255 } elsif( $option =~ /year/i ) {
257 } elsif( $option =~ /dmy/i ) {
258 $date = $mday.'-'.$month_txt.'-'.$fullYear;
259 } elsif($option and $option !~ /hms/i) {
260 print STDERR
"\n*** Unrecognized date format request: $option\n";
263 if( $option =~ /hms/i and not $converting) {
264 $date .= " $hour:$min:$sec" if $date;
265 $date ||= "$hour:$min:$sec";
268 return $date || join(" ", @date);
275 Purpose : Converts a string containing a name of a month to integer
276 : representing the number of the month in the year.
277 Example : $Util->month2num("march"); # returns 3
278 Argument : The string argument must contain at least the first
279 : three characters of the month's name. Case insensitive.
280 Throws : Exception if the conversion fails.
287 my ($self, $str) = @_;
289 # Get string in proper format for conversion.
290 $str = substr($str, 0, 3);
291 for my $month (0..$#MONTHS) {
292 return $month+1 if $str =~ /$MONTHS[$month]/i;
294 $self->throw("Invalid month name: $str");
300 Purpose : Does the opposite of month2num.
301 : Converts a number into a string containing a name of a month.
302 Example : $Util->num2month(3); # returns 'Mar'
303 Throws : Exception if supplied number is out of range.
310 my ($self, $num) = @_;
312 $self->throw("Month out of range: $num") if $num < 1 or $num > 12;
313 return $MONTHS[$num-1];
319 Usage : $Util->compress(full-path-filename);
320 : $Util->compress(<named parameters>);
321 Purpose : Compress a file.
322 Example : $Util->compress("/usr/people/me/data.txt");
323 : $Util->compress(-file=>"/usr/people/me/data.txt",
325 : -outfile=>"/usr/people/share/data.txt.gz",
326 : -exe=>"/usr/local/bin/fancyzip");
327 Returns : String containing full, absolute path to compressed file
328 Argument : Named parameters (case-insensitive):
329 : -FILE => String (name of file to be compressed, full path).
330 : If the supplied filename ends with '.gz' or '.Z',
331 : that extension will be removed before attempting to compress.
333 : -TMP => boolean. If true, (or if user is not the owner of the file)
334 : the file is compressed to a temp file. If false, file may be
335 : clobbered with the compressed version (if using a utility like
336 : gzip, which is the default)
337 : -OUTFILE => String (name of the output compressed file, full path).
338 : -EXE => Name of executable for compression utility to use.
339 : Will supersede those in @COMPRESSION_UTILS defined by
340 : this module. If the absolute path to the executable is not provided,
341 : it will be searched in the PATH env variable.
342 Throws : Exception if file cannot be compressed.
343 : If user is not owner of the file, generates a warning and compresses to
344 : a tmp file. To avoid this warning, use the -o file test operator
345 : and call this function with -TMP=>1.
346 Comments : Attempts to compress using utilities defined in the @COMPRESSION_UTILS
347 : defined by this module, in the order defined. The first utility that is
348 : found to be executable will be used. Any utility defined in optional -EXE param
349 : will be tested for executability first.
350 : To minimize security risks, the -EXE parameter value is untained using
351 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
353 See Also : L<uncompress()|uncompress>
360 my ($self, @args) = @_;
361 # This method formerly didn't use named params and expected fileName, tmp
362 # in that order. This should be backward compatibile.
363 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
364 my ($file, $get, $fmt);
366 # in case the supplied name already has a compressed extension
367 if($fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName =~ s/$1$//; };
368 $self->debug("compressing file $fileName");
370 my @util_to_use = @COMPRESSION_UTILS;
373 $exe = $self->untaint($exe, 1);
374 unshift @util_to_use, $exe;
377 my @checked = @util_to_use;
379 while (not -x
$exe and scalar(@util_to_use)) {
380 $exe = $self->find_exe(shift @util_to_use);
384 $self->throw("Can't find compression utility. Looked for @checked");
387 my ($compressed, @cmd, $handle);
389 if(defined($outfile) or $tmp or not -o
$fileName) {
390 if (defined $outfile) {
391 $compressed = $outfile;
393 # obtain a temporary file name (not using the handle)
394 # and insert some special text to flag it as a bioperl-based temp file
395 my $io = Bio
::Root
::IO
->new();
396 ($handle, $compressed) = $io->tempfile();
397 $compressed .= '.tmp.bioperl.gz';
400 # Use double quotes if executable path have empty spaces
405 if ($exe =~ /gzip|bzip2|compress/) {
406 @cmd = ("$exe -f < \"$fileName\" > \"$compressed\"");
407 } elsif ($exe eq 'zip') {
408 @cmd = ("$exe -r \"$fileName.zip\" \"$fileName\"");
411 $self->warn("Not owner of file $fileName. Compressing to temp file $compressed.");
414 # Need to compute the compressed name based on exe since we're returning it.
415 $compressed = $fileName;
416 if ($exe =~ /gzip/) {
417 $compressed .= '.gz';
418 } elsif ($exe =~ /bzip2/) {
419 $compressed .= '.bz2';
420 } elsif ($exe =~ /zip/) {
421 $compressed .= '.zip';
422 } elsif ($exe =~ /compress/) {
425 if ($exe =~ /gzip|bzip2|compress/) {
426 @cmd = ($exe, '-f', $fileName);
427 } elsif ($exe eq 'zip') {
428 @cmd = ($exe, '-r', "$compressed", $fileName);
432 if(system(@cmd) != 0) {
433 $self->throw( -class => 'Bio::Root::SystemException',
434 -text
=> "Failed to compress file $fileName using $exe: $!");
443 Usage : $Util->uncompress(full-path-filename);
444 : $Util->uncompress(<named parameters>);
445 Purpose : Uncompress a file.
446 Example : $Util->uncompress("/usr/people/me/data.txt");
447 : $Util->uncompress(-file=>"/usr/people/me/data.txt.gz",
449 : -outfile=>"/usr/people/share/data.txt",
450 : -exe=>"/usr/local/bin/fancyzip");
451 Returns : String containing full, absolute path to uncompressed file
452 Argument : Named parameters (case-insensitive):
453 : -FILE => String (name of file to be uncompressed, full path).
454 : If the supplied filename ends with '.gz' or '.Z',
455 : that extension will be removed before attempting to uncompress.
457 : -TMP => boolean. If true, (or if user is not the owner of the file)
458 : the file is uncompressed to a temp file. If false, file may be
459 : clobbered with the uncompressed version (if using a utility like
460 : gzip, which is the default)
461 : -OUTFILE => String (name of the output uncompressed file, full path).
462 : -EXE => Name of executable for uncompression utility to use.
463 : Will supersede those in @UNCOMPRESSION_UTILS defined by
464 : this module. If the absolute path to the executable is not provided,
465 : it will be searched in the PATH env variable.
466 Throws : Exception if file cannot be uncompressed.
467 : If user is not owner of the file, generates a warning and uncompresses to
468 : a tmp file. To avoid this warning, use the -o file test operator
469 : and call this function with -TMP=>1.
470 Comments : Attempts to uncompress using utilities defined in the @UNCOMPRESSION_UTILS
471 : defined by this module, in the order defined. The first utility that is
472 : found to be executable will be used. Any utility defined in optional -EXE param
473 : will be tested for executability first.
474 : To minimize security risks, the -EXE parameter value is untained using
475 : the untaint() method of this module (in 'relaxed' mode to permit path separators).
477 See Also : L<compress()|compress>
484 my ($self, @args) = @_;
485 # This method formerly didn't use named params and expected fileName, tmp
486 # in that order. This should be backward compatibile.
487 my ($fileName, $tmp, $outfile, $exe) = $self->_rearrange([qw(FILE TMP OUTFILE EXE)], @args);
488 my ($file, $get, $fmt);
490 # in case the supplied name lacks a compressed extension
491 if(not $fileName =~ /(\.gz|\.Z|\.bz2|\.zip)$/) { $fileName .= $1; };
492 $self->debug("uncompressing file $fileName");
494 my @util_to_use = @UNCOMPRESSION_UTILS;
497 $exe = $self->untaint($exe, 1);
498 unshift @util_to_use, $exe;
502 while (not -x
$exe and scalar(@util_to_use)) {
503 $exe = $self->find_exe(shift @util_to_use);
507 $self->throw("Can't find compression utility. Looked for @util_to_use");
510 my ($uncompressed, @cmd, $handle);
512 $uncompressed = $fileName;
513 $uncompressed =~ s/\.\w+$//;
515 if(defined($outfile) or $tmp or not -o
$fileName) {
516 if (defined $outfile) {
517 $uncompressed = $outfile;
519 # obtain a temporary file name (not using the handle)
520 my $io = Bio
::Root
::IO
->new();
521 ($handle, $uncompressed) = $io->tempfile();
522 # insert some special text to flag it as a bioperl-based temp file
523 $uncompressed .= '.tmp.bioperl';
526 # Use double quotes if executable path have empty spaces
531 if ($exe =~ /gunzip|bunzip2|uncompress/) {
532 @cmd = ("$exe -f < \"$fileName\" > \"$uncompressed\"");
533 } elsif ($exe =~ /gzip/) {
534 @cmd = ("$exe -df < \"$fileName\" > \"$uncompressed\"");
535 } elsif ($exe eq 'unzip') {
536 @cmd = ("$exe -p \"$fileName\" > \"$uncompressed\"");
539 $self->warn("Not owner of file $fileName. Uncompressing to temp file $uncompressed.");
542 if ($exe =~ /gunzip|bunzip2|uncompress/) {
543 @cmd = ($exe, '-f', $fileName);
544 } elsif ($exe =~ /gzip/) {
545 @cmd = ($exe, '-df', $fileName);
546 } elsif ($exe eq 'zip') {
547 @cmd = ($exe, $fileName);
551 if(system(@cmd) != 0) {
552 $self->throw( -class => 'Bio::Root::SystemException',
553 -text
=> "Failed to uncompress file $fileName using $exe: $!");
556 return $uncompressed;
563 Usage : $Util->file_date( filename [,date_format])
564 Purpose : Obtains the date of a given file.
565 : Provides flexible formatting via date_format().
566 Returns : String = date of the file as: yyyy-mm-dd (e.g., 1997-10-15)
567 Argument : filename = string, full path name for file
568 : date_format = string, desired format for date (see date_format()).
569 : Default = yyyy-mm-dd
570 Thows : Exception if no file is provided or does not exist.
571 Comments : Uses the mtime field as obtained by stat().
578 my ($self, $file, $fmt) = @_;
580 $self->throw("No such file: $file") if not $file or not -e
$file;
582 $fmt ||= 'yyyy-mm-dd';
584 my @file_data = stat($file);
585 return $self->date_format($fmt, $file_data[9]); # mtime field
592 Purpose : To remove nasty shell characters from untrusted data
593 : and allow a script to run with the -T switch.
594 : Potentially dangerous shell meta characters: &;`'\"|*?!~<>^()[]{}$\n\r
595 : Accept only the first block of contiguous characters:
596 : Default allowed chars = "-\w.', ()"
597 : If $relax is true = "-\w.', ()\/=%:^<>*"
598 Usage : $Util->untaint($value, $relax)
599 Returns : String containing the untained data.
600 Argument: $value = string
603 This general untaint() function may not be appropriate for every situation.
604 To allow only a more restricted subset of special characters
605 (for example, untainting a regular expression), then using a custom
606 untainting mechanism would permit more control.
608 Note that special trusted vars (like $0) require untainting.
615 my($self,$value,$relax) = @_;
619 $self->debug("\nUNTAINT: $value\n");
621 unless (defined $value and $value ne '') {
626 $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
628 # } elsif( $relax == 2 ) { # Could have several degrees of relax.
629 # $value =~ /([-\w.\', ()\/=%:^<>*]+)/;
632 $value =~ /([-\w.\', ()]+)/;
636 $self->debug("UNTAINTED: $untainted\n");
645 Usage : ($mean, $stdev) = $Util->mean_stdev( @data )
646 Purpose : Calculates the mean and standard deviation given a list of numbers.
647 Returns : 2-element list (mean, stdev)
648 Argument : list of numbers (ints or floats)
656 my ($self, @data) = @_;
657 return (undef, undef) if not @data; # case of empty @data list
660 foreach my $num (@data) {
665 my $sum_diff_sqd = 0;
666 foreach my $num (@data) {
667 $sum_diff_sqd += ($mean - $num) * ($mean - $num);
669 # if only one element in @data list, unbiased stdev is undefined
670 my $stdev = $N <= 1 ?
undef : sqrt( $sum_diff_sqd / ($N-1) );
671 return ($mean, $stdev);
678 Purpose : Counts the number of files/directories within a given directory.
679 : Also reports the number of text and binary files in the dir
680 : as well as names of these files and directories.
681 Usage : count_files(\%data)
682 : $data{-DIR} is the directory to be analyzed. Default is ./
683 : $data{-PRINT} = 0|1; if 1, prints results to STDOUT, (default=0).
684 Argument : Hash reference (empty)
686 : Modifies the hash ref passed in as the sole argument.
687 : $$href{-TOTAL} scalar
688 : $$href{-NUM_TEXT_FILES} scalar
689 : $$href{-NUM_BINARY_FILES} scalar
690 : $$href{-NUM_DIRS} scalar
691 : $$href{-T_FILE_NAMES} array ref
692 : $$href{-B_FILE_NAMES} array ref
693 : $$href{-DIRNAMES} array ref
701 my $href = shift; # Reference to an empty hash.
702 my( $name, @fileLine);
703 my $dir = $$href{-DIR
} || './'; # THIS IS UNIX SPECIFIC? FIXME/TODO
704 my $print = $$href{-PRINT
} || 0;
706 ### Make sure $dir ends with /
707 $dir !~ m{/$} and do{ $dir .= '/'; $$href{-DIR
} = $dir; };
709 open ( my $PIPE, "ls -1 $dir |" ) || $self->throw("Can't open input pipe: $!");
711 ### Initialize the hash data.
713 $$href{-NUM_TEXT_FILES
} = $$href{-NUM_BINARY_FILES
} = $$href{-NUM_DIRS
} = 0;
714 $$href{-T_FILE_NAMES
} = [];
715 $$href{-B_FILE_NAMES
} = [];
716 $$href{-DIR_NAMES
} = [];
717 while( my $line = <$PIPE> ) {
720 if( -T
$dir.$line ) {
721 $$href{-NUM_TEXT_FILES
}++;
722 push @
{$$href{-T_FILE_NAMES
}}, $line; }
723 if( -B
$dir.$line and not -d
$dir.$line) {
724 $$href{-NUM_BINARY_FILES
}++;
725 push @
{$$href{-B_FILE_NAMES
}}, $line; }
726 if( -d
$dir.$line ) {
728 push @
{$$href{-DIR_NAMES
}}, $line; }
733 printf( "\n%4d %s\n", $$href{-TOTAL
}, "total files+dirs in $dir");
734 printf( "%4d %s\n", $$href{-NUM_TEXT_FILES
}, "text files");
735 printf( "%4d %s\n", $$href{-NUM_BINARY_FILES
}, "binary files");
736 printf( "%4d %s\n", $$href{-NUM_DIRS
}, "directories");
744 Purpose : Obtains a variety of date for a given file.
745 : Provides an interface to Perl's stat().
746 Status : Under development. Not ready. Don't use!
753 my ($self, %param) = @_;
754 my ($file, $get, $fmt) = $self->_rearrange([qw(FILE GET FMT)], %param);
756 $fmt ||= 'yyyy-mm-dd';
758 my($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
759 $atime, $mtime, $ctime, $blksize, $blocks) = stat $file;
761 if($get =~ /date/i) {
762 ## I can get the elapsed time since the file was modified but
763 ## it's not so straightforward to get the date in a nice format...
764 ## Think about using a standard CPAN module for this, like
765 ## Date::Manip or Date::DateCalc.
768 my $elsec = time - $mtime;
769 printf "\nFile age: %.0f sec %.0f hrs %.0f days", $elsec, $elsec/3600, $elsec/(3600*24);<STDIN
>;
770 my $days = sprintf "%.0f", $elsec/(3600*24);
771 } elsif($get eq 'all') {
787 my $fileName = shift;
788 if(not -e
$fileName) {
789 $self->throw("Could not delete file '$fileName': Does not exist.");
790 } elsif(not -o
$fileName) {
791 $self->throw("Could not delete file '$fileName': Not owner.");
793 my $ulval = unlink($fileName) > 0
794 or $self->throw("Failed to delete file '$fileName': $!");
798 =head2 create_filehandle
800 Usage : $object->create_filehandle(<named parameters>);
801 Purpose : Create a FileHandle object from a file or STDIN.
802 : Mainly used as a helper method by read() and get_newline().
803 Example : $data = $object->create_filehandle(-FILE =>'usr/people/me/data.txt')
804 Argument : Named parameters (case-insensitive):
806 : -CLIENT => object reference for the object submitting
807 : the request. Default = $Util.
808 : -FILE => string (full path to file) or a reference
809 : to a FileHandle object or typeglob. This is an
810 : optional parameter (if not defined, STDIN is used).
811 Returns : Reference to a FileHandle object.
812 Throws : Exception if cannot open a supplied file or if supplied with a
813 : reference that is not a FileHandle ref.
814 Comments : If given a FileHandle reference, this method simply returns it.
815 : This method assumes the user wants to read ascii data. So, if
816 : the file is binary, it will be treated as a compressed (gzipped)
817 : file and access it using gzip -ce. The problem here is that not
818 : all binary files are necessarily compressed. Therefore,
819 : this method should probably have a -mode parameter to
820 : specify ascii or binary.
822 See Also : L<get_newline()|get_newline>
826 #---------------------
827 sub create_filehandle
{
828 #---------------------
829 my($self, @param) = @_;
830 my($client, $file, $handle) =
831 $self->_rearrange([qw( CLIENT FILE HANDLE )], @param);
833 if(not ref $client) { $client = $self; }
835 if( $client->can('file')) {
836 $file = $client->file($file);
842 if($handle_ref = ref($file)) {
843 if($handle_ref eq 'FileHandle') {
845 $client->{'_input_type'} = "FileHandle";
846 } elsif($handle_ref eq 'GLOB') {
848 $client->{'_input_type'} = "Glob";
850 $self->throw(-class => 'Bio::Root::IOException',
851 -text
=> "Could not read file '$file': Not a FileHandle or GLOB ref.");
853 $self->verbose > 0 and printf STDERR
"$ID: reading data from FileHandle\n";
856 $client->{'_input_type'} = "FileHandle for $file";
858 # Use gzip -cd to access compressed data.
860 $client->{'_input_type'} .= " (compressed)";
861 my $gzip = $self->find_exe('gzip');
862 $file = "$gzip -cd $file |"
866 $FH = FileHandle
->new();
867 open ($FH, $file) || $self->throw(-class=>'Bio::Root::FileOpenException',
868 -text
=>"Could not access data file '$file': $!");
869 $self->verbose > 0 and printf STDERR
"$ID: reading data from file '$file'\n";
874 $self->verbose > 0 and printf STDERR
"$ID: reading data from STDIN\n";
875 $client->{'_input_type'} = "STDIN";
883 Usage : $object->get_newline(<named parameters>);
884 Purpose : Determine the character(s) used for newlines in a given file or
885 : input stream. Delegates to Bio::Root::Utilities::get_newline()
886 Example : $data = $object->get_newline(-CLIENT => $anObj,
887 : -FILE =>'usr/people/me/data.txt')
888 Argument : Same arguemnts as for create_filehandle().
889 Returns : Reference to a FileHandle object.
890 Throws : Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
892 See Also : L<taste_file()|taste_file>, L<create_filehandle()|create_filehandle>
899 my($self, @param) = @_;
901 return $NEWLINE if defined $NEWLINE;
904 $self->_rearrange([qw( CLIENT )], @param);
906 my $FH = $self->create_filehandle(@param);
908 if(not ref $client) { $client = $self; }
910 if($client->{'_input_type'} =~ /STDIN|Glob|compressed/) {
911 # Can't taste from STDIN since we can't seek 0 on it.
912 # Are other non special Glob refs seek-able?
913 # Attempt to guess newline based on platform.
914 # Not robust since we could be reading Unix files on a Mac, e.g.
915 if(defined $ENV{'MACPERL'}) {
916 $NEWLINE = "\015"; # \r
918 $NEWLINE = "\012"; # \n
921 $NEWLINE = $self->taste_file($FH);
924 close ($FH) unless ($client->{'_input_type'} eq 'STDIN' ||
925 $client->{'_input_type'} eq 'FileHandle' ||
926 $client->{'_input_type'} eq 'Glob' );
928 delete $client->{'_input_type'};
930 return $NEWLINE || $DEFAULT_NEWLINE;
936 Usage : $object->taste_file( <FileHandle> );
937 : Mainly a utility method for get_newline().
938 Purpose : Sample a filehandle to determine the character(s) used for a newline.
939 Example : $char = $Util->taste_file($FH)
940 Argument : Reference to a FileHandle object.
941 Returns : String containing an octal represenation of the newline character string.
942 : Unix = "\012" ("\n")
943 : Win32 = "\012\015" ("\r\n")
944 : Mac = "\015" ("\r")
945 Throws : Exception if no input is read within $TIMEOUT_SECS seconds.
946 : Exception if argument is not FileHandle object reference.
947 : Warning if cannot determine neewline char(s).
948 Comments : Based on code submitted by Vicki Brown (vlb@deltagen.com).
950 See Also : L<get_newline()|get_newline>
957 my ($self, $FH) = @_;
958 my $BUFSIZ = 256; # Number of bytes read from the file handle.
959 my ($buffer, $octal, $str, $irs, $i);
961 ref($FH) eq 'FileHandle' or $self->throw("Can't taste file: not a FileHandle ref");
965 # this is a quick hack to check for availability of alarm(); just copied
966 # from Bio/Root/IOManager.pm HL 02/19/01
967 my $alarm_available = 1;
972 # alarm() not available (ActiveState perl for win32 doesn't have it.
973 # See jitterbug PR#98)
974 $alarm_available = 0;
976 $SIG{ALRM
} = sub { die "Timed out!"; };
979 $alarm_available && alarm( $TIMEOUT_SECS );
980 $result = read($FH, $buffer, $BUFSIZ); # read the $BUFSIZ characters of file
981 $alarm_available && alarm(0);
983 if($@
=~ /Timed out!/) {
984 $self->throw( "Timed out while waiting for input.",
985 "Timeout period = $TIMEOUT_SECS seconds.\n"
986 ."For longer time before timing out, edit \$TIMEOUT_SECS in Bio::Root::Utilities.pm.");
988 } elsif(not $result) {
990 $self->throw("read taste failed to read from FileHandle.", $err);
992 } elsif($@
=~ /\S/) {
994 $self->throw("Unexpected error during read: $err");
997 seek($FH, 0, 0) or $self->throw("seek failed to seek 0 on FileHandle.");
999 my @chars = split(//, $buffer);
1002 for ($i = 0; $i <$BUFSIZ; $i++) {
1003 if (($chars[$i] eq "\012")) {
1004 unless ($chars[$i-1] eq "\015") {
1011 } elsif (($chars[$i] eq "\015") && ($chars[$i+1] eq "\012")) {
1013 $octal = "\015\012";
1017 } elsif (($chars[$i] eq "\015")) {
1026 $self->warn("Could not determine newline char. Using '\012'");
1029 #print STDERR "FLAVOR=$flavor, NEWLINE CHAR = $irs\n";
1036 Usage : $object->file_flavor( <filename> );
1037 Purpose : Returns the 'flavor' of a given file (unix, dos, mac)
1038 Example : print "$file has flavor: ", $Util->file_flavor($file);
1039 Argument : filename = string, full path name for file
1040 Returns : String describing flavor of file and handy info about line endings.
1041 : One of these is returned:
1042 : unix (\n or 012 or ^J)
1043 : dos (\r\n or 015,012 or ^M^J)
1044 : mac (\r or 015 or ^M)
1046 Throws : Exception if argument is not a file
1047 : Propagates any exceptions thrown by Bio::Root::Utilities::get_newline().
1049 See Also : L<get_newline()|get_newline>, L<taste_file()|taste_file>
1056 my ($self, $file) = @_;
1057 my %flavors=("\012" =>'unix (\n or 012 or ^J)',
1058 "\015\012" =>'dos (\r\n or 015,012 or ^M^J)',
1059 "\015" =>'mac (\r or 015 or ^M)'
1062 -f
$file or $self->throw("Could not determine flavor: arg '$file' is either non existant or is not a file.\n");
1063 my $octal = $self->get_newline($file);
1064 my $flavor = $flavors{$octal} || "unknown";
1068 ######################################
1069 ##### Mail Functions ########
1070 ######################################
1072 =head2 mail_authority
1074 Title : mail_authority
1075 Usage : $Util->mail_authority( $message )
1076 Purpose : Syntactic sugar to send email to $Bio::Root::Global::AUTHORITY
1078 See Also : L<send_mail()|send_mail>
1083 sub mail_authority
{
1085 my( $self, $message ) = @_;
1086 my $script = $self->untaint($0,1);
1088 my $email = $self->{'_auth_email'} || $AUTHORITY;
1089 if (defined $email) {
1090 $self->send_mail( -TO
=>$AUTHORITY, -SUBJ
=>$script, -MSG
=>$message);
1092 $self->throw("Can't email authority. No email defined.");
1099 Usage : $Util->authority('admin@example.com');
1100 Purpose : Set/get the email address that should be notified by mail_authority()
1102 See Also : L<mail_authority()|mail_authority>
1109 my( $self, $email ) = @_;
1110 $self->{'_auth_email'} = $email if defined $email;
1111 return $self->{'_auth_email'};
1118 Usage : $Util->send_mail( named_parameters )
1119 Purpose : Provides an interface to mail or sendmail, if available
1121 Argument : Named parameters: (case-insensitive)
1122 : -TO => e-mail address to send to
1123 : -SUBJ => subject for message (optional)
1124 : -MSG => message to be sent (optional)
1125 : -CC => cc: e-mail address (optional)
1126 Thows : Exception if TO: address appears bad or is missing.
1127 : Exception if mail cannot be sent.
1128 Comments : Based on TomC's tip at:
1129 : http://www.perl.com/CPAN/doc/FMTEYEWTK/safe_shellings
1131 : Using default 'From:' information.
1132 : sendmail options used:
1133 : -t: ignore the address given on the command line and
1134 : get To:address from the e-mail header.
1135 : -oi: prevents send_mail from ending the message if it
1136 : finds a period at the start of a line.
1138 See Also : L<mail_authority()|mail_authority>
1146 my( $self, @param) = @_;
1147 my($recipient,$subj,$message,$cc) = $self->_rearrange([qw(TO SUBJ MSG CC)],@param);
1149 $self->throw("Invalid or missing e-mail address: $recipient")
1150 if not $recipient =~ /\S+\@\S+/;
1152 $subj ||= 'empty subject'; $message ||= '';
1154 # Best to use mail rather than sendmail. Permissions on sendmail in
1155 # linux distros have been significantly locked down in recent years,
1156 # due to the perception that it is insecure.
1158 if ($exe = $self->find_exe('mail')) {
1162 $self->debug("send_mail: $exe -s '$subj' $ccinfo $recipient\n");
1163 open (MAIL
, "| $exe -s '$subj' $ccinfo $recipient") ||
1164 $self->throw("Can't send email: mail cannot fork: $!");
1165 print MAIL
<<QQ_EOFM_QQ;
1168 $? and $self->warn("mail didn't exit nicely: $?");
1170 } elsif ($exe = $self->find_exe('sendmail')) {
1171 open (SENDMAIL, "| $exe -oi -t") ||
1172 $self->throw("Can't send email: sendmail cannot fork: $!");
1173 print SENDMAIL <<QQ_EOFSM_QQ
;
1181 $?
and $self->warn("sendmail didn't exit nicely: $?");
1185 $self->throw("Can't find executable for mail or sendmail.");
1193 Usage : $Util->find_exe(name);
1194 Purpose : Locate an executable (for use in a system() call, e.g.))
1195 Example : $Util->find_exe("gzip");
1196 Returns : String containing executable that passes the -x test.
1197 Returns undef if an executable of the supplied name cannot be found.
1198 Argument : Name of executable to be found.
1199 : Can be a full path. If supplied name is not executable, an executable
1200 : of that name will be searched in all directories in the currently
1201 : defined PATH environment variable.
1202 Throws : No exceptions, but issues a warning if multiple paths are found
1203 : for a given name. The first one is used.
1204 Comments : TODO: Confirm functionality on all bioperl-supported platforms.
1205 May get tripped up by variation in path separator character used
1206 for splitting ENV{PATH}.
1214 my ($self, $name) = @_;
1216 if ($^O
=~ m/mswin/i) {
1217 @bindirs = split ';', $ENV{'PATH'};
1218 # Add usual executable extension if missing or -x won't work
1219 $name.= '.exe' if ($name !~ m/\.exe$/i);
1222 @bindirs = split ':', $ENV{'PATH'};
1228 foreach my $d (@bindirs) {
1229 # Note: Windows also understand '/' as folder separator,
1230 # so there is no need to use a conditional with '\'
1231 push(@exes, "$d/$name") if -x
"$d/$name";
1235 if (defined $exes[1]) {
1236 $self->warn("find_exe: Multiple paths to '$name' found. Using $exe.");
1244 ######################################
1245 ### Interactive Functions #####
1246 ######################################
1252 Usage : $Util->yes_reply( [query_string]);
1253 Purpose : To test an STDIN input value for affirmation.
1254 Example : print +( $Util->yes_reply('Are you ok') ? "great!\n" : "sorry.\n" );
1255 : $Util->yes_reply('Continue') || die;
1256 Returns : Boolean, true (1) if input string begins with 'y' or 'Y'
1257 Argument: query_string = string to be used to prompt user (optional)
1258 : If not provided, 'Yes or no' will be used.
1259 : Question mark is automatically appended.
1269 $query ||= 'Yes or no';
1270 print "\n$query? (y/n) [n] ";
1271 chomp( $reply = <STDIN
> );
1279 Title : request_data()
1280 Usage : $Util->request_data( [value_name]);
1281 Purpose : To request data from a user to be entered via keyboard (STDIN).
1282 Example : $name = $Util->request_data('Name');
1283 : # User will see: % Enter Name:
1284 Returns : String, (data entered from keyboard, sans terminal newline.)
1285 Argument: value_name = string to be used to prompt user.
1286 : If not provided, 'data' will be used, (not very helpful).
1287 : Question mark is automatically appended.
1295 my $data = shift || 'data';
1296 print "Enter $data: ";
1297 # Remove the terminal newline char.
1298 chomp($data = <STDIN
>);
1311 # Not much used since you can use request_data()
1312 # and test for an empty string.
1315 chop( $reply = <STDIN
> );
1320 =head2 verify_version
1322 Purpose : Checks the version of Perl used to invoke the script.
1323 : Aborts program if version is less than the given argument.
1324 Usage : verify_version('5.000')
1329 sub verify_version
{
1332 my $reqVersion = shift;
1334 $] < $reqVersion and do {
1335 printf STDERR
( "\a\n%s %0.3f.\n", "** Sorry. This Perl script requires at least version", $reqVersion);
1336 printf STDERR
( "%s %0.3f %s\n\n", "You are running Perl version", $], "Please update your Perl!\n\n" );