Merge pull request #42 from solgenomics/topic/duplicate_image_warning
[cxgn-corelibs.git] / lib / CXGN / Tools / File.pm
blobc0fa263e346321b135965a922234cb7651a07982
1 package CXGN::Tools::File;
2 use strict;
3 use warnings;
4 use POSIX;
5 use Carp;
7 #use UNIVERSAL qw/isa/;
9 use CXGN::Tools::Text;
10 use HTML::TreeBuilder;
12 =head1 NAME
14 CXGN::Tools::File - assorted lowish-level utilities for
15 working with, um, files and stuff.
17 =head1 FUNCTIONS
19 All functions below are EXPORT_OK.
21 =cut
23 BEGIN {
24 our @EXPORT_OK = qw/
25 read_commented_file
26 get_sections
27 create_zips_only
28 create_displays_only
29 create_thumbnails_displays_zips_for_psd_to_png
30 create_thumbnails_displays_zips
31 traverse_dir
32 count_file_lines
33 executable_is_in_path
34 size_changing
35 is_filehandle
38 our @EXPORT_OK;
39 use base qw/Exporter/;
41 =head2 read_commented_file
43 Usage: my $contents = read_commented_file('myfile.txt');
44 Desc : like file_contents, except removes any
45 lines with # as the first characterf
46 Args : filename
47 Ret : string containing file contents
48 Side Effects: dies if file not found or not readable
50 =cut
52 sub read_commented_file {
53 my ($filename) = @_;
54 open(my $FILE,'<', $filename) or return("Could not open file $filename: $!");
55 my $file_contents = "";
56 while (<$FILE>) {
57 next if /^\#/;
58 $file_contents .= $_;
60 return $file_contents;
63 #function to get sections of text from a file. sections are separated by single empty lines. used by index.pl.
64 sub get_sections {
65 my($filename,$number_of_sections_to_get)=@_;
66 my $content='';
67 my $FILE;
68 open($FILE,'<', $filename) or croak "Could not open file $filename: $!";
70 # the first line contains the number of sections to show
71 # and is of the format: COUNT <integer>
72 $number_of_sections_to_get = <$FILE>;
73 $number_of_sections_to_get =~ s/.*COUNT\s+(\d+).*/$1/i;
74 while($number_of_sections_to_get>0)
76 if(my $line=<$FILE>)
78 # ignore comment lines
79 if ($line =~ /^\#/) { next; }
80 if($line=~/^\s+$/)#if line is all whitespace, then it's a section divider, so we've completed another section
82 $number_of_sections_to_get--;
84 else#otherwise, we want this line
86 $content.=$line;
89 else
91 $number_of_sections_to_get=0;
94 close $FILE;
95 return $content;
98 sub get_div_sections {
99 my ($file, $num_sections, $skip_num) = @_;
100 $skip_num = 0 unless ($skip_num > 0);
101 $num_sections = 1 unless ($num_sections > 1);
102 my $root = HTML::TreeBuilder->new_from_file($file);
103 my ($body) = grep { $_->tag eq "body"} $root->content_list;
104 my (@div_children) = grep{$_->tag eq "div"} $body->content_list;
105 my ($lb, $rb) = ($skip_num, $num_sections + $skip_num - 1);
106 my @sections = @div_children[$lb..$rb];
107 my $content = "";
108 $content .= $_->as_HTML() foreach(@sections);
109 return ($content, scalar @sections, scalar @div_children);
113 =head2 create_zips_only
115 Desc:
116 Args:
117 Ret :
118 Side Effects:
119 Example:
121 =cut
123 sub create_zips_only($$$) {
124 my ($image_name, $source_dir, $dest_dir) = @_;
126 #parse file path to find path and name
127 my $tmp = $image_name;
128 $tmp =~ /($source_dir)(.*)(\/)(.*)(\.)(png|jpg|tiff|tif|gif|psd)$/i;
129 my $path = $2;
130 my $just_name = $4;
131 my $no_ext = "$1"."$2"."$3"."$4";
132 my $no_path = "$4"."$5"."$6";
133 my $ext = $6;
135 #create mirror directory structure for thumbnails and original image zipped
136 if ($path) {
137 system ("mkdirhier "."$dest_dir"."/zips"."$path");
139 else {
140 system ("mkdirhier "."$dest_dir"."/zips");
141 $path = "";
144 #zip and move original image
145 #zip new_name original
146 #print "zip "."$just_name"." $image_name -D\n";
147 #print "mv $just_name"."\."."zip"." $dest_dir"."/zips"."$path\n\n";
148 #system ("gzip "." $image_name"."$just_name\.zip");
149 #system ("mv $just_name"."\."."zip"." $dest_dir"."/zips"."$path");
151 #copy and gzip original image
152 system ("cp $image_name" . " $dest_dir" . "/zips"."$path");
153 system ("gzip " . "$dest_dir" . "/zips"."$path"."/$just_name"."\.$ext");
154 #system ("mv $dest_dir" . "/zips"."$path"."/$just_name"."\.$ext"."\.gz"." $dest_dir" . "/zips"."$path"."/$just_name"."\.$ext"."\.zip");
158 =head2 create_displays_only
160 Desc:
161 Args:
162 Ret :
163 Side Effects:
164 Example:
166 =cut
168 sub create_displays_only($$$) {
169 my ($image_name, $source_dir, $dest_dir) = @_;
171 #parse file path to find path and name
172 my $tmp = $image_name;
173 $tmp =~ /($source_dir)(.*)(\/.*)(\.)(png|jpg|tiff|tif|gif|psd)$/i;
174 my $path = $2;
175 my $just_name = $3;
176 my $ext = $5;
177 #print "path - $path\n";
178 #print "filename - $just_name\n";
180 #create mirror directory structure for thumbnails and original image zipped
181 if ($path) {
182 system ("mkdirhier "."$dest_dir"."/displays"."$path");
184 else {
185 system ("mkdirhier "."$dest_dir"."/displays");
186 $path = "";
189 #create display image in png format
190 system ("convert -geometry 440 -format png $image_name "."$dest_dir" . "/displays"."$path"."$just_name"."_display.png");
191 # system ("cp $image_name" . " $dest_dir" . "/displays"."$path"."$just_name"."_display"."\.$ext");
192 # if (($ext eq "tiff") || ($ext eq "tif")) {
193 # system (convert $);
194 # $ext = "png";
196 # system ("mogrify -geometry 440 -format png $dest_dir"."/displays"."$path"."$just_name"."_display"."\.$ext");
200 =head2 create_thumbnails_displays_zips_for_psd_to_png
202 Desc:
203 Args:
204 Ret :
205 Side Effects:
206 Example:
208 Legacy Documentation:
210 ----------------------------------------------------------------
211 INPUT:
212 directory path of the image to be processed
213 destination of thumbnail, display and zipped source files
214 OUTPUT:
215 thumbnails (standard-size and display sizes) and original
216 in png format with a mirroring original directory structure
217 testing: /auto/home/jenny/Phenotypic/Generated_Images
218 thumbnails will have the original name plus "_thumbnail"
219 display images will have the original name plus "_display"
220 zipped images will have the original name plus ".gz" in the extension
222 =cut
224 sub create_thumbnails_displays_zips_for_psd_to_png ($$$) {
225 print "entering create_thumbnails_displays_zips_for_psd_to_png... \n";
227 my ($image_name, $source_dir, $dest_dir) = @_;
228 #print "$image_name, \n$source_dir, \n$dest_dir \n\n";
230 #parse file path to find path and name
231 my $tmp = $image_name;
232 $tmp =~ /($source_dir)(.*)(\/.*)(\.)(png|jpg|tiff|tif|gif|psd)$/i;
233 my $path = $2;
234 my $just_name = $3;
236 my $ext = $5;
237 if ($ext eq "png") {
238 print "\n$image_name\n";
240 #print "path - $path\n";
241 #print "filename - $just_name\n";
243 #create thumbnail
244 # system ("convert -geometry 63x63! $image_name "."$dest_dir" . "/thumbnails" . "$path" . "$just_name"."_thumbnail.jpg");
245 #system ("convert -size 63x63 $image_name -resize 216x274 +profile \"*\" " . "$dest_dir" . "/thumbnails" . "$path" . "/$just_name"."_thumbnail.jpg");
247 #copy and gzip original image
248 # system ("cp $image_name" . " $dest_dir" . "/zips"."$path");
249 # system ("gzip " . "$dest_dir" . "/zips"."$path"."$just_name"."\.$ext");
251 #create display image in png format
252 system ("convert -geometry 440 $image_name "."$dest_dir" . "/displays"."$path"."$just_name"."_display.png");
253 #system ("mogrify -format png -resize 720x720! $image_name");
254 #system ("mv $source_dir"."$path"."$just_name"."\.png " . "$dest_dir" . "/displays"."$path"."/$just_name"."_display.png");
256 print "leaving create_thumbnails... \n\n";
260 =head2 create_thumbnails_displays_zips
262 Usage:
263 Desc :
264 Ret :
265 Args :
266 Side Effects:
267 Example:
269 Legacy documentation:
271 ----------------------------------------------------------------
272 INPUT:
273 directory path of the image to be processed
274 destination of thumbnail, display and zipped source files
275 OUTPUT:
276 thumbnails (standard-size and display sizes) and original
277 in png format with a mirroring original directory structure
278 testing: /auto/home/jenny/Phenotypic/Generated_Images
279 thumbnails will have the original name plus "_thumbnail"
280 display images will have the original name plus "_display"
281 zipped images will have the original name plus ".gz" in the extension
283 =cut
285 sub create_thumbnails_displays_zips ($$$) {
286 #print "entering create_thumbnails... \n";
288 my ($image_name, $source_dir, $dest_dir) = @_;
289 #print "$image_name, \n$source_dir, \n$dest_dir \n\n";
291 #parse file path to find path and name
292 my $tmp = $image_name;
293 $tmp =~ /($source_dir)(.*)(\/.*)(\.)(png|jpg|tiff|tif|gif|psd)$/i;
294 my $path = $2;
295 my $just_name = $3;
296 my $ext = $5;
297 #print "path - $path\n";
298 #print "filename - $just_name\n";
300 #create mirror directory structure for thumbnails and original image zipped
301 if ($path) {
302 system ("mkdirhier "."$dest_dir"."/thumbnails"."$path");
303 system ("mkdirhier "."$dest_dir"."/displays"."$path");
304 system ("mkdirhier "."$dest_dir"."/zips"."$path");
306 else {
307 system ("mkdirhier "."$dest_dir"."/thumbnails");
308 system ("mkdirhier "."$dest_dir"."/displays");
309 system ("mkdirhier "."$dest_dir"."/zips");
310 $path = "";
313 #create thumbnail
314 system ("convert -geometry 63x63! $image_name "."$dest_dir" . "/thumbnails" . "$path" . "$just_name"."_thumbnail.jpg");
315 #system ("convert -size 63x63 $image_name -resize 216x274 +profile \"*\" " . "$dest_dir" . "/thumbnails" . "$path" . "/$just_name"."_thumbnail.jpg");
317 #copy and gzip original image
318 system ("cp $image_name" . " $dest_dir" . "/zips"."$path");
319 system ("gzip " . "$dest_dir" . "/zips"."$path"."$just_name"."\.$ext");
321 #create display image in png format
322 system ("convert -geometry 440 $image_name "."$dest_dir" . "/displays"."$path"."$just_name"."_display.png");
323 #system ("mogrify -format png -resize 720x720! $image_name");
324 #system ("mv $source_dir"."$path"."$just_name"."\.png " . "$dest_dir" . "/displays"."$path"."/$just_name"."_display.png");
326 #print "leaving create_thumbnails... \n\n";
331 =head2 traverse_dir
333 THIS FUNCTION IS DEPRECATED. USE L<File::Find> for this. Do
334 'perldoc File::Find'
336 Usage:
337 Desc :
338 Ret :
339 Args :
340 Side Effects:
341 Example:
343 ----------------------------------------------------------------
344 Dan's function to recursively traverse a directory,
345 appends all file names (with full paths) to a list
347 =cut
349 sub traverse_dir ($) {
350 #my ($dirarg)= "/home/jenny/bin/";
351 #print "entering traverse_dir...\n<br />";
353 my ($dirarg)= @_;
354 opendir (THISDIR, $dirarg) or croak "Couldn't open directory $dirarg\n";
356 #skip any files starting with .
357 my @dir_list= grep !/^\./, readdir THISDIR;
358 #print "\ndirectory list: @dir_list\n<br />";
360 my ($filename, @filelist);
362 foreach $filename (@dir_list){
363 #print "filename: $filename\n<br />";
364 $filename=$dirarg.$filename;
365 if(-d $filename){
366 $filename.='/';
367 push @filelist, &traverse_dir($filename);
369 else{
370 #print "adding $filename\n<br />";
371 push @filelist, $filename;
375 #print "leaving traverse_dir...\n<br />";
376 return @filelist;
380 =head2 count_file_lines
382 Desc: count the number of lines in a file, in pure perl
383 Args: the file name
384 Ret : the number of lines in the file
385 Side Effects: dies if the file can't be opened
386 Example:
388 my $lines = count_file_lines('my_lame_file.txt');
389 print "There are $lines lines in that file, yo.\n";
391 =cut
393 sub count_file_lines {
394 my $filename = shift;
396 my $pagesize = POSIX::sysconf(&POSIX::_SC_PAGESIZE);
397 my $lines = 0;
398 open(my $bleh, $filename) or croak "Can't open `$filename': $!";
399 while (sysread $bleh, my $buffer, $pagesize) {
400 $lines += ($buffer =~ tr/\n//);
402 close $bleh;
403 return $lines;
406 =head2 executable_is_in_path
408 Usage: print 'we have cross_match' if executable_is_in_path('cross_match');
409 Desc : figure out if an executable with the given name is in our execution path
410 Ret : 1 if there is an executable by that name, 0 otherwise
411 Args : name of an executable
412 Side Effects: runs 'which'
413 Example:
415 =cut
417 sub executable_is_in_path($) {
418 my $executable = shift;
419 `which $executable` or return 0;
420 return 1;
424 =head2 size_changing
426 Usage: print "changing!" if size_changing('myfile.txt');
427 Desc : given a filename, look at the file over a X-second window
428 of time and see if its size is changing
429 Ret : 1 if it is changing, undef if not
430 Args : filename,
431 (optional) time in seconds to sleep between looks
432 default 5 seconds.
433 Side Effects: stats a file, sleeps, stats it again
435 =cut
437 sub size_changing {
438 my $filename = shift;
439 my $sleeptime = shift || 5;
440 -f $filename or croak "$filename is not a file!\n";
442 my $begin_time = time;
443 my @begin_stat = stat $filename
444 or croak "Could not stat $filename: $!\n";
446 sleep $sleeptime;
448 return $begin_stat[7] != (stat $filename)[7];
451 =head2 is_filehandle
453 Usage: print "it's a filehandle" if is_filehandle($my_thing);
454 Desc : check whether the given thing is usable as a filehandle.
455 I put this in a module cause a filehandle might be either
456 a GLOB or isa IO::Handle or isa Apache::Upload
457 Ret : true if it is a filehandle, false otherwise
458 Args : a single thing
459 Side Effects: none
461 =cut
463 sub is_filehandle {
464 my ($thing) = @_;
465 return ref($thing) eq 'IO::Handle' || ref($thing) eq 'Apache::Upload' || ref($thing) eq 'GLOB';
470 1;#do not remove