modified: n.fq
[GalaxyCodeBases.git] / funny / gz.pl
blobe81d6564bb0723645fee6160fe47775f808869c6
1 #!/usr/bin/perl -w
2 use strict;
3 use warnings;
4 #use IO::Handle;
5 use IPC::Open2;
6 use POSIX ":sys_wait_h";
8 =pod
9 http://tools.ietf.org/html/rfc1951#section-3.2
10 5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
11 5 Bits: HDIST, # of Distance codes - 1 (1 - 32)
12 4 Bits: HCLEN, # of Code Length codes - 4 (4 - 19)
13 ...
14 e3 02 00 for "\n";
16 ? bytes compressed data
17 4 bytes crc32
18 4 bytes uncompressed input size modulo 2^32
19 =cut
20 my $minGZ=10; # (5+5+4)b + 8 byte
21 sub Compress($$) {
22 my ($Str,$Bin)=@_;
23 #$Str=v0 if $Str eq '';
24 if (length $Str <= $minGZ) {
25 $_[1] = $Str;
26 return 0;
28 my ($pid,$chld_out, $chld_in);
29 my $sleep_count = 0;
30 do {
31 $pid = open2($chld_out, $chld_in, 'gzip','-6nc');
32 unless (defined $pid) {
33 warn "[!]Cannot fork gzip: $!";
34 die "[x]Bailing out" if $sleep_count++ > 6;
35 sleep 10;
37 } until defined $pid;
38 print $chld_in $Str;
39 close $chld_in;
40 local $/; # enable "slurp" mode
41 $Bin=<$chld_out>;
42 close $chld_out;
43 $_[1] = substr $Bin,10;
44 #waitpid $pid, 0;
45 return $pid;
47 my $GZHeader = v31.139.8.0.0.0.0.0.0.3; # -9 => 2.3; -2~8 => 0.3
48 #cat t.pl|while read a;do echo "$a"|gzip -6nc|hexdump -C;done
49 #00000000 1f 8b 08 00 00 00 00 00 00 03 e3 02 00 93 06 d7
50 sub deCompress($$) {
51 my ($Bin,$Str)=@_;
52 if (length $Bin <= $minGZ) {
53 $_[1] = $Bin;
54 return 0;
56 my ($pid,$chld_out, $chld_in);
57 my $sleep_count = 0;
58 do {
59 $pid = open2($chld_out, $chld_in, 'gzip','-dc');
60 unless (defined $pid) {
61 warn "[!]Cannot fork gzip: $!";
62 die "[x]Bailing out" if $sleep_count++ > 6;
63 sleep 10;
65 } until defined $pid;
66 print $chld_in $GZHeader,$Bin;
67 close $chld_in;
68 local $/; # enable "slurp" mode
69 $Str=<$chld_out>;
70 close $chld_out;
71 $_[1] = $Str;
72 #waitpid $pid, 0;
73 return $pid;
75 sub waitGout() {
76 my $kid;
77 do {
78 $kid = waitpid(-1, WNOHANG);
79 #warn $kid;
80 } while $kid > 0;
82 =pod
83 $ man waitpid
85 pid_t waitpid(pid_t pid, int *status, int options);
86 The value of pid can be:
88 < -1 meaning wait for any child process whose process group ID is
89 equal to the absolute value of pid.
91 -1 meaning wait for any child process.
93 0 meaning wait for any child process whose process group ID is
94 equal to that of the calling process.
96 > 0 meaning wait for the child whose process ID is equal to the
97 value of pid.
99 The value of options is an OR of zero or more of the following conĀ©\
100 stants:
102 WNOHANG return immediately if no child has exited.
104 WUNTRACED also return if a child has stopped (but not traced via
105 ptrace(2)). Status for traced children which have stopped
106 is provided even if this option is not specified.
108 WCONTINUED (since Linux 2.6.10)
109 also return if a stopped child has been resumed by delivery
110 of SIGCONT.
111 =cut
113 my ($ttt,$aaa);
114 my($chld_out, $chld_in);
115 #$|=1;
116 my ($a,$b,$c,$sumA,$sumB);
117 open IN,'<',$ARGV[0] or die "$!";
118 #my $i=1;
119 while (<IN>) {
120 chomp;
121 $a=length($_);
122 Compress($_,$ttt);
123 $b=length $ttt;
124 deCompress($ttt,$aaa);
125 $c=length $aaa;
126 chomp $aaa;
127 print int(1000*$b/$a)/10," %\t$a\t$b\t$c,[$aaa]\n" if $a > 0;
128 $sumA += $a; $sumB += $b;
129 #waitpid(-1,0);
130 #waitGout() unless $i % 100;
131 #++$i;
132 waitGout();
133 #waitpid(-1,0);
134 #waitpid(-1,0);
136 close IN;
137 print '-'x75,"\n",int(10000*$sumB/$sumA)/100," %\t$sumA\t$sumB\n";
139 __END__