new file: cell2loc.py
[GalaxyCodeBases.git] / perl / bsvf / lib / Data / Dump.pm
blobb5564f1856ea91bad2f9ee5951812e9bf9b69667
1 package Data::Dump;
3 use strict;
4 use vars qw(@EXPORT @EXPORT_OK $VERSION $DEBUG);
5 use subs qq(dump);
7 require Exporter;
8 *import = \&Exporter::import;
9 @EXPORT = qw(dd ddx);
10 @EXPORT_OK = qw(dump pp dumpf quote);
12 $VERSION = "1.22";
13 $DEBUG = 0;
15 use overload ();
16 use vars qw(%seen %refcnt @dump @fixup %require $TRY_BASE64 @FILTERS $INDENT);
18 $TRY_BASE64 = 50 unless defined $TRY_BASE64;
19 $INDENT = " " unless defined $INDENT;
21 sub dump
23 local %seen;
24 local %refcnt;
25 local %require;
26 local @fixup;
28 require Data::Dump::FilterContext if @FILTERS;
30 my $name = "a";
31 my @dump;
33 for my $v (@_) {
34 my $val = _dump($v, $name, [], tied($v));
35 push(@dump, [$name, $val]);
36 } continue {
37 $name++;
40 my $out = "";
41 if (%require) {
42 for (sort keys %require) {
43 $out .= "require $_;\n";
46 if (%refcnt) {
47 # output all those with refcounts first
48 for (@dump) {
49 my $name = $_->[0];
50 if ($refcnt{$name}) {
51 $out .= "my \$$name = $_->[1];\n";
52 undef $_->[1];
55 for (@fixup) {
56 $out .= "$_;\n";
60 my $paren = (@dump != 1);
61 $out .= "(" if $paren;
62 $out .= format_list($paren, undef,
63 map {defined($_->[1]) ? $_->[1] : "\$".$_->[0]}
64 @dump
66 $out .= ")" if $paren;
68 if (%refcnt || %require) {
69 $out .= ";\n";
70 $out =~ s/^/$INDENT/gm;
71 $out = "do {\n$out}";
74 print STDERR "$out\n" unless defined wantarray;
75 $out;
78 *pp = \&dump;
80 sub dd {
81 print dump(@_), "\n";
84 sub ddx {
85 my(undef, $file, $line) = caller;
86 $file =~ s,.*[\\/],,;
87 my $out = "$file:$line: " . dump(@_) . "\n";
88 $out =~ s/^/# /gm;
89 print $out;
92 sub dumpf {
93 require Data::Dump::Filtered;
94 goto &Data::Dump::Filtered::dump_filtered;
97 sub _dump
99 my $ref = ref $_[0];
100 my $rval = $ref ? $_[0] : \$_[0];
101 shift;
103 my($name, $idx, $dont_remember, $pclass, $pidx) = @_;
105 my($class, $type, $id);
106 my $strval = overload::StrVal($rval);
107 # Parse $strval without using regexps, in order not to clobber $1, $2,...
108 if ((my $i = rindex($strval, "=")) >= 0) {
109 $class = substr($strval, 0, $i);
110 $strval = substr($strval, $i+1);
112 if ((my $i = index($strval, "(0x")) >= 0) {
113 $type = substr($strval, 0, $i);
114 $id = substr($strval, $i + 2, -1);
116 else {
117 die "Can't parse " . overload::StrVal($rval);
119 if ($] < 5.008 && $type eq "SCALAR") {
120 $type = "REF" if $ref eq "REF";
122 warn "\$$name(@$idx) $class $type $id ($ref)" if $DEBUG;
124 my $out;
125 my $comment;
126 my $hide_keys;
127 if (@FILTERS) {
128 my $pself = "";
129 $pself = fullname("self", [@$idx[$pidx..(@$idx - 1)]]) if $pclass;
130 my $ctx = Data::Dump::FilterContext->new($rval, $class, $type, $ref, $pclass, $pidx, $idx);
131 my @bless;
132 for my $filter (@FILTERS) {
133 if (my $f = $filter->($ctx, $rval)) {
134 if (my $v = $f->{object}) {
135 local @FILTERS;
136 $out = _dump($v, $name, $idx, 1);
137 $dont_remember++;
139 if (defined(my $c = $f->{bless})) {
140 push(@bless, $c);
142 if (my $c = $f->{comment}) {
143 $comment = $c;
145 if (defined(my $c = $f->{dump})) {
146 $out = $c;
147 $dont_remember++;
149 if (my $h = $f->{hide_keys}) {
150 if (ref($h) eq "ARRAY") {
151 $hide_keys = sub {
152 for my $k (@$h) {
153 return 1 if $k eq $_[0];
155 return 0;
161 push(@bless, "") if defined($out) && !@bless;
162 if (@bless) {
163 $class = shift(@bless);
164 warn "More than one filter callback tried to bless object" if @bless;
168 unless ($dont_remember) {
169 if (my $s = $seen{$id}) {
170 my($sname, $sidx) = @$s;
171 $refcnt{$sname}++;
172 my $sref = fullname($sname, $sidx,
173 ($ref && $type eq "SCALAR"));
174 warn "SEEN: [\$$name(@$idx)] => [\$$sname(@$sidx)] ($ref,$sref)" if $DEBUG;
175 return $sref unless $sname eq $name;
176 $refcnt{$name}++;
177 push(@fixup, fullname($name,$idx)." = $sref");
178 return "do{my \$fix}" if @$idx && $idx->[-1] eq '$';
179 return "'fix'";
181 $seen{$id} = [$name, $idx];
184 if ($class) {
185 $pclass = $class;
186 $pidx = @$idx;
189 if (defined $out) {
190 # keep it
192 elsif ($type eq "SCALAR" || $type eq "REF" || $type eq "REGEXP") {
193 if ($ref) {
194 if ($class && $class eq "Regexp") {
195 my $v = "$rval";
197 my $mod = "";
198 if ($v =~ /^\(\?\^?([msix-]*):([\x00-\xFF]*)\)\z/) {
199 $mod = $1;
200 $v = $2;
201 $mod =~ s/-.*//;
204 my $sep = '/';
205 my $sep_count = ($v =~ tr/\///);
206 if ($sep_count) {
207 # see if we can find a better one
208 for ('|', ',', ':', '#') {
209 my $c = eval "\$v =~ tr/\Q$_\E//";
210 #print "SEP $_ $c $sep_count\n";
211 if ($c < $sep_count) {
212 $sep = $_;
213 $sep_count = $c;
214 last if $sep_count == 0;
218 $v =~ s/\Q$sep\E/\\$sep/g;
220 $out = "qr$sep$v$sep$mod";
221 undef($class);
223 else {
224 delete $seen{$id} if $type eq "SCALAR"; # will be seen again shortly
225 my $val = _dump($$rval, $name, [@$idx, "\$"], 0, $pclass, $pidx);
226 $out = $class ? "do{\\(my \$o = $val)}" : "\\$val";
228 } else {
229 if (!defined $$rval) {
230 $out = "undef";
232 elsif (do {no warnings 'numeric'; $$rval + 0 eq $$rval}) {
233 $out = $$rval;
235 else {
236 $out = str($$rval);
238 if ($class && !@$idx) {
239 # Top is an object, not a reference to one as perl needs
240 $refcnt{$name}++;
241 my $obj = fullname($name, $idx);
242 my $cl = quote($class);
243 push(@fixup, "bless \\$obj, $cl");
247 elsif ($type eq "GLOB") {
248 if ($ref) {
249 delete $seen{$id};
250 my $val = _dump($$rval, $name, [@$idx, "*"], 0, $pclass, $pidx);
251 $out = "\\$val";
252 if ($out =~ /^\\\*Symbol::/) {
253 $require{Symbol}++;
254 $out = "Symbol::gensym()";
256 } else {
257 my $val = "$$rval";
258 $out = "$$rval";
260 for my $k (qw(SCALAR ARRAY HASH)) {
261 my $gval = *$$rval{$k};
262 next unless defined $gval;
263 next if $k eq "SCALAR" && ! defined $$gval; # always there
264 my $f = scalar @fixup;
265 push(@fixup, "RESERVED"); # overwritten after _dump() below
266 $gval = _dump($gval, $name, [@$idx, "*{$k}"], 0, $pclass, $pidx);
267 $refcnt{$name}++;
268 my $gname = fullname($name, $idx);
269 $fixup[$f] = "$gname = $gval"; #XXX indent $gval
273 elsif ($type eq "ARRAY") {
274 my @vals;
275 my $tied = tied_str(tied(@$rval));
276 my $i = 0;
277 for my $v (@$rval) {
278 push(@vals, _dump($v, $name, [@$idx, "[$i]"], $tied, $pclass, $pidx));
279 $i++;
281 $out = "[" . format_list(1, $tied, @vals) . "]";
283 elsif ($type eq "HASH") {
284 my(@keys, @vals);
285 my $tied = tied_str(tied(%$rval));
287 # statistics to determine variation in key lengths
288 my $kstat_max = 0;
289 my $kstat_sum = 0;
290 my $kstat_sum2 = 0;
292 my @orig_keys = keys %$rval;
293 if ($hide_keys) {
294 @orig_keys = grep !$hide_keys->($_), @orig_keys;
296 my $text_keys = 0;
297 for (@orig_keys) {
298 $text_keys++, last unless /^[-+]?(?:0|[1-9]\d*)(?:\.\d+)?\z/;
301 if ($text_keys) {
302 @orig_keys = sort { lc($a) cmp lc($b) } @orig_keys;
304 else {
305 @orig_keys = sort { $a <=> $b } @orig_keys;
308 my $quote;
309 for my $key (@orig_keys) {
310 next if $key =~ /^-?[a-zA-Z_]\w*\z/;
311 next if $key =~ /^-?[1-9]\d{0,8}\z/;
312 $quote++;
313 last;
316 for my $key (@orig_keys) {
317 my $val = \$rval->{$key}; # capture value before we modify $key
318 $key = quote($key) if $quote;
319 $kstat_max = length($key) if length($key) > $kstat_max;
320 $kstat_sum += length($key);
321 $kstat_sum2 += length($key)*length($key);
323 push(@keys, $key);
324 push(@vals, _dump($$val, $name, [@$idx, "{$key}"], $tied, $pclass, $pidx));
326 my $nl = "";
327 my $klen_pad = 0;
328 my $tmp = "@keys @vals";
329 if (length($tmp) > 60 || $tmp =~ /\n/ || $tied) {
330 $nl = "\n";
332 # Determine what padding to add
333 if ($kstat_max < 4) {
334 $klen_pad = $kstat_max;
336 elsif (@keys >= 2) {
337 my $n = @keys;
338 my $avg = $kstat_sum/$n;
339 my $stddev = sqrt(($kstat_sum2 - $n * $avg * $avg) / ($n - 1));
341 # I am not actually very happy with this heuristics
342 if ($stddev / $kstat_max < 0.25) {
343 $klen_pad = $kstat_max;
345 if ($DEBUG) {
346 push(@keys, "__S");
347 push(@vals, sprintf("%.2f (%d/%.1f/%.1f)",
348 $stddev / $kstat_max,
349 $kstat_max, $avg, $stddev));
353 $out = "{$nl";
354 $out .= "$INDENT# $tied$nl" if $tied;
355 while (@keys) {
356 my $key = shift @keys;
357 my $val = shift @vals;
358 my $vpad = $INDENT . (" " x ($klen_pad ? $klen_pad + 4 : 0));
359 $val =~ s/\n/\n$vpad/gm;
360 my $kpad = $nl ? $INDENT : " ";
361 $key .= " " x ($klen_pad - length($key)) if $nl;
362 $out .= "$kpad$key => $val,$nl";
364 $out =~ s/,$/ / unless $nl;
365 $out .= "}";
367 elsif ($type eq "CODE") {
368 $out = 'sub { ... }';
370 elsif ($type eq "VSTRING") {
371 $out = sprintf +($ref ? '\v%vd' : 'v%vd'), $$rval;
373 else {
374 warn "Can't handle $type data";
375 $out = "'#$type#'";
378 if ($class && $ref) {
379 $out = "bless($out, " . quote($class) . ")";
381 if ($comment) {
382 $comment =~ s/^/# /gm;
383 $comment .= "\n" unless $comment =~ /\n\z/;
384 $comment =~ s/^#[ \t]+\n/\n/;
385 $out = "$comment$out";
387 return $out;
390 sub tied_str {
391 my $tied = shift;
392 if ($tied) {
393 if (my $tied_ref = ref($tied)) {
394 $tied = "tied $tied_ref";
396 else {
397 $tied = "tied";
400 return $tied;
403 sub fullname
405 my($name, $idx, $ref) = @_;
406 substr($name, 0, 0) = "\$";
408 my @i = @$idx; # need copy in order to not modify @$idx
409 if ($ref && @i && $i[0] eq "\$") {
410 shift(@i); # remove one deref
411 $ref = 0;
413 while (@i && $i[0] eq "\$") {
414 shift @i;
415 $name = "\$$name";
418 my $last_was_index;
419 for my $i (@i) {
420 if ($i eq "*" || $i eq "\$") {
421 $last_was_index = 0;
422 $name = "$i\{$name}";
423 } elsif ($i =~ s/^\*//) {
424 $name .= $i;
425 $last_was_index++;
426 } else {
427 $name .= "->" unless $last_was_index++;
428 $name .= $i;
431 $name = "\\$name" if $ref;
432 $name;
435 sub format_list
437 my $paren = shift;
438 my $comment = shift;
439 my $indent_lim = $paren ? 0 : 1;
440 if (@_ > 3) {
441 # can we use range operator to shorten the list?
442 my $i = 0;
443 while ($i < @_) {
444 my $j = $i + 1;
445 my $v = $_[$i];
446 while ($j < @_) {
447 # XXX allow string increment too?
448 if ($v eq "0" || $v =~ /^-?[1-9]\d{0,9}\z/) {
449 $v++;
451 elsif ($v =~ /^"([A-Za-z]{1,3}\d*)"\z/) {
452 $v = $1;
453 $v++;
454 $v = qq("$v");
456 else {
457 last;
459 last if $_[$j] ne $v;
460 $j++;
462 if ($j - $i > 3) {
463 splice(@_, $i, $j - $i, "$_[$i] .. $_[$j-1]");
465 $i++;
468 my $tmp = "@_";
469 if ($comment || (@_ > $indent_lim && (length($tmp) > 60 || $tmp =~ /\n/))) {
470 my @elem = @_;
471 for (@elem) { s/^/$INDENT/gm; }
472 return "\n" . ($comment ? "$INDENT# $comment\n" : "") .
473 join(",\n", @elem, "");
474 } else {
475 return join(", ", @_);
479 sub str {
480 if (length($_[0]) > 20) {
481 for ($_[0]) {
482 # Check for repeated string
483 if (/^(.)\1\1\1/s) {
484 # seems to be a repating sequence, let's check if it really is
485 # without backtracking
486 unless (/[^\Q$1\E]/) {
487 my $base = quote($1);
488 my $repeat = length;
489 return "($base x $repeat)"
492 # Length protection because the RE engine will blow the stack [RT#33520]
493 if (length($_) < 16 * 1024 && /^(.{2,5}?)\1*\z/s) {
494 my $base = quote($1);
495 my $repeat = length($_)/length($1);
496 return "($base x $repeat)";
501 local $_ = &quote;
503 if (length($_) > 40 && !/\\x\{/ && length($_) > (length($_[0]) * 2)) {
504 # too much binary data, better to represent as a hex/base64 string
506 # Base64 is more compact than hex when string is longer than
507 # 17 bytes (not counting any require statement needed).
508 # But on the other hand, hex is much more readable.
509 if ($TRY_BASE64 && length($_[0]) > $TRY_BASE64 &&
510 (defined &utf8::is_utf8 && !utf8::is_utf8($_[0])) &&
511 eval { require MIME::Base64 })
513 $require{"MIME::Base64"}++;
514 return "MIME::Base64::decode(\"" .
515 MIME::Base64::encode($_[0],"") .
516 "\")";
518 return "pack(\"H*\",\"" . unpack("H*", $_[0]) . "\")";
521 return $_;
524 my %esc = (
525 "\a" => "\\a",
526 "\b" => "\\b",
527 "\t" => "\\t",
528 "\n" => "\\n",
529 "\f" => "\\f",
530 "\r" => "\\r",
531 "\e" => "\\e",
534 # put a string value in double quotes
535 sub quote {
536 local($_) = $_[0];
537 # If there are many '"' we might want to use qq() instead
538 s/([\\\"\@\$])/\\$1/g;
539 return qq("$_") unless /[^\040-\176]/; # fast exit
541 s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
543 # no need for 3 digits in escape for these
544 s/([\0-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
546 s/([\0-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
547 s/([^\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
549 return qq("$_");
554 __END__
556 =head1 NAME
558 Data::Dump - Pretty printing of data structures
560 =head1 SYNOPSIS
562 use Data::Dump qw(dump);
564 $str = dump(@list);
565 @copy_of_list = eval $str;
567 # or use it for easy debug printout
568 use Data::Dump; dd localtime;
570 =head1 DESCRIPTION
572 This module provide a few functions that traverse their
573 argument and produces a string as its result. The string contains
574 Perl code that, when C<eval>ed, produces a deep copy of the original
575 arguments.
577 The main feature of the module is that it strives to produce output
578 that is easy to read. Example:
580 @a = (1, [2, 3], {4 => 5});
581 dump(@a);
583 Produces:
585 "(1, [2, 3], { 4 => 5 })"
587 If you dump just a little data, it is output on a single line. If
588 you dump data that is more complex or there is a lot of it, line breaks
589 are automatically added to keep it easy to read.
591 The following functions are provided (only the dd* functions are exported by default):
593 =over
595 =item dump( ... )
597 =item pp( ... )
599 Returns a string containing a Perl expression. If you pass this
600 string to Perl's built-in eval() function it should return a copy of
601 the arguments you passed to dump().
603 If you call the function with multiple arguments then the output will
604 be wrapped in parenthesis "( ..., ... )". If you call the function with a
605 single argument the output will not have the wrapping. If you call the function with
606 a single scalar (non-reference) argument it will just return the
607 scalar quoted if needed, but never break it into multiple lines. If you
608 pass multiple arguments or references to arrays of hashes then the
609 return value might contain line breaks to format it for easier
610 reading. The returned string will never be "\n" terminated, even if
611 contains multiple lines. This allows code like this to place the
612 semicolon in the expected place:
614 print '$obj = ', dump($obj), ";\n";
616 If dump() is called in void context, then the dump is printed on
617 STDERR and then "\n" terminated. You might find this useful for quick
618 debug printouts, but the dd*() functions might be better alternatives
619 for this.
621 There is no difference between dump() and pp(), except that dump()
622 shares its name with a not-so-useful perl builtin. Because of this
623 some might want to avoid using that name.
625 =item quote( $string )
627 Returns a quoted version of the provided string.
629 It differs from C<dump($string)> in that it will quote even numbers and
630 not try to come up with clever expressions that might shorten the
631 output. If a non-scalar argument is provided then it's just stringified
632 instead of traversed.
634 =item dd( ... )
636 =item ddx( ... )
638 These functions will call dump() on their argument and print the
639 result to STDOUT (actually, it's the currently selected output handle, but
640 STDOUT is the default for that).
642 The difference between them is only that ddx() will prefix the lines
643 it prints with "# " and mark the first line with the file and line
644 number where it was called. This is meant to be useful for debug
645 printouts of state within programs.
647 =item dumpf( ..., \&filter )
649 Short hand for calling the dump_filtered() function of L<Data::Dump::Filtered>.
650 This works like dump(), but the last argument should be a filter callback
651 function. As objects are visited the filter callback is invoked and it
652 can modify how the objects are dumped.
654 =back
656 =head1 CONFIGURATION
658 There are a few global variables that can be set to modify the output
659 generated by the dump functions. It's wise to localize the setting of
660 these.
662 =over
664 =item $Data::Dump::INDENT
666 This holds the string that's used for indenting multiline data structures.
667 It's default value is " " (two spaces). Set it to "" to suppress indentation.
668 Setting it to "| " makes for nice visuals even if the dump output then fails to
669 be valid Perl.
671 =item $Data::Dump::TRY_BASE64
673 How long must a binary string be before we try to use the base64 encoding
674 for the dump output. The default is 50. Set it to 0 to disable base64 dumps.
676 =back
679 =head1 LIMITATIONS
681 Code references will be dumped as C<< sub { ... } >>. Thus, C<eval>ing them will
682 not reproduce the original routine. The C<...>-operator used will also require
683 perl-5.12 or better to be evaled.
685 If you forget to explicitly import the C<dump> function, your code will
686 core dump. That's because you just called the builtin C<dump> function
687 by accident, which intentionally dumps core. Because of this you can
688 also import the same function as C<pp>, mnemonic for "pretty-print".
690 =head1 HISTORY
692 The C<Data::Dump> module grew out of frustration with Sarathy's
693 in-most-cases-excellent C<Data::Dumper>. Basic ideas and some code
694 are shared with Sarathy's module.
696 The C<Data::Dump> module provides a much simpler interface than
697 C<Data::Dumper>. No OO interface is available and there are fewer
698 configuration options to worry about. The other benefit is
699 that the dump produced does not try to set any variables. It only
700 returns what is needed to produce a copy of the arguments. This means
701 that C<dump("foo")> simply returns C<'"foo"'>, and C<dump(1..3)> simply
702 returns C<'(1, 2, 3)'>.
704 =head1 SEE ALSO
706 L<Data::Dump::Filtered>, L<Data::Dump::Trace>, L<Data::Dumper>, L<JSON>,
707 L<Storable>
709 =head1 AUTHORS
711 The C<Data::Dump> module is written by Gisle Aas <gisle@aas.no>, based
712 on C<Data::Dumper> by Gurusamy Sarathy <gsar@umich.edu>.
714 Copyright 1998-2010 Gisle Aas.
715 Copyright 1996-1998 Gurusamy Sarathy.
717 This library is free software; you can redistribute it and/or
718 modify it under the same terms as Perl itself.
720 =cut