stdfilt to access child process pid
[hband-tools.git] / user-tools / tabularize
blob86b12ecdd07e2eda27c4fdcb89d07610ce3b5fa6
1 #!/usr/bin/env perl
3 =pod
5 =head1 NAME
7 tabularize - Takes TAB-delimited lines of text and outputs formatted table.
9 =head1 SYNOPSIS
11 I<COMMAND> | tabularize [I<OPTIONS>]
13 =head1 OPTIONS
15 =over 4
17 =item -a, --ascii
19 7-bit ascii borders
21 =item -u, --unicode
23 borders with nice graphical chars
25 =item -H, --no-horizontal
27 no horizontal lines in the output
29 =item -M, --no-margins
31 no margins, ie. no right-most and left-most vertical borders
33 =item -p, --padding I<NUM>
35 add space padding in cells.
36 I<NUM> is how many spaces.
38 =item -v, --output-vertical-separator I<CHAR>
40 vertical separator character(s) in the output
42 =item -r, --align-right I<NUM>
44 align these columns (0-indexed) to the right,
45 others are auto-detected and if they seem to hold mostly numeric data,
46 then aligned to the right;
47 otherwise to the left.
48 this option is repeatable.
50 =item -l, --align-left I<NUM>
52 similar to --align-right option
54 =back
56 =head1 ENVIRONMENT
58 =over 4
60 =item PAGER
62 If B<$PAGER> is set and standard output is a terminal
63 and the resulting table is wider than the terminal,
64 then pipe the table through B<$PAGER>.
66 =back
68 =head1 SEE ALSO
70 column(1), untabularize(1)
72 =cut
75 use Data::Dumper;
76 use Date::Parse;
77 use DateTime::Format::Strptime;
78 use Encode qw/decode encode decode_utf8 encode_utf8/;
79 use Getopt::Long qw/:config no_ignore_case bundling no_getopt_compat no_auto_abbrev require_order/;
80 use IPC::Run qw/run/;
81 use List::MoreUtils qw/all any none zip/;
82 use Pod::Usage;
83 use Term::Size;
84 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
87 sub set_ascii
89 $verticalBorder = '|';
90 $verticalBar = '|';
91 $horizontalBar = '-';
92 $topleftCorner = '+';
93 $toprightCorner = '+';
94 $bottomleftCorner = '+';
95 $bottomrightCorner = '+';
96 $leftCross = '+';
97 $middleCross = '+';
98 $rightCross = '+';
99 $topCross = '+';
100 $bottomCross = '+';
103 sub set_unicode
105 $verticalBorder = '│';
106 $verticalBar = '│';
107 $horizontalBar = '─';
108 $topleftCorner = '┌';
109 $toprightCorner = '┐';
110 $bottomleftCorner = '└';
111 $bottomrightCorner = '┘';
112 $leftCross = '├';
113 $middleCross = '┼';
114 $rightCross = '┤';
115 $topCross = '┬';
116 $bottomCross = '┴';
119 set_unicode;
121 use constant { ALIGN_LEFT => 1, ALIGN_RIGHT =>2, };
123 $OptHorizontal = 1;
124 $OptMargins = 1;
125 $OptPadding = 0;
126 @columnsAlignment = ();
128 GetOptions(
129 'a|ascii' => \&set_ascii,
130 'u|unicode' => \&set_unicode,
131 'H|no-horizontal' => sub { $OptHorizontal = 0; },
132 'M|no-margins' => sub { $OptMargins = 0; },
133 'p|padding=i' => \$OptPadding,
134 'v|output-vertical-separator=s' => sub {
135 my ($getopt_obj, $param) = @_;
136 $verticalBorder = $param;
137 $verticalBar = $param;
138 $topleftCorner = $param;
139 $toprightCorner = $param;
140 $bottomleftCorner = $param;
141 $bottomrightCorner = $param;
142 $leftCross = $param;
143 $middleCross = $param;
144 $rightCross = $param;
145 $topCross = $param;
146 $bottomCross = $param;
148 'r|align-right=i@' => sub {
149 my ($getopt_obj, $param) = @_;
150 $columnsAlignment[$param] = ALIGN_RIGHT;
152 'l|align-left=i@' => sub {
153 my ($getopt_obj, $param) = @_;
154 $columnsAlignment[$param] = ALIGN_LEFT;
156 'help' => sub { pod2usage(-exitval=>0, -verbose=>99); },
157 '<>' => sub { unshift @ARGV, @_[0]; die '!FINISH'; },
158 ) or pod2usage(-exitval=>2, -verbose=>99);
162 sub is_numeric
164 # consider a cell's value numeric if it has only
165 # - optional sign prefix,
166 # - numbers,
167 # - and optionally a common thousands- and/or fraction separator.
168 # I don't like the anglo-saxon ".123" notation, missing leading zero before the fraction separator.
169 local $_ = shift;
170 /^[+-]?\d+((?'thousands_sep'[,. ])\d+(\g{thousands_sep}\d+)*)?([.,]\d+(\g{thousands_sep}\d+)*)?$/ and return 1;
171 return 0;
175 # compute the width of each column
177 @Table = ();
178 @columnsWidth = ();
179 @numericals_by_col = ();
180 @non_numericals_by_col = ();
182 while(<STDIN>)
184 chomp;
185 my @cells = split /\t/;
187 for my $idx (0 .. $#cells)
189 my $cell = $cells[$idx] || '';
190 my $width = length $cell; # TODO multibyte chars?
191 $columnsWidth[$idx] = $width if $columnsWidth[$idx] < $width;
193 # guess if this cell has numerical content (skip 1st line as it's likely a header)
194 if($. > 0 and not defined $columnsAlignment[$idx])
196 is_numeric($cell) ? ($numericals_by_col[$idx]++) : ($non_numericals_by_col[$idx]++);
200 push @Table, \@cells;
203 if(not @Table)
205 exit;
209 # compose the format string
211 if(not $OptMargins)
213 $verticalBorder = '';
214 $topleftCorner = '';
215 $toprightCorner = '';
216 $bottomleftCorner = '';
217 $bottomrightCorner = '';
218 $leftCross = '';
219 $rightCross = '';
222 for my $idx (0 .. $#columnsWidth)
224 if(not defined $columnsAlignment[$idx])
226 if($numericals_by_col[$idx] >= $non_numericals_by_col[$idx])
228 $columnAlignment[$idx] = ALIGN_RIGHT;
230 else
232 $columnAlignment[$idx] = ALIGN_LEFT;
236 $full_table_width += $idx == 0 ? length($verticalBorder) : length($verticalBar);
237 $full_table_width += $columnsWidth[$idx] + 2*$OptPadding;
240 $full_table_width += length($verticalBorder);
241 $n_cols = scalar @columnsWidth;
242 @verticalBars = ($verticalBar) x ($n_cols-1);
244 if($OptHorizontal)
246 $gridlineTop = $topleftCorner . join($topCross, map {$horizontalBar x ($_+2*$OptPadding)} @columnsWidth) . $toprightCorner;
247 $gridlineInner = $leftCross . join($middleCross, map {$horizontalBar x ($_+2*$OptPadding)} @columnsWidth) . $rightCross;
248 $gridlineBottom = $bottomleftCorner . join($bottomCross, map {$horizontalBar x ($_+2*$OptPadding)} @columnsWidth) . $bottomrightCorner;
250 $line_format = '%s'.
251 join('%s',
252 map {(' 'x$OptPadding).'%'.$_.'s'.(' 'x$OptPadding)}
253 map {($columnAlignment[$_] == ALIGN_RIGHT ? 1 : -1) * $columnsWidth[$_]} 0 .. $#columnsWidth
254 ).'%s';
257 sub print_table
259 my $fh = shift;
260 my $row_num = 0;
261 for my $row (@Table)
263 $row_num++;
264 if($OptHorizontal)
266 print {$fh} ($row_num == 1 ? $gridlineTop : $gridlineInner) . "\n";
269 my @cells = map {$row->[$_] || ''} 0 .. $#columnsWidth;
270 my @cells_and_inner_borders = zip @cells, @verticalBars;
271 delete $cells_and_inner_borders[-1];
272 printf {$fh} $line_format."\n", $verticalBorder, @cells_and_inner_borders, $verticalBorder;
275 if($OptHorizontal)
277 print {$fh} $gridlineBottom . "\n";
282 # display the rendered table
284 if(-t 1 and $ENV{PAGER})
286 my ($terminal_cols, $terminal_rows) = Term::Size::chars(*STDOUT);
287 if($terminal_cols <= $full_table_width)
289 my ($p_read, $p_write);
290 pipe($p_read, $p_write) or die "$0: pipe: $!\n";
291 my $pid = fork;
292 die "$0: fork: $!\n" if not defined $pid;
293 if($pid == 0)
295 close $p_read;
296 print_table($p_write);
297 close $p_write;
298 exit 0;
300 open \*STDIN, '<&', $p_read;
301 close $p_write;
302 exec {$ENV{PAGER}} [$ENV{PAGER}] or warn "$0: exec: $!\n";
303 exit 127;
307 print_table(\*STDOUT);