add unicode-aware expand command
[hband-tools.git] / tabdata / mrkv2td
blobffad332b0947d3f48511cf3ba7a402eef5a88036
1 #!/usr/bin/env perl
3 =pod
5 =head1 NAME
7 mrkv2td - Transform multi-record key-value (MRKV) stream to tabular data format.
9 =head1 DESCRIPTION
As the tabular data format presents field names at the start of transmission,
mrkv2td(1) infers them only from the first record,
so there is no need to buffer the whole dataset to find all fields,
and it's usual for all records to have all fields anyway.
16 =head1 OPTIONS
18 =over 4
20 =item -s, --separator I<REGEXP>
22 Regexp which separates field name from cell data in MRKV stream.
23 Default is TAB (C<\t>).
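=item -g, --multiline-glue I<STRING>

String inserted between the values when the same field name occurs
more than once within a record.
Default is newline (C<\n>).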
27 =back
29 =head1 SEE ALSO
31 td2mrkv(1)
33 =cut
# Option defaults.

# Regexp separating the field name from the cell data on each input line.
$OptSeparatorRegexp = "\t";
# String glued between the values of a field repeated within one record.
$OptMultilineGlue = "\n";

# Option specification => variable receiving the value.
# NOTE(review): presumably consumed by the option parser in common.pl - confirm.
%OptionDefs = (
	's|separator=s' => \$OptSeparatorRegexp,
	'g|multiline-glue=s' => \$OptMultilineGlue,
);

use Data::Dumper;
no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';

# Load the shared tabdata helpers.
# `do FILE` reports compile/runtime errors in $@, but an unreadable or
# missing file in $!, so each failure mode gets its own message instead
# of dying with an empty "$@".
my $common_lib = '/usr/lib/tool/perl5/tabdata/common.pl';
my $loaded = do $common_lib;
unless($loaded)
{
	die "$common_lib: $@" if $@;
	die "$common_lib: $!\n" unless defined $loaded;
	die "$common_lib: did not return a true value\n";
}
# flush_record(\%colnum, \%record)
#
# Print one record as a tabular data row on the currently selected output
# handle, preceded by the header row if it has not been sent yet.
#
#   $colnum  hashref: field name => column index; defines the set and the
#            order of the columns.
#   $record  hashref: field name => cell data of the record to print.
#
# Exactly one cell is emitted per header column, so rows always line up
# with the header:
#   - a field missing from the record is passed to escape_tabdata() as undef
#     (the same value a cell gets when its input line has no separator);
#   - a field that has no column is reported on STDERR and left out,
#     because there is no column it could be placed in.
#
# Uses the globals $FS, $RS and $header_sent, and escape_tabdata().
# Returns the result of the final print.
sub flush_record
{
	my $colnum = shift;
	my $record = shift;

	# Column names in output order.
	my @columns = sort { $colnum->{$a} <=> $colnum->{$b} } keys %$colnum;

	if(not $header_sent)
	{
		print join($FS, map { escape_tabdata($_) } @columns).$RS;
		$header_sent = 1;
	}

	for my $field (sort grep { not exists $colnum->{$_} } keys %$record)
	{
		warn "$0: field not present in the header, dropped: $field\n";
	}

	print join($FS, map { escape_tabdata($record->{$_}) } @columns).$RS;
}
# Parsing state.
@Headers = ();       # field names of the first record, in order of first appearance
%ColumnNumber = ();  # field name => zero-based column index
%record = ();        # fields of the record currently being read
$record_num = 0;     # number of records flushed so far
$header_sent = 0;    # set by flush_record() once the header row is printed

# Records are separated by empty lines; every other line is
# "<field><separator><cell>".
while(my $line = <STDIN>)
{
	chomp $line;
	if($line eq '')
	{
		# End of record. Consecutive empty lines do not produce empty records.
		if(%record)
		{
			flush_record(\%ColumnNumber, \%record);
			%record = ();
			$record_num++;
		}
	}
	else
	{
		# Limit 2: only the first separator splits, the rest belongs to the cell.
		my ($field, $cell) = split /$OptSeparatorRegexp/, $line, 2;
		if(exists $record{$field})
		{
			# Repeated field within a record: append to the existing cell.
			$record{$field} .= $OptMultilineGlue . $cell;
		}
		else
		{
			$record{$field} = $cell;
			# Columns are defined by the first record only. Register a field
			# just once, on its first appearance, so a repeated field neither
			# duplicates the header nor changes the column order.
			if($record_num == 0)
			{
				push @Headers, $field;
				$ColumnNumber{$field} = $#Headers;
			}
		}
	}
}

# The last record need not be terminated by an empty line.
if(%record)
{
	flush_record(\%ColumnNumber, \%record);
}