WIP: spoolprocess
[hband-tools.git] / tabdata / kvpairs2td
blob0c0fd312a8308096b91c23176944691ebd0645ae
1 #!/usr/bin/env perl
3 =pod
5 =head1 NAME
7 kvpairs2td - Transform lines of key-value pairs to tabular data stream
9 =head1 OPTIONS
11 =over 4
13 =item -i, --ignore-non-existing-columns
15 Do not fail when encounters a new field after the first record.
17 =item -w, --warn-non-existing-columns
19 =item -c, --column I<COLUMN>
21 Indicate that there will be a column by the name I<COLUMN>.
22 This is useful if the first record does not have I<COLUMN>.
23 This option is repeatable.
25 =item -r, --restcolumn I<NAME>
27 Name of the column where the rest of the input line will be put
28 which is not part of key-value pairs.
29 Default is B<_REST>.
31 =item -u, --unknown-to-rest
33 Put unknown (non-existing) fields in the "rest" column
34 (see B<-r> option).
36 =back
38 =head1 SEE ALSO
40 td2mrkv(1), td2kvpairs(1)
42 =cut
45 sub parse_next_key_value
47 local $_ = shift;
48 # to support backslash-escaped quotes in quoted strings, we need to bookkeep what is the minimum length
49 # of the string which does not end with an odd number of backslashes, which would mean that the trailing
50 # quote is in fact an escaped quote and should be part of the string.
51 # regexp lookbehind does not work well when need to look behind to an arbitrary distance.
52 my %min = (key => 0, value => 0);
53 MATCH_NEXT_KEY_VALUE:
54 /(^|(?'spacebefore'\s*))(?'rawkey'(?'keyquote'[""''])(?'key'.{$min{key},}?)\g{keyquote}|(?'key'[^\s=]+))=(?'rawvalue'(?'valuequote'[""''])(?'value'.{$min{value},}?)\g{valuequote}|(?'value'\S*))((?'spaceafter'\s*)|$)/
55 or return ();
56 my %m = %+;
57 $m{_before} = $`;
58 $m{_after} = $';
59 #print Dumper [$key_min, $val_min, \%m];
60 for my $elem ('key', 'value')
62 # check if the key (value) is quoted and does not end with odd number of backslashes,
63 # because if so, then increase the minimum length that it should be,
64 # then try to match it again.
65 if($m{$elem.'quote'} and not $m{$elem} =~ /(?<!\\)(\\{2})*$/)
67 $min{$elem} = length($m{$elem})+1;
68 goto MATCH_NEXT_KEY_VALUE;
71 # return a hash of all named capture groups,
72 # plus the substring before and after the regexp match in the _before and _after keys respectively.
73 return %m;
77 $OptWarnBadColumnNames = 1;
78 $OptFailBadColumnNames = 1;
79 @OptPredefColumns = ();
80 $OptRestColumnName = "_REST";
81 $OptUnknownToRest = 0;
82 %OptionDefs = (
83 'i|ignore-non-existing-columns' => sub { $OptFailBadColumnNames = 0; $OptWarnBadColumnNames = 0; },
84 'w|warn-non-existing-columns' => sub { $OptFailBadColumnNames = 0; $OptWarnBadColumnNames = 1; },
85 'c|column=s@' => \@OptPredefColumns,
86 'r|restcolumn=s' => \$OptRestColumnName,
87 'u|unknown-to-rest' => \$OptUnknownToRest,
90 use Data::Dumper;
91 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
92 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
94 @Headers = ();
95 $RN = 0;
96 $rest_column_idx = undef;
98 while(my $Line = <STDIN>)
100 chomp $Line;
101 my @record = ();
102 my $Rest = '';
105 while(my %m = parse_next_key_value($Line))
107 $Rest .= $m{_before} . $m{'spacebefore'};
108 $Line = $m{_after};
109 my ($key, $value) = (kvpair_unescape($m{'rawkey'}), kvpair_unescape($m{'rawvalue'}));
110 my $spaceafter = $m{'spaceafter'};
112 if($RN == 0)
114 push @Headers, $key;
117 my $cell_set = 0;
118 my $colinstances = 0;
119 for my $colidx (0 .. $#Headers)
121 if($Headers[$colidx] eq $key)
123 $colinstances++;
124 if(not defined $record[$colidx])
126 $record[$colidx] = $value;
127 $cell_set = 1;
128 last;
133 if(not $cell_set)
135 $colinstances++;
136 warn "$0: column (instance $colinstances) is not initialized: $key\n" if $OptWarnBadColumnNames;
137 if($OptUnknownToRest)
139 $Rest .= kvpair_escape($key) . '=' . kvpair_escape($value) . $spaceafter;
141 else
143 exit 3 if $OptFailBadColumnNames;
148 if($RN == 0)
150 push @Headers, grep { not $_ ~~ @Headers } @OptPredefColumns, $OptRestColumnName;
151 $rest_column_idx = $#Headers;
152 print join($FS, map {escape_tabdata($_)} @Headers).$RS;
155 $record[$rest_column_idx] = $Rest . $Line;
157 print join($FS, map {escape_tabdata($_)} @record) . $RS;
159 $RN++;