7 kvpairs2td - Transform lines of key-value pairs to tabular data stream
13 =item -i, --ignore-non-existing-columns
15 Do not fail when encounters a new field after the first record.
17 =item -w, --warn-non-existing-columns
19 =item -c, --column I<COLUMN>
21 Indicate that there will be a column by the name I<COLUMN>.
22 This is useful if the first record does not have I<COLUMN>.
23 This option is repeatable.
25 =item -r, --restcolumn I<NAME>
27 Name of the column where the rest of the input line will be put
28 which is not part of key-value pairs.
31 =item -u, --unknown-to-rest
33 Put unknown (non-existing) fields in the "rest" column
40 td2mrkv(1), td2kvpairs(1)
45 sub parse_next_key_value
48 # to support backslash-escaped quotes in quoted strings, we need to bookkeep what is the minimum length
49 # of the string which does not end with an odd number of backslashes, which would mean that the trailing
50 # quote is in fact an escaped quote and should be part of the string.
51 # regexp lookbehind does not work well when need to look behind to an arbitrary distance.
52 my %min = (key
=> 0, value
=> 0);
54 /(^|(?'spacebefore'\s*))(?'rawkey'(?'keyquote'[""''])(?'key'.{$min{key},}?)\g{keyquote}|(?'key'[^\s=]+))=(?'rawvalue'(?'valuequote'[""''])(?'value'.{$min{value},}?)\g{valuequote}|(?'value'\S*))((?'spaceafter'\s*)|$)/
59 #print Dumper [$key_min, $val_min, \%m];
60 for my $elem ('key', 'value')
62 # check if the key (value) is quoted and does not end with odd number of backslashes,
63 # because if so, then increase the minimum length that it should be,
64 # then try to match it again.
65 if($m{$elem.'quote'} and not $m{$elem} =~ /(?<!\\)(\\{2})*$/)
67 $min{$elem} = length($m{$elem})+1;
68 goto MATCH_NEXT_KEY_VALUE;
71 # return a hash of all named capture groups,
72 # plus the substring before and after the regexp match in the _before and _after keys respectively.
77 $OptWarnBadColumnNames = 1;
78 $OptFailBadColumnNames = 1;
79 @OptPredefColumns = ();
80 $OptRestColumnName = "_REST";
81 $OptUnknownToRest = 0;
83 'i|ignore-non-existing-columns' => sub { $OptFailBadColumnNames = 0; $OptWarnBadColumnNames = 0; },
84 'w|warn-non-existing-columns' => sub { $OptFailBadColumnNames = 0; $OptWarnBadColumnNames = 1; },
85 'c|column=s@' => \@OptPredefColumns,
86 'r|restcolumn=s' => \$OptRestColumnName,
87 'u|unknown-to-rest' => \$OptUnknownToRest,
91 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
92 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
96 $rest_column_idx = undef;
98 while(my $Line = <STDIN>)
105 while(my %m = parse_next_key_value($Line))
107 $Rest .= $m{_before} . $m{'spacebefore'};
109 my ($key, $value) = (kvpair_unescape($m{'rawkey'}), kvpair_unescape($m{'rawvalue'}));
110 my $spaceafter = $m{'spaceafter'};
118 my $colinstances = 0;
119 for my $colidx (0 .. $#Headers)
121 if($Headers[$colidx] eq $key)
124 if(not defined $record[$colidx])
126 $record[$colidx] = $value;
136 warn "$0: column (instance $colinstances) is not initialized: $key\n" if $OptWarnBadColumnNames;
137 if($OptUnknownToRest)
139 $Rest .= kvpair_escape($key) . '=' . kvpair_escape($value) . $spaceafter;
143 exit 3 if $OptFailBadColumnNames;
150 push @Headers, grep { not $_ ~~ @Headers } @OptPredefColumns, $OptRestColumnName;
151 $rest_column_idx = $#Headers;
152 print join($FS, map {escape_tabdata($_)} @Headers).$RS;
155 $record[$rest_column_idx] = $Rest . $Line;
157 print join($FS, map {escape_tabdata($_)} @record) . $RS;