7 td-filter - Show only those records from the input tabular data stream which match to the conditions.
11 td-filter [I<OPTIONS>] [--] I<COLUMN> I<OPERATOR> I<R-VALUE> [[or] I<COLUMN> I<OPERATOR> I<R-VALUE> [[or] ...]]
13 td-filter [I<OPTIONS>] --perl I<EXPR>
17 Pass through those records which match at least one of the conditions (inclusive OR).
18 A condition consists of a triplet of I<COLUMN>, I<OPERATOR>, and I<R-VALUE>.
19 You may put together conditions conjunctively (AND) by chaining multiple td-filter(1) commands by shell pipes.
22 td-filter NAME eq john NAME eq jacob | tr-filter AGE -gt 18
24 This gives the records with either john or jacob, and all of them will be above 18.
26 The optional word "B<or>" between triplets makes your code more explicite.
28 td-filter(1) evaluates the Perl expression in the second form and passes through records
29 only if the result is true-ish in Perl (non zero, non empty string, etc).
30 Each field's value is in @F by index, and in %F by column name.
31 You can implement more complex conditions in this way.
43 show headers (default)
45 =item -i, --ignore-non-existing-columns
47 do not treat non-existing (missing or typo) column names as failure
49 =item -w, --warn-non-existing-columns
51 only show warning on non-existing (missing or typo) column names, but don't fail
53 =item -N, --no-fail-non-numeric
55 do not fail when a non-numeric r-value is given to a numeric operator
57 =item -W, --no-warn-non-numeric
59 do not show warning when a non-numeric r-value is given to a numeric operator
65 These operators are supported, semantics are the same as in Perl, see perlop(1).
67 == != <= >= < > =~ !~ eq ne gt lt
69 For your convenience, not to bother with escaping, you may also use these operators as alternatives to the canonical ones above:
75 =item = I<(single equal sign)>
77 string equality (B<eq>)
81 string inequality (B<ne>)
85 numeric equality (B<==>)
89 numeric inequality (B<!=>)
93 numeric inequality (B<!=>)
97 numeric greater than (B<E<gt>>)
101 numeric greater or equal (B<E<gt>=>)
105 numeric less than (B<E<lt>>)
109 numeric less or equal (B<E<lt>=>)
123 negated regexp match (B<!~>)
131 =item is [not] one of
133 =item is [not] any of
135 I<R-VALUE> is split into pieces by commas (C<,>) and
136 equality to at least one of them is required.
137 Equality to none of them is required if the operator is negated.
139 =item contains [whole word]
142 Plural form "contain" is also accepted.
143 Optional I<whole word> is a literal part of the operator.
145 =item contains [one | any] [whole word] of
147 Similar to B<is one of>, but substring match is checked
148 instead of full string equality.
149 Plural form "contain" is also accepted.
150 Optional I<whole word> is a literal part of the operator.
156 Plural forms are also accepted.
160 Operators may be preceeded by I<not>, I<does not>, I<do not> to negate their effect.
164 If there is no I<COLUMN> column in the input data, it's silently considered empty.
165 td-filter(1) does not need I<R-VALUE> to be quoted or escaped, however your shell may do.
169 td-filter(1) is analogous to SQL WHERE.
175 $OptWarnBadColumnNames = 1;
176 $OptFailBadColumnNames = 1;
177 $OptWarnNonNumericRValue = 1;
178 $OptFailNonNumericRValue = 1;
180 'H|no-header' => sub { $OptShowHeader = 0; },
181 'h|header' => sub { $OptShowHeader = 1; },
182 'i|ignore-non-existing-columns' => sub { $OptFailBadColumnNames = 0; $OptWarnBadColumnNames = 0; },
183 'w|warn-non-existing-columns' => sub { $OptFailBadColumnNames = 0; $OptWarnBadColumnNames = 1; },
184 'N|no-fail-non-numeric' => sub { $OptFailNonNumericRValue = 0; },
185 'W|no-warn-non-numeric' => sub { $OptWarnNonNumericRValue = 0; },
189 use List
::MoreUtils qw
/all any none uniq/;
191 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
192 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
194 use Math
::Cartesian
::Product
;
197 sub subst_alternate_forms
200 my $altforms = shift;
201 my @product_terms = ();
202 for my $word (split /\s+/, $words)
204 push @product_terms, ($word ~~ [keys %$altforms]) ?
$altforms->{$word} : [$word];
207 cartesian
{push @results, join ' ', @_} @product_terms;
213 my %alternate_form = (
214 'ANY' => [qw
/one any/],
215 'MATCH' => [qw
/match matches/],
216 'CONTAIN' => [qw
/contain contains/],
217 'DOES' => [qw
/do does/],
218 'START' => [qw
/start starts/],
219 'END' => [qw
/end ends/],
222 { phrase
=> 'MATCH', operator
=> '=~'},
223 { phrase
=> 'not MATCH', operator
=> '!~'},
224 { phrase
=> 'DOES not match', operator
=> '!~'},
226 { phrase
=> 'is not ANY of', operator
=> 'is-one-of', negation
=> 1},
227 { phrase
=> 'is ANY of', operator
=> 'is-one-of'},
228 { phrase
=> 'is not', operator
=> 'ne'},
229 { phrase
=> 'is', operator
=> 'eq'},
231 { phrase
=> '=', operator
=> 'eq'},
232 { phrase
=> '-eq', operator
=> '=='},
233 { phrase
=> '-ne', operator
=> '!='},
234 { phrase
=> '<>', operator
=> '!='},
235 { phrase
=> '-gt', operator
=> '>'},
236 { phrase
=> '-ge', operator
=> '>='},
237 { phrase
=> '-lt', operator
=> '<'},
238 { phrase
=> '-le', operator
=> '<='},
240 { phrase
=> 'CONTAIN whole word', operator
=> 'contains', wholeword
=> 1},
241 { phrase
=> 'CONTAIN ANY whole word of', operator
=> 'contains-one-of', wholeword
=> 1},
242 { phrase
=> 'CONTAIN ANY of', operator
=> 'contains-one-of'},
243 { phrase
=> 'CONTAIN', operator
=> 'contains'},
244 { phrase
=> 'not CONTAIN whole word', operator
=> 'contains', negation
=> 1, wholeword
=> 1},
245 { phrase
=> 'not CONTAIN ANY whole word of', operator
=> 'contains-one-of', negation
=> 1, wholeword
=> 1},
246 { phrase
=> 'not CONTAIN ANY of', operator
=> 'contains-one-of', negation
=> 1},
247 { phrase
=> 'not CONTAIN', operator
=> 'contains', negation
=> 1},
248 { phrase
=> 'DOES not contain ANY whole word of', operator
=> 'contains-one-of', negation
=> 1, wholeword
=> 1},
249 { phrase
=> 'DOES not contain whole word', operator
=> 'contains', negation
=> 1, wholeword
=> 1},
250 { phrase
=> 'DOES not contain ANY of', operator
=> 'contains-one-of', negation
=> 1},
251 { phrase
=> 'DOES not contain', operator
=> 'contains', negation
=> 1},
253 { phrase
=> 'START with', operator
=> 'starts-with'},
254 { phrase
=> 'not START with', operator
=> 'starts-with', negation
=> 1},
255 { phrase
=> 'DOES not start with', operator
=> 'starts-with', negation
=> 1},
257 { phrase
=> 'END with', operator
=> 'ends-with'},
258 { phrase
=> 'not END with', operator
=> 'ends-with', negation
=> 1},
259 { phrase
=> 'DOES not end with', operator
=> 'ends-with', negation
=> 1},
261 my @canonical_operators = uniq
map {$_->{operator
}} @opdef;
262 for my $opdef (@opdef)
264 my $phrase = $opdef->{phrase
};
265 my $nwords = scalar split /\s+/, $phrase;
266 my @phrases = subst_alternate_forms
($phrase, \
%alternate_form);
267 #warn Dumper \@phrases;
268 for my $possible_operator (@phrases)
270 my $argstr = join ' ', @args[0..($nwords-1)];
271 if($argstr eq $possible_operator)
273 my %result = %$opdef;
274 $result{remaining_args
} = [@args[$nwords..$#args]];
279 if($args[0] ~~ [@canonical_operators])
281 return {operator
=> $args[0], remaining_args
=> [@args[1..$#args]]};
283 die "valid operator expected; found this: @args";
292 if($ARGV[0] eq '--perl')
295 die "$0: missing Perl expression\n" unless @ARGV;
296 $Expr = join ';', @ARGV;
302 shift @ARGV if $ARGV[0] eq 'or';
304 my $column = shift @ARGV;
306 my $complex_operator = parse_operator
(@ARGV);
307 #warn Dumper $complex_operator;
308 my $operator = $complex_operator->{operator
};
309 my $negation = $complex_operator->{negation
};
310 my $wholeword = $complex_operator->{wholeword
};
311 @ARGV = @
{$complex_operator->{remaining_args
}};
313 my $rvalue = shift @ARGV;
314 die "$0: missing r-value for: $column $operator\n" if not defined $rvalue;
316 if($operator ~~ [qw
/== != <= >= < >/] and $rvalue !~ /^[+-]?[0-9]\d*(.\d+|)(E[+-]?\d+|)$/i)
318 warn "$0: operator ($operator) is numeric but r-value ($rvalue) is not\n" if $OptWarnNonNumericRValue;
319 exit -1 if $OptFailNonNumericRValue;
322 push @Filter, {'field'=>$column, 'operator'=>$operator, 'negation'=>$negation, 'value'=>$rvalue, 'wholeword'=>$wholeword};
326 process_header
(scalar <STDIN
>);
328 if(not defined $Expr)
330 for my $filter (@Filter)
332 my $colname = $filter->{'field'};
333 if(not exists $Header{$colname})
335 my $cols = join ", ", @Header;
336 warn "$0: $colname: no such column. known columns: $cols\n" if $OptWarnBadColumnNames;
337 exit 3 if $OptFailBadColumnNames;
342 if($OptShowHeader and length $HeaderLine)
344 print $HeaderLine.$RS;
349 my @Field = read_record
(\
*STDIN
);
354 my %F = map {$_=>$Field[$Header{$_}]} keys %Header;
362 for my $filter (@Filter)
364 my $left_data = exists $Header{$filter->{'field'}} ?
$Field[$Header{$filter->{'field'}}] : '';
365 my $right_data = $filter->{'value'};
366 my $operator = $filter->{'operator'};
369 if($filter->{'wholeword'})
371 # have to match "whole word" - simple substring match won't be enough.
372 # transform "contains" and "contains-one-of" operators into a regexp match.
374 if($operator eq 'contains-one-of')
376 $right_data = '\b(' . (join '|', map { quotemeta($_) } split /,/, $right_data) . ')\b';
378 elsif($operator eq 'contains')
380 $right_data = '\b' . quotemeta($right_data) . '\b';
382 else { die "$0: should not happen\n"; }
390 $result = 1 + index $left_data, $right_data;
394 $result = any
{ $left_data eq $_ } split /,/, $right_data;
396 case
'contains-one-of'
398 $result = any
{ 1 + index $left_data, $_ } split /,/, $right_data;
402 $result = $right_data eq substr($left_data, 0, length $right_data);
406 $result = $right_data eq substr($left_data, -length $right_data);
410 my $left_code = '$left_data';
411 my $right_code = '$right_data';
412 $right_code = "/$right_code/" if $operator ~~ ['=~', '!~'];
413 $result = eval "$left_code $operator $right_code";
417 $result = !$result if $filter->{'negation'};
429 print join($FS, @Field).$RS;