bugfix
[hband-tools.git] / tabdata / td-alter
blob209e442d69d185ab36644e33ecc708ae0e546bb6
1 #!/usr/bin/env perl
3 =pod
5 =head1 NAME
7 td-alter - Add new columns and fields to tabular data stream, and modify value of existing fields.
9 =head1 USAGE
11 td-alter I<COLUMN>=I<EXPR> [I<COLUMN>=I<EXPR> [I<COLUMN>=I<EXPR> [...]]]
13 =head1 DESCRIPTION
15 On each data row, sets field in I<COLUMN> to the value resulted by I<EXPR>
16 Perl expression.
18 In I<EXPR>, you may refer to other columns by C<$F{NAME}> where I<NAME> is the column name;
19 or by C<$F[INDEX]> where I<INDEX> is the 0-based column index number;
20 or in case of uppercase alpha-numeric field names, simply by bareword C<COLUMN>
21 (because these are set up as subroutines internally).
22 Topic variable (C<$_>) is set to the current value of I<COLUMN>.
24 You can create new columns simply by referring to a I<COLUMN> name that does not yet exist.
25 You can refer to an earlier defined I<COLUMN> in subsequent I<EXPR> expressions.
27 =head1 EXAMPLES
29 Add new columns: TYPE and IS_BIGFILE.
30 IS_BIGFILE depends on previously defined TYPE field.
32 ls -l | td-trans-ls | td-alter TYPE='substr MODE,0,1' IS_BIGFILE='SIZE>10000000 && TYPE ne "d" ? "yes" : "no"'
34 Strip sub-seconds and timezone from DATETIME field.
36 TIME_STYLE=full-iso ls -l | td-trans-ls | td-alter DATETIME='s/\..*//; $_'
38 =head1 OPTIONS
40 =over 4
42 =item -H, --no--header
44 do not show headers
46 =item -h, --header
48 show headers (default)
50 =back
52 =head1 REFERENCES
54 "Alter" in td-alter comes from SQL.
55 td-alter(1) can change the "table" column layout.
56 But contrary to SQL's ALTER TABLE, td-alter(1) can modify the records too, so akin to SQL UPDATE as well.
58 =cut
60 $OptShowHeader = 1;
61 %OptionDefs = (
62 'H|no-header' => sub { $OptShowHeader = 0; },
63 'h|header' => sub { $OptShowHeader = 1; },
66 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
67 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
69 process_header(scalar <STDIN>);
71 @Derives = ();
73 while(scalar @ARGV)
75 if($ARGV[0] =~ /^([^=]+)=(.*)/)
77 my $column = $1;
78 my $expr = $2;
80 if(not exists $Header{$column})
82 push @Header, $column;
83 $Header{$column} = $#Header;
86 push @Derives, {'column'=>$column, 'expr'=>$expr};
88 else
90 pod2usage(-exitval=>2, -verbose=>99, -msg=>"$0: unknown parameter: $ARGV[0]");
92 shift @ARGV;
95 if($OptShowHeader and @Header)
97 print join($FS, @Header).$RS;
100 while($line = <STDIN>)
102 chomp $line;
104 @F = split $FS, $line;
105 %F = map {$_=>$F[$Header{$_}]} keys %Header;
107 for my $d (@Derives)
109 my $cidx = $Header{$d->{'column'}};
110 my $expr_prerun = '';
111 for my $col (grep {/^[A-Z_][A-Z0-9_]*$/} @Header)
113 # define all (uppercase alpha-numeric only) column name as a subroutine
114 # which return the given field's value.
115 $expr_prerun .= sprintf('sub %s { $F{"%s"} };', $col, $col);
117 $expr_prerun .= '$_ = $F[$cidx];';
118 my $expr = $expr_prerun . $d->{'expr'};
120 $F[$cidx] = $F{$d->{'column'}} = eval $expr;
121 warn $@ if $@;
124 # undefined is empty string in tab-data format.
125 for my $idx (0..$#F)
127 $F[$idx] = '' if not defined $F[$idx];
130 print join($FS, @F).$RS;