7 td-collapse - Collapse multiple tabular data records with equivalent keys into one.
11 td-collapse [I<OPTIONS>]
15 It goes row-by-row on a sorted tabular data stream
16 and if 2 or more subsequent rows' first (key) cell are
17 the same then collapse them into one row.
18 This is done by joining corresponding cells' data from each row into one
19 cell, effectively keeping every column's data in the same column.
21 If you want to group by an other column, not the first one, then first
22 reorder the columns by td-select(1). Eg. C<td-select KEYCOLUMN +REST>.
28 =item -g, --glue I<STR>
30 Delimiter character or string between joined cell data.
33 =item -u, --distribute-unique-field I<FIELD>
35 Take the I<FIELD> column's cells from the first collapsed group,
36 and multiplicate all other columns as many times as many rows are in this group,
37 in a way that each cell goes under a new column corresponding to that cell's original row.
38 I<FIELD> field's cells need to be unique within each groups.
40 If an unexpected value found during processing the 2nd row group and onwards,
41 ie. a value which was not there in the first group,
42 it won't be distibuted into the new column, since the header is already sent,
43 but left in the original column just like B<-u> option would not be in effect.
44 See "pause" and "resume" in the example below.
48 ID | EVENT | TIME | STATUS
52 16 | end | 11:06 | err
56 td-collapse -u EVENT -z
58 COUNT | ID | EVENT | TIME | TIME_start | TIME_end | STATUS | STATUS_start | STATUS_end
59 2 | 15 | | | 10:00 | 10:05 | | | ok
60 4 | 16 | pause resume | 11:04 11:05 | 11:00 | 11:06 | | | err
62 =item -s, --distributed-column-name-separator I<STR>
64 When generating new columns as described at B<-u> option,
65 join the original column name with each of the unique field's values
67 See example at B<-u> option description.
68 Default is underscore C<_>.
70 =item -k, --keep-equivalent-cells-united
72 Don't repeat the original cells' content
73 in the collapsed cell if all of the original cell are the same.
75 =item -z, --empty-distributed-cells
77 Clear cells of which data moved to other columns by B<-u> option.
83 This pipeline shows which users are using each of the configured default
84 shells, grouped by shell path.
86 # get the list of users
89 # transform into tabular data stream
91 td-add-headers USER X UID GID GECOS HOME SHELL |\
93 # put the shell in the first column, and sort, then collapse
94 td-select SHELL USER | td-keepheader sort | td-collapse -g ' ' |\
96 # change header name "USER" to "USERS"
97 td-alter USERS=USER | td-select +ALL -USER
101 | COUNT | SHELL | USERS |
102 | 4 | /bin/bash | user1 user2 nova root |
103 | 5 | /bin/false | fetchmail hplip sddm speech-dispatcher sstpc |
104 | 1 | /bin/sync | sync |
105 | 1 | /sbin/rebootlogon | reboot |
106 | 6 | /usr/sbin/nologin | _apt avahi avahi-autoipd backup bin daemon |
110 Have to sort input data first.
112 Group key is always the first input column.
114 If a row in the input data has more cells than the number of columns, those are ignored.
118 td-expand(1) is a kind of an inverse to td-collapse(1).
122 td-collapse(1) roughly translates to SELECT COUNT(*) + GROUP_CONCAT() + GROUP BY in SQL.
128 $OptDistUniqueField = undef;
129 $OptDistColumnNameSeparator = "_";
130 $OptKeepSameCells = 0;
131 $OptEmptyDistCells = 0;
133 'g|glue=s' => \
$OptGlue,
134 'u|distribute-unique-field=s' => \
$OptDistUniqueField,
135 's|distributed-column-name-separator=s' => \
$OptDistColumnNameSeparator,
136 'k|keep-equivalent-cells-united!' => \
$OptKeepSameCells,
137 'z|empty-distributed-cells!' => \
$OptEmptyDistCells,
141 use List
::MoreUtils qw
/all/;
142 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
143 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
145 process_header
(scalar <STDIN
>);
146 $last_input_column_idx = $#Header;
148 if(defined $OptDistUniqueField)
150 die "$0: no such column: $OptDistUniqueField\n" if not exists $Header{$OptDistUniqueField};
151 $uniq_field_idx = $Header{$OptDistUniqueField};
155 sub make_collapsed_cell
157 my $first_input_cell = $_[0];
158 if($OptKeepSameCells and all
{$_ eq $first_input_cell} @_)
160 return $first_input_cell;
162 return join $OptGlue, @_;
167 if($group_count == 0)
169 if($OptDistUniqueField)
171 for my $colidx (1..$#Header)
173 next if $colidx == $uniq_field_idx;
174 @distributed_cells = @
{$group_members[$uniq_field_idx]};
175 for my $uniq_cell (@distributed_cells)
177 my $distrib_colname = $Header[$colidx] . $OptDistColumnNameSeparator . $uniq_cell;
178 push @Header, $distrib_colname;
179 $Header{$distrib_colname} = $#Header;
183 print join($FS, "COUNT", @Header).$RS;
186 if($OptDistUniqueField)
188 for my $uniq_cell_idx (0..$#{$group_members[$uniq_field_idx]})
190 my $uniq_cell = $group_members[$uniq_field_idx]->[$uniq_cell_idx];
191 next if not $uniq_cell ~~ @distributed_cells;
192 for my $colidx (1..$last_input_column_idx)
194 my $colname = $Header[$colidx];
195 my $distrib_colname = $colname . $OptDistColumnNameSeparator . $uniq_cell;
196 $group_members[$Header{$distrib_colname}]->[0] = $group_members[$colidx]->[$uniq_cell_idx];
197 $group_members[$colidx]->[$uniq_cell_idx] = undef if $OptEmptyDistCells;
199 $group_members[$uniq_field_idx]->[$uniq_cell_idx] = undef if $OptEmptyDistCells;
201 for my $colidx (1..$last_input_column_idx)
203 @
{$group_members[$colidx]} = grep {defined} @
{$group_members[$colidx]};
205 @
{$group_members[$uniq_field_idx]} = grep {defined} @
{$group_members[$uniq_field_idx]};
208 print $collapsed_rows . $FS . $prev_group_key . $FS . join($FS, map {make_collapsed_cell
(@
$_)} @group_members[1..$#group_members]) . $RS;
215 $prev_group_key = undef;
219 @distributed_cells = ();
224 my @input_row = read_record
(\
*STDIN
);
226 $group_key = $input_row[0];
228 if(defined $prev_group_key and $group_key ne $prev_group_key)
233 for my $cell_idx (1..$last_input_column_idx)
235 my $cell = $input_row[$cell_idx];
236 $cell = '' unless defined $cell;
237 push @
{$group_members[$cell_idx]}, $cell;
241 $prev_group_key = $group_key;
244 END { commit_group
; }