7 td-collapse - Collapse multiple tabular data records with equivalent keys into one.
11 td-collapse [I<OPTIONS>]
15 It goes row-by-row over a sorted tabular data stream
16 and if 2 or more consecutive rows' first (key) cells are
17 the same, then collapses them into one row.
18 This is done by joining corresponding cells' data from each row into one
19 cell, effectively keeping every column's data in the same column.
21 If you want to group by another column, not the first one, then first
22 reorder the columns by td-select(1). Eg. C<td-select KEYCOLUMN +REST>.
28 =item -g, --glue I<STR>
30 Delimiter character or string between joined cell data.
33 =item -u, --distribute-unique-field I<FIELD>
35 Take the I<FIELD> column's cells from the first collapsed group,
36 and replicate all other columns as many times as there are rows in this group,
37 in a way that each cell goes under a new column corresponding to that cell's original row.
38 I<FIELD> field's cells need to be unique within each group.
40 If an unexpected value is found while processing the 2nd row group and onwards,
41 i.e. a value which was not there in the first group,
42 it won't be distributed into the new column, since the header is already sent,
43 but left in the original column, just as if the B<-u> option were not in effect.
44 See "pause" and "resume" in the example below.
48 ID | EVENT | TIME | STATUS
52 16 | end | 11:06 | err
58 COUNT | ID | EVENT | TIME | TIME_start | TIME_end | STATUS | STATUS_start | STATUS_end
59 2 | 15 | | | 10:00 | 10:05 | | | ok
60 4 | 16 | pause resume | 11:04 11:05 | 11:00 | 11:06 | | | err
62 =item -s, --distributed-column-name-separator I<STR>
64 When generating new columns as described at B<-u> option,
65 join the original column name with each of the unique field's values
67 See example at B<-u> option description.
68 Default is underscore C<_>.
74 This pipeline shows which users are using each of the configured default
75 shells, grouped by shell path.
77 # get the list of users
80 # transform into tabular data stream
82 td-add-headers USER X UID GID GECOS HOME SHELL |\
84 # put the shell in the first column, and sort, then collapse
85 td-select SHELL USER | td-keepheader sort | td-collapse -g ' ' |\
87 # change header name "USER" to "USERS"
88 td-alter USERS=USER | td-select +ALL -USER
92 | COUNT | SHELL | USERS |
93 | 4 | /bin/bash | user1 user2 nova root |
94 | 5 | /bin/false | fetchmail hplip sddm speech-dispatcher sstpc |
95 | 1 | /bin/sync | sync |
96 | 1 | /sbin/rebootlogon | reboot |
97 | 6 | /usr/sbin/nologin | _apt avahi avahi-autoipd backup bin daemon |
101 Input data must be sorted first.
103 Group key is always the first input column.
105 If a row in the input data has more cells than the number of columns, the extra cells are ignored.
109 td-expand(1) is a kind of inverse to td-collapse(1).
113 td-collapse(1) roughly translates to SELECT COUNT(*) + GROUP_CONCAT() + GROUP BY in SQL.
# Defaults for the -u and -s options: no unique field selected, and "_" as
# the separator used when building distributed column names.
119 $OptDistUniqueField = undef;
120 $OptDistColumnNameSeparator = "_";
# Option spec => destination pairs; each of -g, -u and -s takes a string (=s).
# NOTE(review): the call these pairs belong to is not visible in this view.
122 'g|glue=s' => \
$OptGlue,
123 'u|distribute-unique-field=s' => \
$OptDistUniqueField,
124 's|distributed-column-name-separator=s' => \
$OptDistColumnNameSeparator,
# On perl >= 5.18, turn off the 'experimental::smartmatch' warning category
# (the ~~ operator is used further down in this file).
128 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
# Execute the shared tabdata helper file; die with $@ if it yields a false value.
129 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
# Pass the first line of STDIN to process_header.
# NOTE(review): presumably this populates @Header and %Header, which are used
# below — process_header is defined outside this view, confirm.
131 process_header
(scalar <STDIN
>);
# Remember the last index of @Header as it is now; @Header may get extra
# columns pushed onto it later when -u is in effect.
132 $last_input_column_idx = $#Header;
# When -u was given, the named column must exist in %Header; keep its index.
134 if(defined $OptDistUniqueField)
136 die "$0: no such column: $OptDistUniqueField\n" if not exists $Header{$OptDistUniqueField};
137 $uniq_field_idx = $Header{$OptDistUniqueField};
# NOTE(review): the enclosing sub header and the braces are not visible in this
# view; this looks like the body of commit_group (called from the END block and
# presumably from the read loop) — confirm.
#
# Header output, done while $group_count is still 0.
143 if($group_count == 0)
# NOTE(review): this tests truthiness, while the validation earlier uses
# `defined $OptDistUniqueField`; a column literally named "0" would pass
# validation but skip this branch — confirm whether that is intended.
145 if($OptDistUniqueField)
# For every column from index 1 up, except the unique field itself, append one
# new header column per unique-field cell of the current group, named
# <original column name><separator><unique cell value>, and register it in %Header.
147 for my $colidx (1..$#Header)
149 next if $colidx == $uniq_field_idx;
150 @distributed_cells = @
{$group_members[$uniq_field_idx]};
151 for my $uniq_cell (@distributed_cells)
153 my $distrib_colname = $Header[$colidx] . $OptDistColumnNameSeparator . $uniq_cell;
154 push @Header, $distrib_colname;
155 $Header{$distrib_colname} = $#Header;
# Emit the header line, with a leading COUNT column.
159 print join($FS, "COUNT", @Header).$RS;
# Distribution step: move cells of rows whose unique-field value is listed in
# @distributed_cells into their dedicated columns.
162 if($OptDistUniqueField)
164 for my $uniq_cell_idx (0..$#{$group_members[$uniq_field_idx]})
166 my $uniq_cell = $group_members[$uniq_field_idx]->[$uniq_cell_idx];
# Values not in @distributed_cells have no dedicated column; leave them in place.
167 next if not $uniq_cell ~~ @distributed_cells;
168 for my $colidx (1..$last_input_column_idx)
170 my $colname = $Header[$colidx];
171 my $distrib_colname = $colname . $OptDistColumnNameSeparator . $uniq_cell;
# Store the cell as the first element of the distributed column, and mark the
# original position undef so it can be filtered out below.
172 $group_members[$Header{$distrib_colname}]->[0] = $group_members[$colidx]->[$uniq_cell_idx];
173 $group_members[$colidx]->[$uniq_cell_idx] = undef;
175 $group_members[$uniq_field_idx]->[$uniq_cell_idx] = undef;
# Drop the undef placeholders from the original input columns.
177 for my $colidx (1..$last_input_column_idx)
179 @
{$group_members[$colidx]} = grep {defined} @
{$group_members[$colidx]};
181 @
{$group_members[$uniq_field_idx]} = grep {defined} @
{$group_members[$uniq_field_idx]};
# Output one collapsed row: $collapsed_rows, the group key, then each column's
# collected cells (from index 1 on) joined with the glue string.
184 print $collapsed_rows . $FS . $prev_group_key . $FS . join($FS, map { join $OptGlue, @
$_ } @group_members[1..$#group_members]) . $RS;
# Reset per-group state.
191 $prev_group_key = undef;
195 @distributed_cells = ();
# NOTE(review): the enclosing loop header and braces are not visible in this view.
# Read the next record from STDIN as a list of cells.
200 my @input_row = read_record
(\
*STDIN
);
# The first cell is the group key.
202 $group_key = $input_row[0];
# A key differing from the previous one marks a group boundary.
# NOTE(review): the body of this condition is not visible here; presumably it
# calls commit_group — confirm.
204 if(defined $prev_group_key and $group_key ne $prev_group_key)
# Append each non-key cell to its column's list; cells missing from the row
# are stored as empty strings. Cells beyond $last_input_column_idx are not read.
209 for my $cell_idx (1..$last_input_column_idx)
211 my $cell = $input_row[$cell_idx];
212 $cell = '' unless defined $cell;
213 push @
{$group_members[$cell_idx]}, $cell;
217 $prev_group_key = $group_key;
# Call commit_group once more at program exit; presumably this outputs the
# final group, which is not followed by a key change — confirm.
220 END { commit_group
; }