PMbwmon bandwidth monitor: can specify which interfaces to watch, also supporting...
[hband-tools.git] / tabdata / td-collapse
blob8a1cabf5a2efe61d9382150e15abbb968878cb70
1 #!/usr/bin/env perl
3 =pod
5 =head1 NAME
7 td-collapse - Collapse multiple tabular data records with equivalent keys into one.
9 =head1 SYNOPSIS
11 td-collapse [I<OPTIONS>]
13 =head1 DESCRIPTION
15 It goes row-by-row over a sorted tabular data stream
16 and if the first (key) cells of 2 or more consecutive rows are
17 the same, then it collapses them into one row.
18 This is done by joining corresponding cells' data from each row into one
19 cell, effectively keeping every column's data in the same column.
21 If you want to group by another column, not the first one, then first
22 reorder the columns by td-select(1). Eg. C<td-select KEYCOLUMN +REST>.
24 =head1 OPTIONS
26 =over 4
28 =item -g, --glue I<STR>
30 Delimiter character or string between joined cell data.
31 Default is space.
33 =item -u, --distribute-unique-field I<FIELD>
35 Take the I<FIELD> column's cells from the first collapsed group,
36 and replicate all other columns as many times as there are rows in this group,
37 in a way that each cell goes under a new column corresponding to that cell's original row.
38 I<FIELD> field's cells need to be unique within each group.
40 If an unexpected value is found while processing the 2nd row group and onwards,
41 ie. a value which was not there in the first group,
42 it won't be distributed into a new column, since the header is already sent,
43 but left in the original column just as if the B<-u> option were not in effect.
44 See "pause" and "resume" in the example below.
46 B<Example>:
48 ID | EVENT | TIME | STATUS
49 15 | start | 10:00 |
50 15 | end | 10:05 | ok
51 16 | start | 11:00 |
52 16 | end | 11:06 | err
53 16 | pause | 11:04 |
54 16 | resume | 11:05 |
56 td-collapse -u EVENT
58 COUNT | ID | EVENT | TIME | STATUS | TIME_start | TIME_end | STATUS_start | STATUS_end
59 2 | 15 | | | | 10:00 | 10:05 | | ok
60 4 | 16 | pause resume | 11:04 11:05 | | 11:00 | 11:06 | | err
62 =item -s, --distributed-column-name-separator I<STR>
64 When generating new columns as described at B<-u> option,
65 join the original column name with each of the unique field's values
66 by I<STR> string.
67 See example at B<-u> option description.
68 Default is underscore C<_>.
70 =back
72 =head1 EXAMPLES
74 This pipeline shows which users are using each of the configured default
75 shells, grouped by shell path.
77 # get the list of users
78 getent passwd |\
80 # transform into tabular data stream
81 tr : "\t" |\
82 td-add-headers USER X UID GID GECOS HOME SHELL |\
84 # put the shell in the first column, and sort, then collapse
85 td-select SHELL USER | td-keepheader sort | td-collapse -g ' ' |\
87 # change header name "USER" to "USERS"
88 td-alter USERS=USER | td-select +ALL -USER
90 B<Output>:
92 | COUNT | SHELL | USERS |
93 | 4 | /bin/bash | user1 user2 nova root |
94 | 5 | /bin/false | fetchmail hplip sddm speech-dispatcher sstpc |
95 | 1 | /bin/sync | sync |
96 | 1 | /sbin/rebootlogon | reboot |
97 | 6 | /usr/sbin/nologin | _apt avahi avahi-autoipd backup bin daemon |
99 =head1 CAVEATS
101 Input data has to be sorted first.
103 Group key is always the first input column.
105 If a row in the input data has more cells than the number of columns, those are ignored.
107 =head1 SEE ALSO
109 td-expand(1) is a kind of an inverse to td-collapse(1).
111 =head1 REFERENCES
113 td-collapse(1) roughly translates to SELECT COUNT(*) + GROUP_CONCAT() + GROUP BY in SQL.
115 =cut
# Command line option defaults. These are package globals on purpose:
# references to them are stored in %OptionDefs below.
$OptGlue = " ";                      # -g: string put between joined cell data
$OptDistUniqueField = undef;         # -u: column to distribute by; undef = feature off
$OptDistColumnNameSeparator = "_";   # -s: joins a column name and a unique value

# Getopt::Long-style option specifications mapped to the variables receiving
# the values.
# NOTE(review): presumably consumed by the common.pl loaded further down -
# confirm against that file.
%OptionDefs = (
	'g|glue=s' => \$OptGlue,
	'u|distribute-unique-field=s' => \$OptDistUniqueField,
	's|distributed-column-name-separator=s' => \$OptDistColumnNameSeparator,
);
127 use Data::Dumper;
128 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
129 do '/usr/lib/tool/perl5/tabdata/common.pl' or die "$@";
# Consume the header line of the input. The code below relies on
# process_header() filling @Header (column names by index) and %Header
# (column index by name).
# NOTE(review): process_header() is defined outside this file - confirm.
process_header(scalar <STDIN>);

# Remember how many columns the input itself has: commit_group may append
# generated columns to @Header later on.
$last_input_column_idx = $#Header;

# Resolve the -u column name to its index, refusing unknown column names.
if(defined $OptDistUniqueField)
{
	exists $Header{$OptDistUniqueField}
		or die "$0: no such column: $OptDistUniqueField\n";
	$uniq_field_idx = $Header{$OptDistUniqueField};
}
# commit_group - print the buffered row group as one collapsed output row.
#
# Takes no arguments and works purely on file-level globals:
#   reads:   $OptGlue, $OptDistUniqueField, $OptDistColumnNameSeparator,
#            $FS, $RS, $uniq_field_idx, $last_input_column_idx,
#            $prev_group_key
#   updates: @Header, %Header, @distributed_cells (first call only),
#            @group_members, $collapsed_rows, $group_count
#
# On the first call it also prints the header line. With -u in effect the
# header is first extended by one "<COLUMN><SEP><VALUE>" column for every
# non-unique input column and every value the unique column has in the
# first group.
#
# If no rows are buffered (empty input), only the header is printed.
sub commit_group
{
	# "defined" rather than plain truth, so a column literally named "0" works
	# and the test agrees with the one done right after header processing.
	my $distribute = defined $OptDistUniqueField;

	if($group_count == 0)
	{
		if($distribute)
		{
			# The range is taken from the input column count, so the columns
			# pushed onto @Header below are never revisited.
			my @target_columns = grep { $_ != $uniq_field_idx } 1..$last_input_column_idx;

			# Values are only distributed when there is at least one column
			# to distribute them into; otherwise the unique column is kept
			# as it is.
			@distributed_cells = @{ $group_members[$uniq_field_idx] // [] } if @target_columns;

			for my $colidx (@target_columns)
			{
				for my $uniq_cell (@distributed_cells)
				{
					my $distrib_colname = $Header[$colidx] . $OptDistColumnNameSeparator . $uniq_cell;
					push @Header, $distrib_colname;
					$Header{$distrib_colname} = $#Header;
				}
			}
		}

		print join($FS, "COUNT", @Header).$RS;
	}

	# Nothing buffered: do not emit a bogus "0 rows, empty key" record.
	# This can only happen on the final (END) call.
	return if not $collapsed_rows;

	if($distribute)
	{
		my %is_distributed = map { $_ => 1 } @distributed_cells;
		my $uniq_cells = $group_members[$uniq_field_idx] // [];

		for my $uniq_cell_idx (0..$#$uniq_cells)
		{
			my $uniq_cell = $uniq_cells->[$uniq_cell_idx];

			# Values not seen in the first group have no column of their
			# own (the header is already sent); they stay where they are.
			next if not $is_distributed{$uniq_cell};

			for my $colidx (1..$last_input_column_idx)
			{
				# The unique column itself has no distributed counterpart.
				next if $colidx == $uniq_field_idx;

				my $distrib_colname = $Header[$colidx] . $OptDistColumnNameSeparator . $uniq_cell;
				$group_members[$Header{$distrib_colname}]->[0] = $group_members[$colidx]->[$uniq_cell_idx];
				# Mark the moved cell for removal from its original column.
				$group_members[$colidx]->[$uniq_cell_idx] = undef;
			}
			$uniq_cells->[$uniq_cell_idx] = undef;
		}

		# Drop the cells marked above. Empty-string cells are defined, so
		# they are kept.
		for my $colidx (1..$last_input_column_idx)
		{
			@{$group_members[$colidx]} = grep {defined} @{$group_members[$colidx]};
		}
	}

	# Emit exactly one cell per header column, even for distributed columns
	# that got no value in this group, so every row has as many cells as the
	# header. Joining everything in one go also avoids a dangling field
	# separator when the input consists of the key column only.
	my @cells = map { join $OptGlue, @{ $group_members[$_] // [] } } 1..$#Header;
	print join($FS, $collapsed_rows, $prev_group_key, @cells) . $RS;

	@group_members = ();
	$collapsed_rows = 0;
	$group_count++;
}
# --- state shared between the main loop and commit_group ---
$group_key = undef;        # key (first cell) of the row being read
$prev_group_key = undef;   # key of the group currently being buffered
@group_members = ();       # per column index: list of cells buffered for the group
$collapsed_rows = 0;       # number of input rows buffered for the group
$group_count = 0;          # number of groups printed so far
@distributed_cells = ();   # unique-field values that got their own columns (-u)

# Set only once the header is processed and the state above is initialised.
# END blocks also run when the script dies or exits early (e.g. the
# "no such column" error), and in that case no output must be produced.
$input_ready = 1;

# Read the input record by record, flushing the buffered group whenever the
# key changes. The input is expected to be sorted by the key column.
# NOTE(review): read_record() is defined outside this file; presumably it
# returns the cells of the next input record - confirm.
while(not eof STDIN)
{
	my @input_row = read_record(\*STDIN);

	$group_key = $input_row[0];

	if(defined $prev_group_key and $group_key ne $prev_group_key)
	{
		commit_group();
	}

	# Buffer the non-key cells. Cells beyond the header's width are ignored,
	# missing ones are taken as empty.
	for my $cell_idx (1..$last_input_column_idx)
	{
		my $cell = $input_row[$cell_idx];
		$cell = '' unless defined $cell;
		push @{$group_members[$cell_idx]}, $cell;
	}

	$collapsed_rows++;
	$prev_group_key = $group_key;
}

# Flush the last group (or just the header if there were no rows) at exit,
# but only if the script got as far as reading data.
END { commit_group() if $input_ready; }