7 pcut - Cut given fields of text input separated by the given Perl regex
11 pcut [I<OPTIONS>] [I<FILE> [I<FILE> [...]]]
15 Standard cut(1) breaks up input lines by a given single char.
16 pcut(1) does this by the given perl(1)-compatible regular expression.
17 cut(1) outputs fields always in ascending order, without duplication.
18 pcut(1) outputs fields in the requested order, even multiple times if asked so by the B<-f> option.
24 =item -f, --fields I<NUMBERS>
27 See cut(1) for syntax.
29 =item -d, --delimiter I<REGEX>
31 Default is whitespace (C<\s+>).
33 =item -s, --only-delimited
35 See the same option in cut(1).
37 =item -D, --output-delimiter I<STRING>
39 Define the output field delimiter.
40 Default is not to use a constant output delimiter,
41 but to preserve the separator substrings as they matched to the pattern of B<-d> option
42 (see B<--prefer-preceding-delimiter> and B<--prefer-succeeding-delimiter> options).
44 =item -P, --prefer-preceding-delimiter
46 =item --prefer-succeeding-delimiter (default)
48 Contrary to cut(1), pcut(1) does not always use a constant delimiter char,
49 but a regexp pattern which may match to different substrings between fields in the input lines.
51 Each output field (except the last one) is followed by that substring
52 which was matched to the delimiter pattern just right after that field in the input.
54 With B<--prefer-preceding-delimiter>, each output field (except the first one) is similarly preceded by that substring
55 which was matched to the delimiter pattern just before that field in the input.
57 =item --delimiter-before-first I<STRING>
59 Write I<STRING> before field 1 if it is not the first field on the output (in B<--prefer-preceding-delimiter> mode).
61 =item --delimiter-after-last I<STRING>
63 Write I<STRING> after the last field if it is written not as the last field on the output.
65 =item -z, --zero-terminated
67 Terminate output records (lines) by NUL char instead of LineFeed.
82 use Getopt
::Long qw
/:config no_ignore_case no_bundling no_getopt_compat no_auto_abbrev require_order/;
83 use List
::MoreUtils qw
/all any none/;
85 no if ($] >= 5.018), 'warnings' => 'experimental::smartmatch';
88 $OptDelimiter = '\s+';
89 $OptOutputDelimiter = undef;
90 $OptPreferPreceding = 0;
91 $OptDelimBeforeFirst = undef;
92 $OptDelimAfterLast = undef;
93 $OptOnlyDelimited = 0;
96 'f|fields=s@' => \
@OptFields,
97 'd|delimiter=s' => \
$OptDelimiter,
98 'z|zero-terminated' => \
$OptZeroTerminated,
99 'D|output-delimiter=s' => \
$OptOutputDelimiter,
100 'P|prefer-preceding-delimiter' => \
$OptPreferPreceding,
101 'prefer-succeeding-delimiter' => sub { $OptPreferPreceding = 0; },
102 'delimiter-before-first=s' => \
$OptDelimBeforeFirst,
103 'delimiter-after-last=s' => \
$OptDelimAfterLast,
104 's|only-delimited' => \
$OptOnlyDelimited,
105 'help' => sub { pod2usage
(-exitval
=>0, -verbose
=>99); },
106 '<>' => sub { unshift @ARGV, @_[0]; die '!FINISH'; },
107 ) or pod2usage
(-exitval
=>2, -verbose
=>99);
109 if(defined $OptOutputDelimiter)
111 if($OptPreferPreceding)
113 die "$0: Options -D and -P are mutually exclusive.\n";
115 if(defined $OptDelimBeforeFirst or defined $OptDelimAfterLast)
117 die "$0: Options -D and --delimiter-before-first/--delimiter-after-last are mutually exclusive.\n";
123 push @OptFields, '1-';
126 @OptFields = map {split /,/} @OptFields;
128 for my $fields (@OptFields)
130 die "$0: invalid fields: $fields\n" if
131 $fields !~ /^(\d+(-(\d+)?)?|-\d+)$/
132 or ($fields =~ /^(\d+)-(\d+)$/ and $2 < $1)
133 or ($fields =~ /\b0\b/);
138 [map { $_[$_*2] } (0 .. $#_ / 2)],
139 [map { $_[$_*2+1] } (0 .. $#_ / 2)];
144 map { $_[$_], $_[$_ + ($#_+1)/2] } (0 .. $#_/2);
147 sub precedingDelimiter
150 if($p{fieldIndex
} == 0)
152 if(defined $OptDelimBeforeFirst) { return $OptDelimBeforeFirst; }
153 else { return $p{succeedingDelimiters
}->[0]; }
155 return $p{succeedingDelimiters
}->[$p{fieldIndex
} - 1];
160 eval 'use ARGV::readonly; 1' or die;
165 my ($Field, $Delimiter) = unzip
split /($OptDelimiter)/;
167 if($OptOnlyDelimited and not defined $Delimiter->[0])
172 for my $requested_fieldnumbers_idx (0 .. $#OptFields)
174 my $requested_fieldnumbers = $OptFields[$requested_fieldnumbers_idx];
176 if($requested_fieldnumbers =~ /^(\d+)$/) { ($1-1); }
177 elsif($requested_fieldnumbers =~ /^(\d+)-(\d+)$/) { ($1-1 .. $2-1); }
178 elsif($requested_fieldnumbers =~ /^(\d+)-$/) { ($1-1 .. $#$Field); }
179 elsif($requested_fieldnumbers =~ /^-(\d+)$/) { (0 .. $1-1); }
180 else { die 'SHOULD-NOT-REACHED'; }
182 for my $field_idx_idx (0 .. $#fields)
184 my $field_idx = $fields[$field_idx_idx];
185 if($OptPreferPreceding and $requested_fieldnumbers_idx+$field_idx_idx > 0)
187 print precedingDelimiter
(fieldIndex
=>$field_idx, succeedingDelimiters
=>$Delimiter);
189 print $Field->[$field_idx];
190 if(not $OptPreferPreceding and ($requested_fieldnumbers_idx < $#OptFields or $field_idx_idx < $#fields))
192 print $OptOutputDelimiter // $Delimiter->[$field_idx] // $OptDelimAfterLast;
197 if($OptZeroTerminated)