tests/sort/sort.pl

   1 #!/usr/bin/perl
   2
   3 # Copyright (C) 2008-2024 Free Software Foundation, Inc.
   4
   5 # This program is free software: you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation, either version 3 of the License, or
   8 # (at your option) any later version.
   9
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
  17
  18 use strict;
  19
  20 my $limits = getlimits ();
  21
  22 my $prog = 'sort';
  23
  24 # Force the C locale so the executable's output is not localized.
  25 $ENV{$_} = 'C' foreach qw(LANGUAGE LANG LC_ALL);
  26
  27 # Locale for the multibyte tests: 'C' if LOCALE_FR_UTF8 is unset or 'none'.
  28 my $mb_locale = $ENV{LOCALE_FR_UTF8};
  29 $mb_locale = 'C' unless defined $mb_locale && $mb_locale ne 'none';
  30
  31 # Each test is run both with a file name and with redirected stdin, so
  32 # the name in a diagnostic is either the file name or "-".
  33 # This ERR_SUBST rewrites whichever one appears to '-'.
  34 my $normalize_filename = {ERR_SUBST => 's/^$prog: .*?:/$prog: -:/'};
  35
  36 my $no_file = "$prog: cannot read: no-file: No such file or directory\n";
  37
  38 my @Tests =  # Each entry: [NAME, sort ARGS..., directive hashes...];
  39 (            # directives seen below: IN OUT ERR EXIT ENV CMP ERR_SUBST.
  40 ["n1", '-n', {IN=>".01\n0\n"}, {OUT=>"0\n.01\n"}],
  41 ["n2", '-n', {IN=>".02\n.01\n"}, {OUT=>".01\n.02\n"}],
  42 ["n3", '-n', {IN=>".02\n.00\n"}, {OUT=>".00\n.02\n"}],
  43 ["n4", '-n', {IN=>".02\n.000\n"}, {OUT=>".000\n.02\n"}],
  44 ["n5", '-n', {IN=>".021\n.029\n"}, {OUT=>".021\n.029\n"}],
  45
  46 ["n6", '-n', {IN=>".02\n.0*\n"}, {OUT=>".0*\n.02\n"}],
  47 ["n7", '-n', {IN=>".02\n.*\n"}, {OUT=>".*\n.02\n"}],
  48 ["n8a", '-s -n -k1,1', {IN=>".0a\n.0b\n"}, {OUT=>".0a\n.0b\n"}],
  49 ["n8b", '-s -n -k1,1', {IN=>".0b\n.0a\n"}, {OUT=>".0b\n.0a\n"}],
  50 ["n9a", '-s -n -k1,1', {IN=>".000a\n.000b\n"}, {OUT=>".000a\n.000b\n"}],
  51 ["n9b", '-s -n -k1,1', {IN=>".000b\n.000a\n"}, {OUT=>".000b\n.000a\n"}],
  52 ["n10a", '-s -n -k1,1', {IN=>".00a\n.000b\n"}, {OUT=>".00a\n.000b\n"}],
  53 ["n10b", '-s -n -k1,1', {IN=>".00b\n.000a\n"}, {OUT=>".00b\n.000a\n"}],
  54 ["n11a", '-s -n -k1,1', {IN=>".01a\n.010\n"}, {OUT=>".01a\n.010\n"}],
  55 ["n11b", '-s -n -k1,1', {IN=>".010\n.01a\n"}, {OUT=>".010\n.01a\n"}],
  56
  57 # human readable suffixes
  58 ["h1", '-h',
  59  {IN=>"1Q\n1R\n1Y\n1Z\n1E\n1P\n1T\n1G\n1M\n1K\n02\n1\nY\n-1k\n-1M\n-1G\n-1T\n"
  60       . "-1P\n-1E\n-1Z\n-1Y\n-1Q\n-1R\n"},
  61  {OUT=>"-1Q\n-1R\n-1Y\n-1Z\n-1E\n-1P\n-1T\n-1G\n-1M\n-1k\n"
  62       . "Y\n1\n02\n1K\n1M\n1G\n1T\n1P\n1E\n1Z\n1Y\n1R\n1Q\n"}],
  63 ["h2", '-h', {IN=>"1M\n-2G\n-3K"}, {OUT=>"-2G\n-3K\n1M\n"}],
  64 # check that it works with powers of 1024
  65 ["h3", '-k 2,2h -k 1,1', {IN=>"a 1G\nb 1023M\n"}, {OUT=>"b 1023M\na 1G\n"}],
  66 # decimal at end => allowed
  67 ["h4", '-h', {IN=>"1.E\n2.M\n"}, {OUT=>"2.M\n1.E\n"}],
  68 # double decimal => ignore suffix
  69 ["h5", '-h', {IN=>"1..2E\n2..2M\n"}, {OUT=>"1..2E\n2..2M\n"}],
  70 # "M" sorts before "G" regardless of the positive number attached.
  71 ["h6", '-h', {IN=>"1GiB\n1030MiB\n"}, {OUT=>"1030MiB\n1GiB\n"}],
  72 # check option incompatibility
  73 ["h7", '-hn', {IN=>""}, {OUT=>""}, {EXIT=>2},
  74  {ERR=>"$prog: options '-hn' are incompatible\n"}],
  75 # check key processing
  76 ["h8", '-n -k2,2h', {IN=>"1 1E\n2 2M\n"}, {OUT=>"2 2M\n1 1E\n"}],
  77 # SI and IEC prefixes on separate keys allowed
  78 ["h9", '-h -k1,1 -k2,2', {IN=>"1M 1Mi\n1M 1Mi\n"}, {OUT=>"1M 1Mi\n1M 1Mi\n"}],
  79 # This invalid SI and IEC prefix mixture is not significant so not noticed
  80 ["h10", '-h -k1,1 -k2,2', {IN=>"1M 2M\n2M 1Mi\n"}, {OUT=>"1M 2M\n2M 1Mi\n"}],
  81
  82 ["01a", '', {IN=>"A\nB\nC\n"}, {OUT=>"A\nB\nC\n"}],
  83 #
  84 ["02a", '-c', {IN=>"A\nB\nC\n"}, {OUT=>''}],
  85 ["02b", '-c', {IN=>"A\nC\nB\n"}, {OUT=>''}, {EXIT=>1},
  86  {ERR=>"$prog: -:3: disorder: B\n"}, $normalize_filename],
  87 ["02c", '-c -k1,1', {IN=>"a\na b\n"}, {OUT=>''}],
  88 ["02d", '-C', {IN=>"A\nB\nC\n"}, {OUT=>''}],
  89 ["02e", '-C', {IN=>"A\nC\nB\n"}, {OUT=>''}, {EXIT=>1}],
  90 # This should fail because there are duplicate keys
  91 ["02m", '-cu', {IN=>"A\nA\n"}, {OUT=>''}, {EXIT=>1},
  92  {ERR=>"$prog: -:2: disorder: A\n"}, $normalize_filename],
  93 ["02n", '-cu', {IN=>"A\nB\n"}, {OUT=>''}],
  94 ["02o", '-cu', {IN=>"A\nB\nB\n"}, {OUT=>''}, {EXIT=>1},
  95  {ERR=>"$prog: -:3: disorder: B\n"}, $normalize_filename],
  96 ["02p", '-cu', {IN=>"B\nA\nB\n"}, {OUT=>''}, {EXIT=>1},
  97  {ERR=>"$prog: -:2: disorder: A\n"}, $normalize_filename],
  98 ["02q", '-c -k 1,1fR', {IN=>"ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n"}],
  99 ["02r", '-c -k 1,1fV', {IN=>"ABC\nABc\nAbC\nAbc\naBC\naBc\nabC\nabc\n"}],
 100 ["02s", '-c -k 1,1dfR',
 101                  {IN=>".ABC\n.ABc.\nA.bC\nA.bc.\naB.C\naB.c.\nabC.\nabc..\n"}],
 102 #
 103 ["03a", '-k1', {IN=>"B\nA\n"}, {OUT=>"A\nB\n"}],
 104 ["03b", '-k1,1', {IN=>"B\nA\n"}, {OUT=>"A\nB\n"}],
 105 ["03c", '-k1 -k2', {IN=>"A b\nA a\n"}, {OUT=>"A a\nA b\n"}],
 106 # Fail with a diagnostic when -k specifies field == 0.
 107 ["03d", '-k0', {EXIT=>2},
 108  {ERR=>"$prog: -: invalid field specification '0'\n"},
 109   $normalize_filename],
 110 # Fail with a diagnostic when -k specifies character == 0.
 111 ["03e", '-k1.0', {EXIT=>2},
 112  {ERR=>"$prog: character offset is zero: invalid field specification '1.0'\n"}],
 113 ["03f", '-k1.1,-k0', {EXIT=>2},
 114  {ERR=>"$prog: invalid number after ',': invalid count at start of '-k0'\n"}],
 115 # This is ok.
 116 ["03g", '-k1.1,1.0', {IN=>''}],
 117 # This is equivalent to 3g.
 118 ["03h", '-k1.1,1', {IN=>''}],
 119 # This too, is equivalent to 3g.
 120 ["03i", '-k1,1', {IN=>''}],
 121 #
 122 ["04a", '-nc', {IN=>"2\n11\n"}],
 123 ["04b", '-n', {IN=>"11\n2\n"}, {OUT=>"2\n11\n"}],
 124 ["04c", '-k1n', {IN=>"11\n2\n"}, {OUT=>"2\n11\n"}],
 125 ["04d", '-k1', {IN=>"11\n2\n"}, {OUT=>"11\n2\n"}],
 126 ["04e", '-k2', {IN=>"ignored B\nz-ig A\n"}, {OUT=>"z-ig A\nignored B\n"}],
 127 #
 128 ["05a", '-k1,2', {IN=>"A B\nA A\n"}, {OUT=>"A A\nA B\n"}],
 129 ["05b", '-k1,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 130 ["05c", '-k1 -k2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 131 ["05d", '-k2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 132 ["05e", '-k2,2', {IN=>"A B Z\nA A A\n"}, {OUT=>"A A A\nA B Z\n"}],
 133 ["05f", '-k2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 134 #
 135 ["06a", '-k 1,2', {IN=>"A B\nA A\n"}, {OUT=>"A A\nA B\n"}],
 136 ["06b", '-k 1,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 137 ["06c", '-k 1 -k 2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 138 ["06d", '-k 2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 139 ["06e", '-k 2,2', {IN=>"A B Z\nA A A\n"}, {OUT=>"A A A\nA B Z\n"}],
 140 ["06f", '-k 2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
 141 #
 142 ["07a", '-k 2,3', {IN=>"9 a b\n7 a a\n"}, {OUT=>"7 a a\n9 a b\n"}],
 143 ["07b", '-k 2,3', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
 144 ["07c", '-k 2,3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
 145 ["07d", '+1 -3', {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
 146 # ensure a character position of 0 includes whole field
 147 ["07e", '-k 2,3.0', {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
 148 # ensure fields with end position before start are ignored
 149 ["07f", '-n -k1.3,1.1', {IN=>"a 2\nb 1\n"}, {OUT=>"a 2\nb 1\n"}],
 150 ["07g", '-n -k2.2,1.2', {IN=>"aa 2\nbb 1\n"}, {OUT=>"aa 2\nbb 1\n"}],
 151 ["07h", '-k1.3nb,1.3', {IN=>"  a 2\n  b 1\n"}, {OUT=>"  a 2\n  b 1\n"}],
 152 # ensure obsolescent key limits are handled correctly
 153 ["07i", '-s +0 -1', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
 154 ["07j", '-s +0 -1.0', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
 155 ["07k", '-s +0 -1.1', {IN=>"a c\na b\n"}, {OUT=>"a c\na b\n"}],
 156 ["07l", '-s +0 -1.2', {IN=>"a c\na b\n"}, {OUT=>"a b\na c\n"}],
 157 ["07m", '-s +0 -1.1b', {IN=>"a c\na b\n"}, {OUT=>"a b\na c\n"}],
 158 #
 159 # report an error for '.' without following char spec
 160 ["08a", '-k 2.,3', {EXIT=>2},
 161  {ERR=>"$prog: invalid number after '.': invalid count at start of ',3'\n"}],
 162 # report an error for ',' without following POS2
 163 ["08b", '-k 2,', {EXIT=>2},
 164  {ERR=>"$prog: invalid number after ',': invalid count at start of ''\n"}],
 165 #
 166 # Test new -g option.
 167 ["09a", '-g', {IN=>"1e2\n2e1\n"}, {OUT=>"2e1\n1e2\n"}],
 168 # Make sure -n works how we expect.
 169 ["09b", '-n', {IN=>"1e2\n2e1\n"}, {OUT=>"1e2\n2e1\n"}],
 170 ["09c", '-n', {IN=>"2e1\n1e2\n"}, {OUT=>"1e2\n2e1\n"}],
 171 ["09d", '-k2g', {IN=>"a 1e2\nb 2e1\n"}, {OUT=>"b 2e1\na 1e2\n"}],
 172 #
 173 # Bug reported by Roger Peel <R.Peel@ee.surrey.ac.uk>
 174 ["10a", '-t : -k 2.2,2.2', {IN=>":ba\n:ab\n"}, {OUT=>":ba\n:ab\n"}],
 175 # Equivalent to above, but using obsolescent '+pos -pos' option syntax.
 176 ["10b", '-t : +1.1 -1.2', {IN=>":ba\n:ab\n"}, {OUT=>":ba\n:ab\n"}],
 177 #
 178 # The same as the preceding two, but with input lines reversed.
 179 ["10c", '-t : -k 2.2,2.2', {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
 180 # Equivalent to above, but using obsolescent '+pos -pos' option syntax.
 181 ["10d", '-t : +1.1 -1.2', {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
 182 # Try without -t...
 183 # But note that we have to count the delimiting space at the beginning
 184 # of each field that has it.
 185 ["10a0", '-k 2.3,2.3', {IN=>"z ba\nz ab\n"}, {OUT=>"z ba\nz ab\n"}],
 186 ["10a1", '-k 1.2,1.2', {IN=>"ba\nab\n"}, {OUT=>"ba\nab\n"}],
 187 ["10a2", '-b -k 2.2,2.2', {IN=>"z ba\nz ab\n"}, {OUT=>"z ba\nz ab\n"}],
 188 #
 189 # An even simpler example demonstrating the bug.
 190 ["10e", '-k 1.2,1.2', {IN=>"ab\nba\n"}, {OUT=>"ba\nab\n"}],
 191 #
 192 # The way sort works on these inputs (10f and 10g) seems wrong to me.
 193 # See https://git.sv.gnu.org/gitweb/?p=coreutils.git;a=commitdiff;h=3c467c0d223
 194 # POSIX doesn't seem to say one way or the other, but that's the way all
 195 # other sort implementations work.
 196 ["10f", '-t : -k 1.3,1.3', {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
 197 ["10g", '-k 1.4,1.4', {IN=>"a ab\nb ba\n"}, {OUT=>"b ba\na ab\n"}],
 198 #
 199 # Exercise bug re using -b to skip trailing blanks.
 200 ["11a", '-t: -k1,1b -k2,2', {IN=>"a\t:a\na :b\n"}, {OUT=>"a\t:a\na :b\n"}],
 201 ["11b", '-t: -k1,1b -k2,2', {IN=>"a :b\na\t:a\n"}, {OUT=>"a\t:a\na :b\n"}],
 202 ["11c", '-t: -k2,2b -k3,3', {IN=>"z:a\t:a\na :b\n"}, {OUT=>"z:a\t:a\na :b\n"}],
 203 # Before 1.22m, the first key comparison reported equality.
 204 # With 1.22m, they compare different: "a" sorts before "a\n",
 205 # and the second key spec isn't even used.
 206 ["11d", '-t: -k2,2b -k3,3', {IN=>"z:a :b\na\t:a\n"}, {OUT=>"a\t:a\nz:a :b\n"}],
 207 #
 208 # Exercise bug re comparing '-' and integers.
 209 ["12a", '-n -t: +1', {IN=>"a:1\nb:-\n"}, {OUT=>"b:-\na:1\n"}],
 210 ["12b", '-n -t: +1', {IN=>"b:-\na:1\n"}, {OUT=>"b:-\na:1\n"}],
 211 # Try some other (e.g. 'X') invalid character.
 212 ["12c", '-n -t: +1', {IN=>"a:1\nb:X\n"}, {OUT=>"b:X\na:1\n"}],
 213 ["12d", '-n -t: +1', {IN=>"b:X\na:1\n"}, {OUT=>"b:X\na:1\n"}],
 214 # From Karl Heuer
 215 ["13a", '+0.1n', {IN=>"axx\nb-1\n"}, {OUT=>"b-1\naxx\n"}],
 216 ["13b", '+0.1n', {IN=>"b-1\naxx\n"}, {OUT=>"b-1\naxx\n"}],
 217 #
 218 # From Carl Johnson <carlj@cjlinux.home.org>
 219 ["14a", '-d -u', {IN=>"mal\nmal-\nmala\n"}, {OUT=>"mal\nmala\n"}],
 220 # Be sure to fix the (translate && ignore) case in keycompare.
 221 ["14b", '-f -d -u', {IN=>"mal\nmal-\nmala\n"}, {OUT=>"mal\nmala\n"}],
 222 #
 223 # Experiment with -i.
 224 ["15a", '-i -u', {IN=>"a\na\1\n"}, {OUT=>"a\n"}],
 225 ["15b", '-i -u', {IN=>"a\n\1a\n"}, {OUT=>"a\n"}],
 226 ["15c", '-i -u', {IN=>"a\1\na\n"}, {OUT=>"a\1\n"}],
 227 ["15d", '-i -u', {IN=>"\1a\na\n"}, {OUT=>"\1a\n"}],
 228 ["15e", '-i -u', {IN=>"a\n\1\1\1\1\1a\1\1\1\1\n"}, {OUT=>"a\n"}],
 229
 230 # This would fail (printing only the 7) for 8.6..8.18.
 231 # Use --parallel=1 for reproducibility, and a small buffer size
 232 # to let us trigger the problem with a smaller input.
 233 ["unique-1", '--p=1 -S32b -u', {IN=>"7\n"x11 . "1\n"}, {OUT=>"1\n7\n"}],
 234 # Demonstrate that 8.19's key-spec-adjusting code is required.
 235 # These are more finicky in that they are arch-dependent.
 236 ["unique-key-i686",   '-u -k2,2 --p=1 -S32b',
 237   {IN=>"a 7\n"x10 . "b 1\n"}, {OUT=>"b 1\na 7\n"}],
 238 ["unique-key-x86_64", '-u -k2,2 --p=1 -S32b',
 239   {IN=>"a 7\n"x11 . "b 1\n"}, {OUT=>"b 1\na 7\n"}],
 240 # Before 8.19, this would trigger a free-memory read.
 241 ["unique-free-mem-read", '-u --p=1 -S32b',
 242   {IN=>"a\n"."b"x900 ."\n"},
 243  {OUT=>"a\n"."b"x900 ."\n"}],
 244
 245 # From Erick Branderhorst -- fixed around 1.19e
 246 ["16a", '-f',
 247  {IN=>"éminence\nüberhaupt\n's-Gravenhage\naëroclub\nAag\naagtappels\n"},
 248  {OUT=>"'s-Gravenhage\nAag\naagtappels\naëroclub\néminence\nüberhaupt\n"}],
 249
 250 # This provokes a one-byte memory overrun of a malloc'd block for versions
 251 # of sort from textutils-1.19p and before.
 252 ["17", '-c', {IN=>"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"}],
 253
 254 # POSIX says -n no longer implies -b, so here we're comparing ' 9' and '10'.
 255 ["18a", '-k1.1,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],
 256
 257 # Just like above, because the global '-b' has no effect on the
 258 # key specifier when a key-specific option ('n' in this case) is used.
 259 ["18b", '-b -k1.1,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],
 260
 261 # Here we're comparing ' 90' and '10', because the 'b' on the key-end specifier
 262 # makes sort ignore leading blanks when determining that key's *end*.
 263 ["18c", '-k1.1,1.2nb', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
 264
 265 # Here we're comparing '9' and '10', because the 'b' on the key-start specifier
 266 # makes sort ignore leading blanks when determining that key's *start*.
 267 ["18d", '-k1.1b,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],
 268
 269 # This compares '90' and '10', as it ignores leading blanks for both
 270 # key start and key end.
 271 ["18e", '-nb -k1.1,1.2', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],
 272
 273 # When ignoring leading blanks for end position, ensure blanks from
 274 # next field are not included in the sort.  I.e., order should not change here.
 275 ["18f", '-k1,1b', {IN=>"a  y\na z\n"}, {OUT=>"a  y\na z\n"}],
 276
 277 # When ignoring leading blanks for start position, ensure blanks from
 278 # next field are not included in the sort.  I.e., order should not change here.
 279 # This was noticed as an issue on fedora 8 (only in multibyte locales).
 280 ["18g", '-k1b,1', {IN=>"a  y\na z\n"}, {OUT=>"a  y\na z\n"},
 281  {ENV => "LC_ALL=$mb_locale"}],
 282
 283 # This looks odd, but works properly -- 2nd keyspec is never
 284 # used because all lines are different.
 285 ["19a", '+0 +1nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],
 286
 287 # The test *intended* by the author of the above, but using the
 288 # more-intuitive POSIX-style -k options.
 289 ["19b", '-k1,1 -k2nr', {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 3\nb 2\nb 1\n"}],
 290
 291 # This test failed when sort-1.22 was compiled on a Next x86 system
 292 # without optimization.  Without optimization gcc uses the buggy version
 293 # of memcmp in the Next C library.  With optimization, gcc uses its
 294 # (working) builtin version.  Test case from William Lewis.
 295 ["20a", '',
 296  {IN=>"_________U__free\n_________U__malloc\n_________U__abort\n"
 297       . "_________U__memcpy\n_________U__memset\n"
 298       . "_________U_dyld_stub_binding_helper\n_________U__malloc\n"
 299       . "_________U___iob\n_________U__abort\n_________U__fprintf\n"},
 300  {OUT=>"_________U___iob\n_________U__abort\n_________U__abort\n"
 301        . "_________U__fprintf\n_________U__free\n_________U__malloc\n"
 302        . "_________U__malloc\n_________U__memcpy\n_________U__memset\n"
 303        . "_________U_dyld_stub_binding_helper\n"}],
 304
 305 # Demonstrate that folding changes the ordering of e.g. A, a, and _
 306 # because while they normally (in the C locale) collate like A, _, a,
 307 # when using -f, 'a' is compared as if it were 'A'.
 308 ["21a", '', {IN=>"A\na\n_\n"}, {OUT=>"A\n_\na\n"}],
 309 ["21b", '-f', {IN=>"A\na\n_\n"}, {OUT=>"A\na\n_\n"}],
 310 ["21c", '-f', {IN=>"a\nA\n_\n"}, {OUT=>"A\na\n_\n"}],
 311 ["21d", '-f', {IN=>"_\na\nA\n"}, {OUT=>"A\na\n_\n"}],
 312 ["21e", '-f', {IN=>"a\n_\nA\n"}, {OUT=>"A\na\n_\n"}],
 313 ["21f", '-fs', {IN=>"A\na\n_\n"}, {OUT=>"A\na\n_\n"}],
 314 ["21g", '-fu', {IN=>"a\n_\n"}, {OUT=>"a\n_\n"}],
 315
 316 # This test failed until 1.22f.  From Zvi Har'El.
 317 ["22a", '-k 2,2fd -k 1,1r', {IN=>"3 b\n4 B\n"}, {OUT=>"4 B\n3 b\n"}],
 318 ["22b", '-k 2,2d  -k 1,1r', {IN=>"3 b\n4 b\n"}, {OUT=>"4 b\n3 b\n"}],
 319
 320 # This fails in Fedora 20, per Göran Uddeborg in: https://bugs.gnu.org/18540
 321 ["23", '-s -k1,1 -t/', {IN=>"a b/x\na-b-c/x\n"}, {OUT=>"a b/x\na-b-c/x\n"},
 322  {ENV => "LC_ALL=$mb_locale"}],
 323
 324 ["no-file1", 'no-file', {EXIT=>2}, {ERR=>$no_file}],
 325 # This test failed until 1.22f.  Sort didn't give an error.
 326 # From Will Edgington.
 327 ["o-no-file1", qw(-o no-file no-file), {EXIT=>2}, {ERR=>$no_file}],
 328
 329 ["create-empty", qw(-o no/such/file /dev/null), {EXIT=>2},
 330  {ERR=>"$prog: open failed: no/such/file: No such file or directory\n"}],
 331
 332 # From Paul Eggert.  This was fixed in textutils-1.22k.
 333 ["neg-nls", '-n', {IN=>"-1\n-9\n"}, {OUT=>"-9\n-1\n"}],
 334
 335 # From Paul Eggert.  This was fixed in textutils-1.22m.
 336 # The bug was visible only when using the internationalized sorting code
 337 # (i.e., not when configured with --disable-nls).
 338 ["nul-nls", '', {IN=>"\0b\n\0a\n"}, {OUT=>"\0a\n\0b\n"}],
 339
 340 # Paul Eggert wrote:
 341 # A previous version of POSIX incorrectly required that the newline
 342 # at the end of the input line contributed to the sort, which would
 343 # mean that an empty line should sort after a line starting with a tab
 344 # (because \t precedes \n in the ASCII collating sequence).
 345 # GNU 'sort' was altered to do this, but was changed back once it
 346 # was discovered to be a POSIX bug (and the POSIX bug was fixed).
 347 # Check that 'sort' conforms to the fixed POSIX, not to the buggy one.
 348 ["use-nl", '', {IN=>"\n\t\n"}, {OUT=>"\n\t\n"}],
 349
 350 # Specifying two -o options should evoke a failure
 351 ["o2", qw(-o x -o y), {EXIT=>2},
 352  {ERR=>"foo\n"}, {ERR_SUBST => 's/^$prog: .*/foo/'}],
 353
 354 # Specifying incompatible options should evoke a failure.
 355 ["incompat1", '-in', {EXIT=>2},
 356  {ERR=>"$prog: options '-in' are incompatible\n"}],
 357 ["incompat2", '-nR', {EXIT=>2},
 358  {ERR=>"$prog: options '-nR' are incompatible\n"}],
 359 ["incompat3", '-dfgiMnR', {EXIT=>2},
 360  {ERR=>"$prog: options '-dfgMnR' are incompatible\n"}],
 361 ["incompat4", qw(-c -o /dev/null), {EXIT=>2},
 362  {ERR=>"$prog: options '-co' are incompatible\n"}],
 363 ["incompat5", qw(-C -o /dev/null), {EXIT=>2},
 364  {ERR=>"$prog: options '-Co' are incompatible\n"}],
 365 ["incompat6", '-cC', {EXIT=>2},
 366  {ERR=>"$prog: options '-cC' are incompatible\n"}],
 367 ["incompat7", qw(--sort=random -n), {EXIT=>2},
 368  {ERR=>"$prog: options '-nR' are incompatible\n"}],
 369
 370 # -t '\0' is accepted, as of coreutils-5.0.91
 371 ['nul-tab', "-k2,2 -t '\\0'",
 372  {IN=>"a\0z\01\nb\0y\02\n"}, {OUT=>"b\0y\02\na\0z\01\n"}],
 373
 374 # fields > SIZE_MAX are silently interpreted as SIZE_MAX
 375 ["bigfield1", "-k $limits->{UINTMAX_OFLOW}",
 376  {IN=>"2\n1\n"}, {OUT=>"1\n2\n"}],
 377 ["bigfield2", "-k $limits->{SIZE_OFLOW}",
 378  {IN=>"2\n1\n"}, {OUT=>"1\n2\n"}],
 379
 380 # Using an old-style key-specifying option like +1 with an invalid
 381 # ordering-option character would cause sort to try to free an invalid
 382 # (non-malloc'd) pointer.  This bug affects coreutils-6.5 through 6.9.
 383 ['obs-inval', '+1x', {EXIT=>2},
 384  {ERR=>"foo\n"}, {ERR_SUBST => 's/^$prog: .*/foo/'}],
 385
 386 # Exercise the code that enlarges the line buffer.
 387 ['realloc-buf', '-S1', {IN=>'a'x4000 ."\n"}, {OUT=>'a'x4000 ."\n"}],
 388 ['realloc-buf-2', '-S1', {IN=>'a'x5 ."\n"}, {OUT=>'a'x5 ."\n"}],
 389
 390 ["sort-numeric", '--sort=numeric', {IN=>".01\n0\n"}, {OUT=>"0\n.01\n"}],
 391 ["sort-gennum", '--sort=general-numeric',
 392   {IN=>"1e2\n2e1\n"}, {OUT=>"2e1\n1e2\n"}],
 393
 394 # -m with output file also used as an input file
 395 # In coreutils-7.2, this caused a segfault.
 396 # This test looks a little strange.  Here's why:
 397 # since we're using "-o f", standard output will be empty, hence OUT=>''
 398 # We still want to ensure that the output file, "f" has expected contents,
 399 # hence the added CMP=> directive.
 400 ["output-is-input", '-m -o f', {IN=> {f=> "a\n"}}, {OUT=>''},
 401  {CMP=> ["a\n", {'f'=> undef}]} ],
 402 ["output-is-input-2", '-m -o f', {OUT=>''},
 403  {IN=> {f=> "a\n"}}, {IN=> {g=> "b\n"}}, {IN=> {h=> "c\n"}},
 404  {CMP=> ["a\nb\nc\n", {'f'=> undef}]} ],
 405 ["output-is-input-3", '-m -o f', {OUT=>''},
 406  {IN=> {g=> "a\n"}}, {IN=> {h=> "b\n"}}, {IN=> {f=> "c\n"}},
 407  {CMP=> ["a\nb\nc\n", {'f'=> undef}]} ],
 408
 409 # --zero-terminated
 410 ['zero-1', '-z', {IN=>"2\0001\000"}, {OUT=>"1\0002\000"}],
 411 ['zero-2', '-z -k2,2', {IN=>"1\n2\0002\n1\000"}, {OUT=>"2\n1\0001\n2\000"}],
 412 ['zero-3', '-zb -k2,2', {IN=>"1\n\n2\0002\n1\0"}, {OUT=>"2\n1\0001\n\n2\0"}],
 413 );
 414
 415 # Add _POSIX2_VERSION=199209 to the environment of each test
 416 # that uses an old-style option like +1.
 417 foreach my $t (@Tests)
 418   {
 419     foreach my $e (@$t)
 420       {
 421         !ref $e && $e =~ /\+\d/
 422           and push (@$t, {ENV=>'_POSIX2_VERSION=199209'}), last;
 423       }
 424   }
 425
 426 @Tests = triple_test \@Tests;
 427
 428 # Remember that triple_test creates from each test with exactly one "IN"
 429 # file two more tests (.p and .r suffix on name) corresponding to reading
 430 # input from a file and from a pipe.  The pipe-reading test would fail
 431 # due to a race condition about 1 in 20 times.
 432 # Remove the IN_PIPE version of the "output-is-input" test above.
 433 # The others aren't susceptible because they have three inputs each.
 434 @Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests;
 435
 436 my $save_temps = $ENV{DEBUG};
 437 my $verbose = $ENV{VERBOSE};
 438
 439 my $fail = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
 440 exit $fail;