# Before coreutils-8.32, uniq would not distinguish
# items which compared equal with strcoll().
# So ensure we avoid strcoll() for the following cases.

# Copyright (C) 2020-2024 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Source the shared test helpers (srcdir defaults to the current directory),
# then call path_prepend_ on ./src -- presumably so the freshly built tools
# are found before any installed ones (confirm against tests/init.sh).
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
# Pass the names of the tools exercised below to print_ver_.
print_ver_ uniq printf

# gen_input FORMAT [ARG]...
# Write the printf-formatted arguments to the file 'in'.
# 'env' is used so that the external printf program runs (not a shell
# builtin), with LC_ALL set to $LOCALE_FR_UTF8 so that \uNNNN escapes in
# FORMAT are emitted in that locale's encoding.
# Calls framework_failure_ if the input file cannot be produced.
gen_input()
{
  env LC_ALL=$LOCALE_FR_UTF8 printf "$@" > in || framework_failure_
}

# strcoll() used to return 0 comparing the following strings
# which was fixed somewhere between glibc-2.22 and glibc-2.30
gen_input '%s\n' 'ⁿᵘˡˡ' 'ܥܝܪܐܩ'
# uniq must keep both lines (wc -l reports 2); otherwise record a failure.
test $(LC_ALL=$LOCALE_FR_UTF8 uniq < in | wc -l) = 2 || fail=1

# normalization in strcoll is inconsistent across platforms.
# glibc based systems at least do _not_ normalize in strcoll,
# while cygwin systems for example may do so.
# á composed and decomposed, are generally not compared equal
# Input: U+00E1 on one line, 'a' followed by U+0301 on the next.
gen_input '\u00E1\na\u0301\n'
# Both forms must survive as distinct lines.
test $(LC_ALL=$LOCALE_FR_UTF8 uniq < in | wc -l) = 2 || fail=1
# Similarly with the following equivalent hangul characters
# Input: U+AC01 on one line, the sequence U+1100 U+1161 U+11A8 on the next.
gen_input '\uAC01\n\u1100\u1161\u11A8\n'
# Run uniq under a Korean locale; the two spellings must stay distinct.
test $(LC_ALL=ko_KR.utf8 uniq < in | wc -l) = 2 || fail=1

# Note if running in the wrong locale,
# strcoll may indicate the strings match when they don't.
# I.e., cjk and hangul will now work even if
# uniq is running in the wrong locale

# Two different hangul syllables (U+AC00, U+AC01), compared under en_US.utf8.
gen_input '\uAC00\n\uAC01\n'
test $(LC_ALL=en_US.utf8 uniq < in | wc -l) = 2 || fail=1
# Two different CJK ideographs (U+3400, U+3401), compared under en_US.utf8:
# uniq must still report two distinct lines.
gen_input '\u3400\n\u3401\n'
test $(LC_ALL=en_US.utf8 uniq < in | wc -l) = 2 || fail=1

# Note strcoll() ignores certain characters,
# but not if the strings are otherwise equal.
# I.e., the following on glibc-2.30 at least,
# as expected, does not print a single item,
# but testing here for illustration
# NOTE(review): the input-generation line for this case was missing from the
# damaged text; two lines differing only in leading punctuation are used
# here -- confirm the exact strings against the upstream test.
gen_input ',a\n.a\n'
test $(LC_ALL=$LOCALE_FR_UTF8 uniq < in | wc -l) = 2 || fail=1