1 //===----------------------------------------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 // UNSUPPORTED: c++03, c++11, c++14, c++17
9 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
11 // This version runs the test when the platform has Unicode support.
12 // UNSUPPORTED: libcpp-has-no-unicode
14 // TODO FMT This test should not require std::to_chars(floating-point)
15 // XFAIL: availability-fp_to_chars-missing
19 // Tests the Unicode width support of the standard format specifiers.
20 // It tests [format.string.std]/8 - 11:
21 // - Properly determining the estimated width of a Unicode string.
22 // - Properly truncating to the wanted maximum width.
24 // More specific extended grapheme cluster boundary rules are tested in
25 // test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp
26 // this test is based on test data provided by the Unicode Consortium.
32 #include "make_string.h"
33 #include "test_macros.h"
34 #include "string_literal.h"
35 #include "test_format_string.h"
37 #ifndef TEST_HAS_NO_LOCALIZATION
39 # include <type_traits>
// Shorthand used by every test below: expands to MAKE_STRING_VIEW(CharT, S),
// so it can only be used where a type named `CharT` is in scope.
42 #define SV(S) MAKE_STRING_VIEW(CharT, S)
// Test helper: formats `args` with `fmt` through std::format and asserts that
// the produced string is equal to `expected`.
//
// - expected: the exact output the formatting call has to produce.
// - fmt:      the format string, passed as a test_format_string<CharT, Args...>.
// - args:     the formatting arguments; they are forwarded to std::format.
//
// When TEST_HAS_NO_LOCALIZATION is not defined and CharT is `char`, the format
// string, the expected output and the actual output are streamed to std::cerr.
44 template < class CharT
, class... Args
>
45 void check(std::basic_string_view
<CharT
> expected
, test_format_string
<CharT
, Args
...> fmt
, Args
&&... args
) {
46 std::basic_string
<CharT
> out
= std::format(fmt
, std::forward
<Args
>(args
)...);
// The diagnostic output needs <iostream>-style streams, hence the guard.
47 #ifndef TEST_HAS_NO_LOCALIZATION
// std::cerr is a narrow stream, so only the `char` instantiation prints.
48 if constexpr (std::same_as
<CharT
, char>)
50 std::cerr
<< "\nFormat string " << fmt
.get() << "\nExpected output " << expected
<< "\nActual output " << out
// The actual verification: the formatted text must match exactly.
53 assert(out
== expected
);
// Verifies the estimated column width of a single code point.
//
// Every case centres one code point with '*' as fill character and expects
// exactly one '*' on each side. The requested width therefore encodes the
// number of columns the code point is expected to occupy:
// - "{:*^3}": the code point accounts for one column.
// - "{:*^4}": the code point accounts for two columns.
// The cases are grouped by the section headers below ("N-byte code points").
56 template <class CharT
>
57 static void test_single_code_point_fill() {
58 //*** 1-byte code points ***
59 check(SV("* *"), SV("{:*^3}"), SV(" "));
60 check(SV("*~*"), SV("{:*^3}"), SV("~"));
62 //*** 2-byte code points ***
63 check(SV("*\u00a1*"), SV("{:*^3}"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
64 check(SV("*\u07ff*"), SV("{:*^3}"), SV("\u07ff")); // NKO TAMAN SIGN
66 //*** 3-byte code points ***
67 check(SV("*\u0800*"), SV("{:*^3}"), SV("\u0800")); // SAMARITAN LETTER ALAF
68 check(SV("*\ufffd*"), SV("{:*^3}"), SV("\ufffd")); // REPLACEMENT CHARACTER
// From here on most code points are expected to take two columns (width 4);
// the few "{:*^3}" cases in between are expected to take one column.
71 check(SV("*\u1100*"), SV("{:*^4}"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
72 check(SV("*\u115f*"), SV("{:*^4}"), SV("\u115f")); // HANGUL CHOSEONG FILLER
74 check(SV("*\u2329*"), SV("{:*^4}"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
75 check(SV("*\u232a*"), SV("{:*^4}"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET
77 check(SV("*\u2e80*"), SV("{:*^4}"), SV("\u2e80")); // CJK RADICAL REPEAT
78 check(SV("*\u303e*"), SV("{:*^4}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR
80 check(SV("*\u3041*"), SV("{:*^4}"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A
81 check(SV("*\ua4d0*"), SV("{:*^3}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA
83 check(SV("*\uac00*"), SV("{:*^4}"), SV("\uac00")); // <Hangul Syllable, First>
84 check(SV("*\ud7a3*"), SV("{:*^4}"), SV("\ud7a3")); // Hangul Syllable Hih
86 check(SV("*\uf900*"), SV("{:*^4}"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
// NOTE(review): the literal on the next line is U+FAFF while its comment names
// U+FB00 - confirm which code point was intended.
87 check(SV("*\ufaff*"), SV("{:*^4}"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF
89 check(SV("*\ufe10*"), SV("{:*^4}"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
90 check(SV("*\ufe19*"), SV("{:*^4}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
92 check(SV("*\ufe30*"), SV("{:*^4}"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
93 check(SV("*\ufe70*"), SV("{:*^3}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM
95 check(SV("*\uff01*"), SV("{:*^4}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
96 check(SV("*\uff60*"), SV("{:*^4}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS
98 check(SV("*\uffe0*"), SV("{:*^4}"), SV("\uffe0")); // FULLWIDTH CENT SIGN
99 check(SV("*\uffe6*"), SV("{:*^4}"), SV("\uffe6")); // FULLWIDTH WON SIGN
101 //*** 4-byte code points ***
102 check(SV("*\U00010000*"), SV("{:*^3}"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
103 check(SV("*\U0010FFFF*"), SV("{:*^3}"), SV("\U0010FFFF")); // Undefined Character
// The remaining 4-byte code points are expected to take two columns.
106 check(SV("*\U0001f300*"), SV("{:*^4}"), SV("\U0001f300")); // CYCLONE
107 check(SV("*\U0001f64f*"), SV("{:*^4}"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
108 check(SV("*\U0001f900*"), SV("{:*^4}"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
109 check(SV("*\U0001f9ff*"), SV("{:*^4}"), SV("\U0001f9ff")); // NAZAR AMULET
110 check(SV("*\U00020000*"), SV("{:*^4}"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
111 check(SV("*\U0002fffd*"), SV("{:*^4}"), SV("\U0002fffd")); // Undefined Character
112 check(SV("*\U00030000*"), SV("{:*^4}"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
113 check(SV("*\U0003fffd*"), SV("{:*^4}"), SV("\U0003fffd")); // Undefined Character
// Verifies truncation of a single code point to a precision of one column.
//
// Every case uses "{:*^3.1}": width 3, centred, '*' as fill and precision 1.
// It feeds the same code points as test_single_code_point_fill.
116 // One column output is unaffected.
117 // Two column output is removed, thus the result is only the fill character.
// Consequently the expected output is either "*<code point>*" (kept) or "***"
// (code point dropped, all three columns are fill).
118 template <class CharT
>
119 static void test_single_code_point_truncate() {
120 //*** 1-byte code points ***
121 check(SV("* *"), SV("{:*^3.1}"), SV(" "));
122 check(SV("*~*"), SV("{:*^3.1}"), SV("~"));
124 //*** 2-byte code points ***
125 check(SV("*\u00a1*"), SV("{:*^3.1}"), SV("\u00a1")); // INVERTED EXCLAMATION MARK
126 check(SV("*\u07ff*"), SV("{:*^3.1}"), SV("\u07ff")); // NKO TAMAN SIGN
128 //*** 3-byte code points ***
129 check(SV("*\u0800*"), SV("{:*^3.1}"), SV("\u0800")); // SAMARITAN LETTER ALAF
130 check(SV("*\ufffd*"), SV("{:*^3.1}"), SV("\ufffd")); // REPLACEMENT CHARACTER
// From here on most code points do not fit in one column and are dropped; the
// few that are kept are the ones expected to take a single column.
133 check(SV("***"), SV("{:*^3.1}"), SV("\u1100")); // HANGUL CHOSEONG KIYEOK
134 check(SV("***"), SV("{:*^3.1}"), SV("\u115f")); // HANGUL CHOSEONG FILLER
136 check(SV("***"), SV("{:*^3.1}"), SV("\u2329")); // LEFT-POINTING ANGLE BRACKET
137 check(SV("***"), SV("{:*^3.1}"), SV("\u232a")); // RIGHT-POINTING ANGLE BRACKET
139 check(SV("***"), SV("{:*^3.1}"), SV("\u2e80")); // CJK RADICAL REPEAT
140 check(SV("***"), SV("{:*^3.1}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR
142 check(SV("***"), SV("{:*^3.1}"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A
143 check(SV("*\ua4d0*"), SV("{:*^3.1}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA
145 check(SV("***"), SV("{:*^3.1}"), SV("\uac00")); // <Hangul Syllable, First>
146 check(SV("***"), SV("{:*^3.1}"), SV("\ud7a3")); // Hangul Syllable Hih
148 check(SV("***"), SV("{:*^3.1}"), SV("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
// NOTE(review): the literal on the next line is U+FAFF while its comment names
// U+FB00 - confirm which code point was intended.
149 check(SV("***"), SV("{:*^3.1}"), SV("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF
151 check(SV("***"), SV("{:*^3.1}"), SV("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
152 check(SV("***"), SV("{:*^3.1}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
154 check(SV("***"), SV("{:*^3.1}"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
155 check(SV("*\ufe70*"), SV("{:*^3.1}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM
157 check(SV("***"), SV("{:*^3.1}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK
158 check(SV("***"), SV("{:*^3.1}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS
160 check(SV("***"), SV("{:*^3.1}"), SV("\uffe0")); // FULLWIDTH CENT SIGN
161 check(SV("***"), SV("{:*^3.1}"), SV("\uffe6")); // FULLWIDTH WON SIGN
163 //*** 4-byte code points ***
164 check(SV("*\U00010000*"), SV("{:*^3.1}"), SV("\U00010000")); // LINEAR B SYLLABLE B008 A
165 check(SV("*\U0010FFFF*"), SV("{:*^3.1}"), SV("\U0010FFFF")); // Undefined Character
// The remaining 4-byte code points are dropped by the one-column precision.
168 check(SV("***"), SV("{:*^3.1}"), SV("\U0001f300")); // CYCLONE
169 check(SV("***"), SV("{:*^3.1}"), SV("\U0001f64f")); // PERSON WITH FOLDED HANDS
170 check(SV("***"), SV("{:*^3.1}"), SV("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
171 check(SV("***"), SV("{:*^3.1}"), SV("\U0001f9ff")); // NAZAR AMULET
172 check(SV("***"), SV("{:*^3.1}"), SV("\U00020000")); // <CJK Ideograph Extension B, First>
173 check(SV("***"), SV("{:*^3.1}"), SV("\U0002fffd")); // Undefined Character
174 check(SV("***"), SV("{:*^3.1}"), SV("\U00030000")); // <CJK Ideograph Extension G, First>
175 check(SV("***"), SV("{:*^3.1}"), SV("\U0003fffd")); // Undefined Character
// Width and truncation checks on a fixed set of sample strings; judging by the
// function name these are the examples of the WG21 paper P1868.
//
// The same inputs are formatted three times: without precision, with a
// precision of one column and with a precision of two columns. As in the other
// tests the input is centred with '*' as fill character.
178 // The examples used in that paper.
179 template <class CharT
>
180 static void test_P1868() {
// No precision: the whole input is kept and padded with one '*' per side.
182 check(SV("*\u0041*"), SV("{:*^3}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
183 check(SV("*\u00c1*"), SV("{:*^3}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
// NOTE(review): this call and the other two-line `check` calls in this function
// pass no format string, unlike their neighbours - confirm an argument line was
// not lost.
184 check(SV("*\u0041\u0301*"),
186 SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
187 check(SV("*\u0132*"), SV("{:*^3}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
188 check(SV("*\u0394*"), SV("{:*^3}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }
190 check(SV("*\u0429*"), SV("{:*^3}"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
191 check(SV("*\u05d0*"), SV("{:*^3}"), SV("\u05d0")); // { HEBREW LETTER ALEF }
192 check(SV("*\u0634*"), SV("{:*^3}"), SV("\u0634")); // { ARABIC LETTER SHEEN }
193 check(SV("*\u3009*"), SV("{:*^4}"), SV("\u3009")); // { RIGHT-POINTING ANGLE BRACKET }
194 check(SV("*\u754c*"), SV("{:*^4}"), SV("\u754c")); // { CJK Unified Ideograph-754C }
195 check(SV("*\U0001f921*"), SV("{:*^4}"), SV("\U0001f921")); // { CLOWN FACE }
196 check(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
198 SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
200 // Truncate to 1 column: 1 column grapheme clusters are kept together.
// Inputs that need two columns are dropped entirely, leaving only fill ("***").
201 check(SV("*\u0041*"), SV("{:*^3.1}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
202 check(SV("*\u00c1*"), SV("{:*^3.1}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
203 check(SV("*\u0041\u0301*"),
205 SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
206 check(SV("*\u0132*"), SV("{:*^3.1}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
207 check(SV("*\u0394*"), SV("{:*^3.1}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }
209 check(SV("*\u0429*"), SV("{:*^3.1}"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
210 check(SV("*\u05d0*"), SV("{:*^3.1}"), SV("\u05d0")); // { HEBREW LETTER ALEF }
211 check(SV("*\u0634*"), SV("{:*^3.1}"), SV("\u0634")); // { ARABIC LETTER SHEEN }
212 check(SV("***"), SV("{:*^3.1}"), SV("\u3009")); // { RIGHT-POINTING ANGLE BRACKET }
213 check(SV("***"), SV("{:*^3.1}"), SV("\u754c")); // { CJK Unified Ideograph-754C }
214 check(SV("***"), SV("{:*^3.1}"), SV("\U0001f921")); // { CLOWN FACE }
// NOTE(review): the next line is only the tail of a `check` call; its opening
// line(s) are not present here - confirm against the upstream file.
217 SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
219 // Truncate to 2 column: 2 column grapheme clusters are kept together.
// With two columns available every input fits, so nothing is dropped.
220 check(SV("*\u0041*"), SV("{:*^3.2}"), SV("\u0041")); // { LATIN CAPITAL LETTER A }
221 check(SV("*\u00c1*"), SV("{:*^3.2}"), SV("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE }
222 check(SV("*\u0041\u0301*"),
224 SV("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT }
225 check(SV("*\u0132*"), SV("{:*^3.2}"), SV("\u0132")); // { LATIN CAPITAL LIGATURE IJ }
226 check(SV("*\u0394*"), SV("{:*^3.2}"), SV("\u0394")); // { GREEK CAPITAL LETTER DELTA }
228 check(SV("*\u0429*"), SV("{:*^3.2}"), SV("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA }
229 check(SV("*\u05d0*"), SV("{:*^3.2}"), SV("\u05d0")); // { HEBREW LETTER ALEF }
230 check(SV("*\u0634*"), SV("{:*^3.2}"), SV("\u0634")); // { ARABIC LETTER SHEEN }
231 check(SV("*\u3009*"), SV("{:*^4.2}"), SV("\u3009")); // { RIGHT-POINTING ANGLE BRACKET }
232 check(SV("*\u754c*"), SV("{:*^4.2}"), SV("\u754c")); // { CJK Unified Ideograph-754C }
233 check(SV("*\U0001f921*"), SV("{:*^4.2}"), SV("\U0001f921")); // { CLOWN FACE }
234 check(SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"),
236 SV("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy }
239 #ifdef _LIBCPP_VERSION
240 // Tests the libc++ specific behaviour for malformed UTF-sequences. The
241 // Standard doesn't specify how to handle this.
//
// Only the branch for one-byte code units (sizeof(CharT) == 1) contains checks.
// In the padded cases the requested width equals the number of input bytes
// plus two and exactly one '*' is expected on each side, i.e. every byte of
// these inputs, including the malformed ones, is counted as one column.
242 template <class CharT
>
243 static void test_malformed_code_point() {
244 if constexpr (sizeof(CharT
) == 1) {
// Offending byte is the last byte of the input.
246 check(SV("*ZZZZ\x8f*"), SV("{:*^7}"), SV("ZZZZ\x8f"));
247 check(SV("*ZZZZ\xcf*"), SV("{:*^7}"), SV("ZZZZ\xcf"));
248 check(SV("*ZZZZ\xef*"), SV("{:*^7}"), SV("ZZZZ\xef"));
249 check(SV("*ZZZZ\xff*"), SV("{:*^7}"), SV("ZZZZ\xff"));
251 // Malformed in middle, no continuation
// The offending byte is followed by one to four 'Z' characters.
252 check(SV("*ZZZZ\x8fZ*"), SV("{:*^8}"), SV("ZZZZ\x8fZ"));
253 check(SV("*ZZZZ\xcfZ*"), SV("{:*^8}"), SV("ZZZZ\xcfZ"));
254 check(SV("*ZZZZ\xefZ*"), SV("{:*^8}"), SV("ZZZZ\xefZ"));
255 check(SV("*ZZZZ\xffZ*"), SV("{:*^8}"), SV("ZZZZ\xffZ"));
257 check(SV("*ZZZZ\x8fZZ*"), SV("{:*^9}"), SV("ZZZZ\x8fZZ"));
258 check(SV("*ZZZZ\xcfZZ*"), SV("{:*^9}"), SV("ZZZZ\xcfZZ"));
259 check(SV("*ZZZZ\xefZZ*"), SV("{:*^9}"), SV("ZZZZ\xefZZ"));
260 check(SV("*ZZZZ\xffZZ*"), SV("{:*^9}"), SV("ZZZZ\xffZZ"));
262 check(SV("*ZZZZ\x8fZZZ*"), SV("{:*^10}"), SV("ZZZZ\x8fZZZ"));
263 check(SV("*ZZZZ\xcfZZZ*"), SV("{:*^10}"), SV("ZZZZ\xcfZZZ"));
264 check(SV("*ZZZZ\xefZZZ*"), SV("{:*^10}"), SV("ZZZZ\xefZZZ"));
265 check(SV("*ZZZZ\xffZZZ*"), SV("{:*^10}"), SV("ZZZZ\xffZZZ"));
267 check(SV("*ZZZZ\x8fZZZZ*"), SV("{:*^11}"), SV("ZZZZ\x8fZZZZ"));
268 check(SV("*ZZZZ\xcfZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xcfZZZZ"));
269 check(SV("*ZZZZ\xefZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xefZZZZ"));
270 check(SV("*ZZZZ\xffZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xffZZZZ"));
272 // Invalid continuations
// Formatted with a plain "{}": the input has to come out unchanged.
273 check(SV("\xc2\x00"), SV("{}"), SV("\xc2\x00"));
274 check(SV("\xc2\x40"), SV("{}"), SV("\xc2\x40"));
275 check(SV("\xc2\xc0"), SV("{}"), SV("\xc2\xc0"));
277 check(SV("\xe0\x00\x80"), SV("{}"), SV("\xe0\x00\x80"));
278 check(SV("\xe0\x40\x80"), SV("{}"), SV("\xe0\x40\x80"));
279 check(SV("\xe0\xc0\x80"), SV("{}"), SV("\xe0\xc0\x80"));
281 check(SV("\xe0\x80\x00"), SV("{}"), SV("\xe0\x80\x00"));
282 check(SV("\xe0\x80\x40"), SV("{}"), SV("\xe0\x80\x40"));
283 check(SV("\xe0\x80\xc0"), SV("{}"), SV("\xe0\x80\xc0"));
285 check(SV("\xf0\x80\x80\x00"), SV("{}"), SV("\xf0\x80\x80\x00"));
286 check(SV("\xf0\x80\x80\x40"), SV("{}"), SV("\xf0\x80\x80\x40"));
287 check(SV("\xf0\x80\x80\xc0"), SV("{}"), SV("\xf0\x80\x80\xc0"));
289 check(SV("\xf0\x80\x00\x80"), SV("{}"), SV("\xf0\x80\x00\x80"));
290 check(SV("\xf0\x80\x40\x80"), SV("{}"), SV("\xf0\x80\x40\x80"));
291 check(SV("\xf0\x80\xc0\x80"), SV("{}"), SV("\xf0\x80\xc0\x80"));
293 check(SV("\xf0\x00\x80\x80"), SV("{}"), SV("\xf0\x00\x80\x80"));
294 check(SV("\xf0\x40\x80\x80"), SV("{}"), SV("\xf0\x40\x80\x80"));
295 check(SV("\xf0\xc0\x80\x80"), SV("{}"), SV("\xf0\xc0\x80\x80"));
// \xef or \xff followed by one or two \xf5 bytes, both at the end of the input
// and followed by more 'Z' characters.
298 check(SV("*ZZZZ\xef\xf5*"), SV("{:*^8}"), SV("ZZZZ\xef\xf5"));
299 check(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("{:*^12}"), SV("ZZZZ\xef\xf5ZZZZ"));
300 check(SV("*ZZZZ\xff\xf5\xf5*"), SV("{:*^9}"), SV("ZZZZ\xff\xf5\xf5"));
301 check(SV("*ZZZZ\xff\xf5\xf5ZZZZ*"), SV("{:*^13}"), SV("ZZZZ\xff\xf5\xf5ZZZZ"));
303 } else if constexpr (sizeof(CharT
) == 2) {
// Two-byte code units: no checks are implemented yet.
304 // TODO FMT Add these tests.
306 // UTF-32 doesn't combine characters, thus no corruption tests.
310 template <class CharT
>
312 test_single_code_point_fill
<CharT
>();
313 test_single_code_point_truncate
<CharT
>();
316 #ifdef _LIBCPP_VERSION
317 test_malformed_code_point
<CharT
>();
321 int main(int, char**) {
324 #ifndef TEST_HAS_NO_WIDE_CHARACTERS