1 // RUN: %clang_cc1 -verify -std=c99 %s
2 // RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s
8 // Used as a sink for UCNs.
11 // C99 6.4.3p1 specifies the grammar for UCNs. A \u must be followed by exactly
12 // four hex digits, and \U must be followed by exactly eight.
// \u followed by only one, two, or three hex digits is incomplete; each of
// these cases expects the "incomplete universal character name" warning.
13 M(\u1
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
14 M(\u12
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
15 M(\u123
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
17 M(\u12345
)// Okay, two tokens (UCN followed by 5)
// \U followed by one through seven hex digits is likewise incomplete. The
// four-digit case additionally draws a note asking whether '\u' was meant,
// since four digits would have formed a complete UCN with the short spelling.
19 M(\U1
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
20 M(\U12
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
21 M(\U123
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
22 M(\U1234
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \
23 expected
-note
{{did you mean to use
'\u'?}}
24 M(\U12345
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
25 M(\U123456
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
26 M(\U1234567
) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
28 M(\U123456789
) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9)
30 // Now test the ones that should work. Note, these work in C17 and earlier but
31 // are part of the basic character set in C23 and thus should be diagnosed in
32 // that mode. They're valid in a character constant, but not valid in an
33 // identifier, except for U+0024 which is allowed if -fdollars-in-identifiers is enabled.
35 // FIXME: These three should be handled the same way, and should be accepted
36 // when dollar signs are allowed in identifiers, rather than rejected, see
// U+0024 is '$'. Both the \u and the \U spelling currently produce the same
// error, and the same error is required under both RUN lines (with and without
// -fno-dollars-in-identifiers).
38 M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}}
39 M(\U00000024
) // expected-error {{character '$' cannot be specified by a universal character name}}
42 // These should always be rejected because they're not valid identifier
// characters: U+0040 is '@' and U+0060 is '`'. Each is tested with both the
// \u and the \U spelling.
44 // FIXME: the diagnostic could be improved to make it clear this is an issue
45 // with forming an identifier rather than a UCN.
46 M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}}
47 M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}}
48 M(\U00000040
) // expected-error {{character '@' cannot be specified by a universal character name}}
49 M(\U00000060
) // expected-error {{character '`' cannot be specified by a universal character name}}
51 // UCNs outside of identifiers are handled in Phase 5 of translation, so we
52 // cannot use the macro expansion to test their behavior.
54 // This is outside of the range of values specified by ISO 10646: U+110000
// is one past the largest code point, U+10FFFF.
55 const char *c1
= "\U00110000"; // expected-error {{invalid universal character}}
56 // This does not fall outside of the range: U+10FFFF is the largest code
// point, so no diagnostic is attached to this declaration.
57 const char *c2
= "\U0010FFFF";
59 // These should always be accepted because they're valid in a character
// constant. U+0024 ('$'), U+0040 ('@'), and U+0060 ('`') are the same code
// points that are rejected when they appear in an identifier.
65 int c6
= '\U00000024';
66 int c7
= '\U00000040';
67 int c8
= '\U00000060';
69 // Valid: U+E000 is the first code point above the surrogate range
// (U+D800 through U+DFFF), so it is not a surrogate and must be accepted.
71 const char *c9
= "\U0000E000";
73 // Invalid lone surrogates, which are excluded explicitly by 6.4.3p2.
// U+D800 and U+DFFF are the first and last code points of the surrogate
// range; one is tested in an identifier context and one in a string literal.
74 M(\uD800) // expected-error {{invalid universal character}}
75 const char *c10
= "\U0000DFFF"; // expected-error {{invalid universal character}}