clang/test/C/C99/n717.c

   1 // RUN: %clang_cc1 -verify -std=c99 %s
   2 // RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s
   3
   4 /* WG14 N717: Clang 17
   5  * Extended identifiers
   6  */
   7
   8 // Used as a sink for UCNs; the argument is lexed, but M expands to nothing.
   9 #define M(arg)
  10
  11 // C99 6.4.3p1 specifies the grammar for UCNs. A \u must be followed by exactly
  12 // four hex digits, and \U must be followed by exactly eight.
  13 M(\u1)    // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  14 M(\u12)   // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  15 M(\u123)  // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  16 M(\u1234) // Okay, exactly four hex digits
  17 M(\u12345)// Okay, two tokens (UCN followed by 5)
  18
  19 M(\U1)         // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  20 M(\U12)        // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  21 M(\U123)       // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  22 M(\U1234)      // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \
  23                   expected-note {{did you mean to use '\u'?}}
  24 M(\U12345)     // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  25 M(\U123456)    // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  26 M(\U1234567)   // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}}
  27 M(\U12345678)  // Okay, exactly eight hex digits
  28 M(\U123456789) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9)
  29
  30 // Now test the ones that should work. Note, these work in C17 and earlier but
  31 // are part of the basic character set in C23 and thus should be diagnosed in
  32 // that mode. They're valid in a character constant, but not valid in an
  33 // identifier, except for U+0024 which is allowed if -fdollars-in-identifiers
  34 // is enabled.
  35 // FIXME: These three should be handled the same way, and should be accepted
  36 // when dollar signs are allowed in identifiers, rather than rejected, see
  37 // GH87106.
  38 M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}}
  39 M(\U00000024) // expected-error {{character '$' cannot be specified by a universal character name}}
  40 M($) // Okay, no diagnostic for a literal '$'
  41
  42 // U+0040 ('@') and U+0060 ('`') should always be rejected because they're not
  43 // valid identifier characters.
  44 // FIXME: the diagnostic could be improved to make it clear this is an issue
  45 // with forming an identifier rather than a UCN.
  46 M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}}
  47 M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}}
  48 M(\U00000040) // expected-error {{character '@' cannot be specified by a universal character name}}
  49 M(\U00000060) // expected-error {{character '`' cannot be specified by a universal character name}}
  50
  51 // UCNs outside of identifiers are handled in Phase 5 of translation, so we
  52 // cannot use the macro expansion to test their behavior.
  53
  54 // This is outside of the range of values specified by ISO 10646.
  55 const char *c1 = "\U00110000"; // expected-error {{invalid universal character}}
  56 // U+10FFFF is the largest value inside that range, so it is accepted.
  57 const char *c2 = "\U0010FFFF";
  58
  59 // These should always be accepted because they're valid in a character
  60 // constant.
  61 int c3 = '\u0024';
  62 int c4 = '\u0040';
  63 int c5 = '\u0060';
  64
  65 int c6 = '\U00000024';
  66 int c7 = '\U00000040';
  67 int c8 = '\U00000060';
  68
  69 // Valid code points on either side of the surrogate range (U+D800-U+DFFF).
  70 M(\uD799)
  71 const char *c9 = "\U0000E000";
  72
  73 // Invalid lone surrogates, which are excluded explicitly by 6.4.3p2.
  74 M(\uD800) // expected-error {{invalid universal character}}
  75 const char *c10  = "\U0000DFFF"; // expected-error {{invalid universal character}}