1 //===-- runtime/utf.cpp ---------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 namespace Fortran::runtime
{
13 #ifndef FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
15 RT_OFFLOAD_VAR_GROUP_BEGIN
16 const RT_CONST_VAR_ATTRS
std::uint8_t UTF8FirstByteTable
[256]{
17 /* 00 - 7F: 7 bit payload in single byte */
18 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
19 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
20 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
21 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
22 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
23 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
24 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
25 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
26 /* 80 - BF: invalid first byte, valid later byte */
27 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31 /* C0 - DF: 11 bit payload */
32 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
33 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34 /* E0 - EF: 16 bit payload */
35 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
36 /* F0 - F7: 21 bit payload */ 4, 4, 4, 4, 4, 4, 4, 4,
37 /* F8 - FB: 26 bit payload */ 5, 5, 5, 5,
38 /* FC - FD: 31 bit payload */ 6, 6,
39 /* FE: 32 bit payload */ 7,
42 RT_OFFLOAD_VAR_GROUP_END
44 #endif // FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
46 RT_OFFLOAD_API_GROUP_BEGIN
// Measures how far to step back from `end` to reach the start of the
// character that precedes it.
//
// Bytes are examined backwards starting at end[-1]; the scan stops at the
// first byte that is not a UTF-8 continuation byte (bit pattern 10xxxxxx).
//
//   end    points just past the last byte of the character to measure
//   limit  maximum number of bytes that may be examined before `end`
//
// Returns the distance (1..limit) from `end` back to the first
// non-continuation byte found, or `limit` itself when every examined byte is
// a continuation byte (including the case limit == 0).
std::size_t MeasurePreviousUTF8Bytes(const char *end, std::size_t limit) {
  // Scan back over UTF-8 continuation bytes, if any
  for (std::size_t n{1}; n <= limit; ++n) {
    // Subtract the unsigned offset from the pointer directly: negating a
    // std::size_t yields a huge positive index rather than a negative one.
    const char byte{*(end - n)};
    if ((byte & 0xc0) != 0x80) {
      return n;
    }
  }
  return limit;
}
58 // Non-minimal encodings are accepted.
59 Fortran::common::optional
<char32_t
> DecodeUTF8(const char *p0
) {
60 const std::uint8_t *p
{reinterpret_cast<const std::uint8_t *>(p0
)};
61 std::size_t bytes
{MeasureUTF8Bytes(*p0
)};
64 } else if (bytes
> 1) {
65 std::uint64_t result
{char32_t
{*p
} & (0x7f >> bytes
)};
66 for (std::size_t j
{1}; j
< bytes
; ++j
) {
67 std::uint8_t next
{p
[j
]};
68 if (next
< 0x80 || next
> 0xbf) {
69 return Fortran::common::nullopt
;
71 result
= (result
<< 6) | (next
& 0x3f);
73 if (result
<= 0xffffffff) {
74 return static_cast<char32_t
>(result
);
77 return Fortran::common::nullopt
;
// Encodes `ucs` as a UTF-8 sequence stored at p0 and returns the number of
// bytes written.
//
// The shortest form able to hold the value is used:
//   <= 0x7f        1 byte
//   <= 0x7ff       2 bytes, lead 0xC0
//   <= 0xffff      3 bytes, lead 0xE0
//   <= 0x1fffff    4 bytes, lead 0xF0
//   <= 0x3ffffff   5 bytes, lead 0xF8
//   <= 0x7fffffff  6 bytes, lead 0xFC
//   otherwise      7 bytes, lead 0xFE
// These lead bytes agree with the lengths recorded in UTF8FirstByteTable.
// No bounds checking is done: the buffer at p0 must have room for up to 7
// bytes.
std::size_t EncodeUTF8(char *p0, char32_t ucs) {
  std::uint8_t *p{reinterpret_cast<std::uint8_t *>(p0)};
  if (ucs <= 0x7f) {
    p[0] = static_cast<std::uint8_t>(ucs);
    return 1;
  }
  // Select the sequence length and the marker bits of its lead byte.
  std::size_t bytes{7};
  std::uint8_t lead{0xfe};
  if (ucs <= 0x7ff) {
    bytes = 2;
    lead = 0xc0;
  } else if (ucs <= 0xffff) {
    bytes = 3;
    lead = 0xe0;
  } else if (ucs <= 0x1fffff) {
    bytes = 4;
    lead = 0xf0;
  } else if (ucs <= 0x3ffffff) {
    bytes = 5;
    lead = 0xf8;
  } else if (ucs <= 0x7fffffff) {
    // 31-bit payload: one bit in the 0xFC lead byte plus five continuations.
    bytes = 6;
    lead = 0xfc;
  }
  // Emit continuation bytes from last to first, six payload bits apiece.
  for (std::size_t j{bytes - 1}; j > 0; --j) {
    p[j] = static_cast<std::uint8_t>(0x80 | (ucs & 0x3f));
    ucs >>= 6;
  }
  // Whatever remains of the value fits in the payload bits of the lead byte.
  p[0] = static_cast<std::uint8_t>(lead | ucs);
  return bytes;
}
126 RT_OFFLOAD_API_GROUP_END
128 } // namespace Fortran::runtime