2 //===----------------------------------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 // WARNING, this entire header is generated by
11 // utils/generate_escaped_output_table.py
14 // UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
16 // See Terms of Use <https://www.unicode.org/copyright.html>
17 // for definitions of Unicode Inc.'s Data Files and Software.
19 // NOTICE TO USER: Carefully read the following legal agreement.
20 // BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
21 // DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
22 // YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
23 // TERMS AND CONDITIONS OF THIS AGREEMENT.
24 // IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
25 // THE DATA FILES OR SOFTWARE.
27 // COPYRIGHT AND PERMISSION NOTICE
29 // Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
30 // Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
32 // Permission is hereby granted, free of charge, to any person obtaining
33 // a copy of the Unicode data files and any associated documentation
34 // (the "Data Files") or Unicode software and any associated documentation
35 // (the "Software") to deal in the Data Files or Software
36 // without restriction, including without limitation the rights to use,
37 // copy, modify, merge, publish, distribute, and/or sell copies of
38 // the Data Files or Software, and to permit persons to whom the Data Files
39 // or Software are furnished to do so, provided that either
40 // (a) this copyright and permission notice appear with all copies
41 // of the Data Files or Software, or
42 // (b) this copyright and permission notice appear in associated
45 // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
46 // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
47 // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
48 // NONINFRINGEMENT OF THIRD PARTY RIGHTS.
49 // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
50 // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
51 // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
52 // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
53 // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
54 // PERFORMANCE OF THE DATA FILES OR SOFTWARE.
56 // Except as contained in this notice, the name of a copyright holder
57 // shall not be used in advertising or otherwise to promote the sale,
58 // use or other dealings in these Data Files or Software without prior
59 // written authorization of the copyright holder.
61 #ifndef _LIBCPP___FORMAT_ESCAPED_OUTPUT_TABLE_H
62 #define _LIBCPP___FORMAT_ESCAPED_OUTPUT_TABLE_H
64 #include <__algorithm/ranges_upper_bound.h>
69 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
70 # pragma GCC system_header
73 _LIBCPP_BEGIN_NAMESPACE_STD
75 #if _LIBCPP_STD_VER >= 23
77 namespace __escaped_output_table
{
79 /// The entries of the characters to escape in format's debug string.
81 /// Contains the entries for [format.string.escaped]/2.2.1.2.1
82 /// CE is a Unicode encoding and C corresponds to either a UCS scalar value
83 /// whose Unicode property General_Category has a value in the groups
84 /// Separator (Z) or Other (C) or to a UCS scalar value which has the Unicode
85 /// property Grapheme_Extend=Yes, as described by table 12 of UAX #44
87 /// Separator (Z) consists of General_Category
/// - Space_Separator,
/// - Line_Separator,
/// - Paragraph_Separator.
///
/// Other (C) consists of General_Category
/// - Control,
/// - Format,
/// - Surrogate,
/// - Private_Use,
/// - Unassigned.
///
99 /// The data is generated from
100 /// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
101 /// - https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt
103 /// The table is similar to the table
104 /// __extended_grapheme_custer_property_boundary::__entries
105 /// which explains the details of these classes. The only difference is this
106 /// table lacks a property, thus having more bits available for the size.
108 /// The data has 2 values:
109 /// - bits [0, 10] The size of the range, allowing 2048 elements.
110 /// - bits [11, 31] The lower bound code point of the range. The upper bound of
111 /// the range is lower bound + size.
112 inline constexpr uint32_t __entries
[893] = {
1007 /// At the end of the valid Unicode code points space a lot of code points are
1008 /// either reserved or a noncharacter. Adding all these entries to the
1009 /// lookup table would add 446 entries to the table (in Unicode 14).
/// Instead, only the start of the region is stored; every code point in
1011 /// this region needs to be escaped.
// Unshifted code point value: it is compared directly against a code point,
// unlike the packed values stored in the lookup table.
inline constexpr uint32_t __unallocated_region_lower_bound{0x000323b0};
1014 /// Returns whether the code unit needs to be escaped.
1016 /// \pre The code point is a valid Unicode code point.
1017 [[nodiscard
]] _LIBCPP_HIDE_FROM_ABI
constexpr bool __needs_escape(const char32_t __code_point
) noexcept
{
1018 // Since __unallocated_region_lower_bound contains the unshifted range do the
1019 // comparison without shifting.
1020 if (__code_point
>= __unallocated_region_lower_bound
)
1023 ptrdiff_t __i
= std::ranges::upper_bound(__entries
, (__code_point
<< 11) | 0x7ffu
) - __entries
;
1028 uint32_t __upper_bound
= (__entries
[__i
] >> 11) + (__entries
[__i
] & 0x7ffu
);
1029 return __code_point
<= __upper_bound
;
1032 } // namespace __escaped_output_table
1034 #endif //_LIBCPP_STD_VER >= 23
1036 _LIBCPP_END_NAMESPACE_STD
1038 #endif // _LIBCPP___FORMAT_ESCAPED_OUTPUT_TABLE_H