1 //===-- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp ----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This tablegen backend emits an efficient function to translate HTML named
10 // character references to UTF-8 sequences.
12 //===----------------------------------------------------------------------===//
14 #include "TableGenBackends.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/Support/ConvertUTF.h"
17 #include "llvm/TableGen/Error.h"
18 #include "llvm/TableGen/Record.h"
19 #include "llvm/TableGen/StringMatcher.h"
20 #include "llvm/TableGen/TableGenBackend.h"
25 /// Convert a code point to the corresponding UTF-8 sequence represented
26 /// as a C string literal.
28 /// \returns true on success.
29 static bool translateCodePointToUTF8(unsigned CodePoint
,
30 SmallVectorImpl
<char> &CLiteral
) {
// Scratch buffer for the encoded bytes, sized by
// UNI_MAX_UTF8_BYTES_PER_CODE_POINT.
31 char Translated
[UNI_MAX_UTF8_BYTES_PER_CODE_POINT
];
// Cursor into the scratch buffer; it starts at the buffer's first byte and is
// handed to the conversion routine below.
32 char *TranslatedPtr
= Translated
;
// Encode CodePoint into the scratch buffer and test whether the conversion
// was rejected.
// NOTE(review): the statement guarded by this check is not visible here;
// given the "\returns true on success" contract it is presumably the failure
// return -- confirm against the full file.
33 if (!ConvertCodePointToUTF8(CodePoint
, TranslatedPtr
))
// View over the encoded bytes: begins at the buffer start and has length
// (TranslatedPtr - Translated). This relies on the conversion call having
// advanced TranslatedPtr past the bytes it wrote (presumably it takes the
// pointer by reference -- verify in llvm/Support/ConvertUTF.h).
36 StringRef
UTF8(Translated
, TranslatedPtr
- Translated
);
// Output stream constructed over the caller's CLiteral buffer; the hex text
// written below goes through it.
38 raw_svector_ostream
OS(CLiteral
);
// Visit every encoded byte by index.
40 for (size_t i
= 0, e
= UTF8
.size(); i
!= e
; ++i
) {
// Write the byte in hexadecimal. The cast to unsigned char keeps a byte with
// the high bit set from being widened as a negative value when char is
// signed.
42 OS
.write_hex(static_cast<unsigned char>(UTF8
[i
]));
49 void clang::EmitClangCommentHTMLNamedCharacterReferences(
50 const RecordKeeper
&Records
, raw_ostream
&OS
) {
51 std::vector
<StringMatcher::StringPair
> NameToUTF8
;
52 SmallString
<32> CLiteral
;
53 for (const Record
*Tag
: Records
.getAllDerivedDefinitions("NCR")) {
54 std::string Spelling
= Tag
->getValueAsString("Spelling").str();
55 uint64_t CodePoint
= Tag
->getValueAsInt("CodePoint");
57 CLiteral
.append("return ");
58 if (!translateCodePointToUTF8(CodePoint
, CLiteral
)) {
59 SrcMgr
.PrintMessage(Tag
->getLoc().front(), SourceMgr::DK_Error
,
60 Twine("invalid code point"));
65 StringMatcher::StringPair
Match(Spelling
, std::string(CLiteral
));
66 NameToUTF8
.push_back(Match
);
69 emitSourceFileHeader("HTML named character reference to UTF-8 translation",
72 OS
<< "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
73 " StringRef Name) {\n";
74 StringMatcher("Name", NameToUTF8
, OS
).Emit();
75 OS
<< " return StringRef();\n"