1 //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This tablegen backend emits an efficient function to translate HTML named
10 // character references to UTF-8 sequences.
12 //===----------------------------------------------------------------------===//
14 #include "TableGenBackends.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/Support/ConvertUTF.h"
17 #include "llvm/TableGen/Error.h"
18 #include "llvm/TableGen/Record.h"
19 #include "llvm/TableGen/StringMatcher.h"
20 #include "llvm/TableGen/TableGenBackend.h"
25 /// Convert a code point to the corresponding UTF-8 sequence represented
26 /// as a C string literal.
28 /// \returns true on success.
29 static bool translateCodePointToUTF8(unsigned CodePoint
,
30 SmallVectorImpl
<char> &CLiteral
) {
31 char Translated
[UNI_MAX_UTF8_BYTES_PER_CODE_POINT
];
32 char *TranslatedPtr
= Translated
;
33 if (!ConvertCodePointToUTF8(CodePoint
, TranslatedPtr
))
36 StringRef
UTF8(Translated
, TranslatedPtr
- Translated
);
38 raw_svector_ostream
OS(CLiteral
);
40 for (size_t i
= 0, e
= UTF8
.size(); i
!= e
; ++i
) {
42 OS
.write_hex(static_cast<unsigned char>(UTF8
[i
]));
49 void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper
&Records
,
51 std::vector
<Record
*> Tags
= Records
.getAllDerivedDefinitions("NCR");
52 std::vector
<StringMatcher::StringPair
> NameToUTF8
;
53 SmallString
<32> CLiteral
;
54 for (std::vector
<Record
*>::iterator I
= Tags
.begin(), E
= Tags
.end();
57 std::string Spelling
= std::string(Tag
.getValueAsString("Spelling"));
58 uint64_t CodePoint
= Tag
.getValueAsInt("CodePoint");
60 CLiteral
.append("return ");
61 if (!translateCodePointToUTF8(CodePoint
, CLiteral
)) {
62 SrcMgr
.PrintMessage(Tag
.getLoc().front(),
64 Twine("invalid code point"));
69 StringMatcher::StringPair
Match(Spelling
, std::string(CLiteral
.str()));
70 NameToUTF8
.push_back(Match
);
73 emitSourceFileHeader("HTML named character reference to UTF-8 translation",
76 OS
<< "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77 " StringRef Name) {\n";
78 StringMatcher("Name", NameToUTF8
, OS
).Emit();
79 OS
<< " return StringRef();\n"