[memprof] Update YAML traits for writer purposes (#118720)
[llvm-project.git] / clang-tools-extra / clang-tidy / misc / ConfusableTable / BuildConfusableTable.cpp
blobe269ab3983f36cfe53e3806d95547d647aec2aa5
1 //===--- BuildConfusableTable.cpp - clang-tidy---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/STLExtras.h"
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/Support/ConvertUTF.h"
12 #include "llvm/Support/MemoryBuffer.h"
13 #include "llvm/Support/raw_ostream.h"
15 using namespace llvm;
17 int main(int argc, char *argv[]) {
18 auto ErrorOrBuffer = MemoryBuffer::getFile(argv[1], true);
19 if (!ErrorOrBuffer)
20 return 1;
21 std::unique_ptr<MemoryBuffer> Buffer = std::move(ErrorOrBuffer.get());
22 StringRef Content = Buffer->getBuffer();
23 Content = Content.drop_until([](char c) { return c == '#'; });
24 SmallVector<StringRef> Lines;
25 SplitString(Content, Lines, "\r\n");
27 std::vector<std::pair<llvm::UTF32, SmallVector<llvm::UTF32>>> Entries;
28 SmallVector<StringRef> Values;
29 for (StringRef Line : Lines) {
30 if (Line.starts_with("#"))
31 continue;
33 Values.clear();
34 Line.split(Values, ';');
35 if (Values.size() < 2) {
36 errs() << "Failed to parse: " << Line << "\n";
37 return 2;
40 llvm::StringRef From = Values[0].trim();
41 llvm::UTF32 CodePoint = 0;
42 From.getAsInteger(16, CodePoint);
44 SmallVector<llvm::UTF32> To;
45 SmallVector<StringRef> ToN;
46 Values[1].split(ToN, ' ', -1, false);
47 for (StringRef To_ : ToN) {
48 llvm::UTF32 ToCodePoint = 0;
49 To_.trim().getAsInteger(16, ToCodePoint);
50 To.push_back(ToCodePoint);
52 // Sentinel
53 To.push_back(0);
55 Entries.emplace_back(CodePoint, To);
57 llvm::sort(Entries);
59 unsigned LargestValue =
60 std::max_element(Entries.begin(), Entries.end(),
61 [](const auto &Entry0, const auto &Entry1) {
62 return Entry0.second.size() < Entry1.second.size();
64 ->second.size();
66 std::error_code ec;
67 llvm::raw_fd_ostream os(argv[2], ec);
69 // FIXME: If memory consumption and/or lookup time becomes a constraint, it
70 // maybe worth using a more elaborate data structure.
71 os << "struct {llvm::UTF32 codepoint; llvm::UTF32 values[" << LargestValue
72 << "];} "
73 "ConfusableEntries[] = {\n";
74 for (const auto &Values : Entries) {
75 os << " { ";
76 os << Values.first;
77 os << ", {";
78 for (auto CP : Values.second)
79 os << CP << ", ";
81 os << "}},\n";
83 os << "};\n";
84 return 0;