Reapply "[lldb][dwarf] Compute fully qualified names on simplified template names...
[llvm-project.git] / clang-tools-extra / clang-tidy / misc / MisleadingBidirectional.cpp
blobda7139255bfaab7ceb9acc28acbdaa40e2487748
1 //===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "MisleadingBidirectional.h"
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "llvm/Support/ConvertUTF.h"
14 #include <optional>
16 using namespace clang;
17 using namespace clang::tidy::misc;
19 static bool containsMisleadingBidi(StringRef Buffer,
20 bool HonorLineBreaks = true) {
21 const char *CurPtr = Buffer.begin();
23 enum BidiChar {
24 PS = 0x2029,
25 RLO = 0x202E,
26 RLE = 0x202B,
27 LRO = 0x202D,
28 LRE = 0x202A,
29 PDF = 0x202C,
30 RLI = 0x2067,
31 LRI = 0x2066,
32 FSI = 0x2068,
33 PDI = 0x2069
36 SmallVector<BidiChar> BidiContexts;
38 // Scan each character while maintaining a stack of opened bidi context.
39 // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
40 // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
42 // Warn if we end up with an unclosed context.
43 while (CurPtr < Buffer.end()) {
44 unsigned char C = *CurPtr;
45 if (isASCII(C)) {
46 ++CurPtr;
47 bool IsParagrapSep =
48 (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
49 bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
50 if (IsParagrapSep || IsSegmentSep)
51 BidiContexts.clear();
52 continue;
54 llvm::UTF32 CodePoint = 0;
55 llvm::ConversionResult Result = llvm::convertUTF8Sequence(
56 (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
57 &CodePoint, llvm::strictConversion);
59 // If conversion fails, utf-8 is designed so that we can just try next char.
60 if (Result != llvm::conversionOK) {
61 ++CurPtr;
62 continue;
65 // Open a PDF context.
66 if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
67 CodePoint == LRE)
68 BidiContexts.push_back(PDF);
69 // Close PDF Context.
70 else if (CodePoint == PDF) {
71 if (!BidiContexts.empty() && BidiContexts.back() == PDF)
72 BidiContexts.pop_back();
74 // Open a PDI Context.
75 else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
76 BidiContexts.push_back(PDI);
77 // Close a PDI Context.
78 else if (CodePoint == PDI) {
79 auto R = llvm::find(llvm::reverse(BidiContexts), PDI);
80 if (R != BidiContexts.rend())
81 BidiContexts.resize(BidiContexts.rend() - R - 1);
83 // Line break or equivalent
84 else if (CodePoint == PS)
85 BidiContexts.clear();
87 return !BidiContexts.empty();
90 class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
91 : public CommentHandler {
92 public:
93 MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check)
94 : Check(Check) {}
96 bool HandleComment(Preprocessor &PP, SourceRange Range) override {
97 // FIXME: check that we are in a /* */ comment
98 StringRef Text =
99 Lexer::getSourceText(CharSourceRange::getCharRange(Range),
100 PP.getSourceManager(), PP.getLangOpts());
102 if (containsMisleadingBidi(Text, true))
103 Check.diag(
104 Range.getBegin(),
105 "comment contains misleading bidirectional Unicode characters");
106 return false;
109 private:
110 MisleadingBidirectionalCheck &Check;
113 MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
114 StringRef Name, ClangTidyContext *Context)
115 : ClangTidyCheck(Name, Context),
116 Handler(std::make_unique<MisleadingBidirectionalHandler>(*this)) {}
118 MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
120 void MisleadingBidirectionalCheck::registerPPCallbacks(
121 const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
122 PP->addCommentHandler(Handler.get());
125 void MisleadingBidirectionalCheck::check(
126 const ast_matchers::MatchFinder::MatchResult &Result) {
127 if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
128 StringRef Literal = SL->getBytes();
129 if (containsMisleadingBidi(Literal, false))
130 diag(SL->getBeginLoc(), "string literal contains misleading "
131 "bidirectional Unicode characters");
135 void MisleadingBidirectionalCheck::registerMatchers(
136 ast_matchers::MatchFinder *Finder) {
137 Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);