1 //===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MisleadingBidirectional.h"
11 #include "clang/Frontend/CompilerInstance.h"
12 #include "clang/Lex/Preprocessor.h"
13 #include "llvm/Support/ConvertUTF.h"
16 using namespace clang
;
17 using namespace clang::tidy::misc
;
// Scans Buffer and reports whether a bidirectional-control context is still
// open once the scan finishes.
//
// Buffer is walked from begin() to end(). A stack (BidiContexts) records, for
// every opening control character seen, which closing character is expected
// (PDF or PDI). The function returns true when that stack is non-empty at the
// end, i.e. at least one opened context was never closed.
//
// Parameters:
//   Buffer          - the bytes to scan; non-ASCII content is decoded with
//                     llvm::convertUTF8Sequence, so it is treated as UTF-8.
//   HonorLineBreaks - defaults to true. It is not read anywhere in the lines
//                     visible here. NOTE(review): confirm whether it is
//                     actually used.
//
// NOTE(review): this listing has gaps (the embedded line numbers skip, e.g.
// 21 -> 36, 44 -> 48, 50 -> 54, 57 -> 59, 60 -> 65). The BidiChar enumerators,
// the declaration of IsParagrapSep, and the statements controlled by several
// of the conditions below are not visible, so the comments here describe only
// what can be read from the remaining lines.
19 static bool containsMisleadingBidi(StringRef Buffer
,
20 bool HonorLineBreaks
= true) {
// Read cursor into Buffer.
21 const char *CurPtr
= Buffer
.begin();
// Stack of expected closers: the visible code only ever pushes PDF or PDI.
36 SmallVector
<BidiChar
> BidiContexts
;
38 // Scan each character while maintaining a stack of opened bidi context.
39 // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
40 // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
42 // Warn if we end up with an unclosed context.
43 while (CurPtr
< Buffer
.end()) {
// Inspect the current byte as an unsigned value so the range tests below are
// not affected by the signedness of char.
44 unsigned char C
= *CurPtr
;
// Byte values tested here: 0xA (LF), 0xD (CR), 0x1C..0x1E and 0x85. The
// variable receiving this expression is declared on a line not visible here;
// presumably it is IsParagrapSep, which is read below - TODO confirm.
48 (C
== 0xA || C
== 0xD || (0x1C <= C
&& C
<= 0x1E) || C
== 0x85);
// Byte values tested here: 0x9 (TAB), 0xB (VT) and 0x1F.
49 bool IsSegmentSep
= (C
== 0x9 || C
== 0xB || C
== 0x1F);
// NOTE(review): the statement controlled by this `if` is not visible. Per the
// comment above the loop, a line break is expected to reset the open contexts.
50 if (IsParagrapSep
|| IsSegmentSep
)
// Decode one UTF-8 sequence into CodePoint. The address of CurPtr is passed,
// so the callee is able to move the cursor; strictConversion is requested.
54 llvm::UTF32 CodePoint
= 0;
55 llvm::ConversionResult Result
= llvm::convertUTF8Sequence(
56 (const llvm::UTF8
**)&CurPtr
, (const llvm::UTF8
*)Buffer
.end(),
57 &CodePoint
, llvm::strictConversion
);
59 // If conversion fails, utf-8 is designed so that we can just try next char.
// NOTE(review): the body of this failure branch is not visible here.
60 if (Result
!= llvm::conversionOK
) {
65 // Open a PDF context.
// The last operand of this condition is on a line not visible here
// (presumably LRE, going by the comment above the loop - TODO confirm).
// The expected closer, PDF, is what gets pushed.
66 if (CodePoint
== RLO
|| CodePoint
== RLE
|| CodePoint
== LRO
||
68 BidiContexts
.push_back(PDF
);
// A PDF closes a context only when the innermost open context expects PDF;
// otherwise nothing is popped here.
70 else if (CodePoint
== PDF
) {
71 if (!BidiContexts
.empty() && BidiContexts
.back() == PDF
)
72 BidiContexts
.pop_back();
74 // Open a PDI Context.
75 else if (CodePoint
== RLI
|| CodePoint
== LRI
|| CodePoint
== FSI
)
76 BidiContexts
.push_back(PDI
);
77 // Close a PDI Context.
// Search from the top of the stack for the most recent PDI entry. When one is
// found, `rend() - R - 1` is its index counted from the bottom, so the resize
// drops that entry together with everything pushed after it. When none is
// found the stack is left untouched.
78 else if (CodePoint
== PDI
) {
79 auto R
= llvm::find(llvm::reverse(BidiContexts
), PDI
);
80 if (R
!= BidiContexts
.rend())
81 BidiContexts
.resize(BidiContexts
.rend() - R
- 1);
83 // Line break or equivalent
// NOTE(review): the statement controlled by this branch is not visible here.
84 else if (CodePoint
== PS
)
// True when at least one context is still open after the whole scan.
87 return !BidiContexts
.empty();
// CommentHandler subclass nested in MisleadingBidirectionalCheck. It runs
// containsMisleadingBidi over the source text of each comment handed to
// HandleComment and holds a reference back to the owning check.
//
// NOTE(review): this listing has gaps (embedded line numbers skip, e.g.
// 93 -> 96, 97 -> 99, 102 -> 105 -> 110). Access specifiers, the constructor
// body, the declaration of `Text`, the call that emits the diagnostic and the
// value returned by HandleComment are not visible here.
90 class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
91 : public CommentHandler
{
// Takes the owning check by reference; presumably stored in the Check member
// declared at the end of the class - TODO confirm (initializer not visible).
93 MisleadingBidirectionalHandler(MisleadingBidirectionalCheck
&Check
)
// Overrides the CommentHandler hook. Range is the source range of the comment.
96 bool HandleComment(Preprocessor
&PP
, SourceRange Range
) override
{
97 // FIXME: check that we are in a /* */ comment
// Fetch the text covered by Range, using PP's source manager and language
// options. The variable receiving it is declared on a line not visible here
// (presumably `Text`, which is used just below).
99 Lexer::getSourceText(CharSourceRange::getCharRange(Range
),
100 PP
.getSourceManager(), PP
.getLangOpts());
// `true` is passed for HonorLineBreaks.
102 if (containsMisleadingBidi(Text
, true))
// Message text for the comment case; the call it is an argument of is not
// visible here.
105 "comment contains misleading bidirectional Unicode characters");
// Reference to the check that owns this handler.
110 MisleadingBidirectionalCheck
&Check
;
// Constructs the check: forwards Name and Context to the ClangTidyCheck base
// and creates the comment handler, giving it a reference to this check.
113 MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
114 StringRef Name
, ClangTidyContext
*Context
)
115 : ClangTidyCheck(Name
, Context
),
116 Handler(std::make_unique
<MisleadingBidirectionalHandler
>(*this)) {}
// Defaulted destructor, defined in this file after the handler class above.
// NOTE(review): presumably kept out of the header so that Handler's pointee
// type is complete where it is destroyed - confirm against the header.
118 MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
// Registers this check's comment handler with the preprocessor PP so that
// HandleComment is invoked for comments. SM and ModuleExpanderPP are not used
// in the lines visible here.
120 void MisleadingBidirectionalCheck::registerPPCallbacks(
121 const SourceManager
&SM
, Preprocessor
*PP
, Preprocessor
*ModuleExpanderPP
) {
// Handler is owned by this check; only the raw pointer is handed to PP.
122 PP
->addCommentHandler(Handler
.get());
// Match callback: for a node bound as "strlit" that is a StringLiteral, scans
// the literal's bytes with containsMisleadingBidi and, when that returns true,
// emits a diagnostic at the literal's begin location.
125 void MisleadingBidirectionalCheck::check(
126 const ast_matchers::MatchFinder::MatchResult
&Result
) {
// "strlit" is the binding name used in registerMatchers.
127 if (const auto *SL
= Result
.Nodes
.getNodeAs
<StringLiteral
>("strlit")) {
128 StringRef Literal
= SL
->getBytes();
// `false` is passed for HonorLineBreaks here, unlike the comment path.
129 if (containsMisleadingBidi(Literal
, false))
130 diag(SL
->getBeginLoc(), "string literal contains misleading "
131 "bidirectional Unicode characters");
// Registers an AST matcher for string literals, bound under the name "strlit",
// with this check as the callback (see check(), which looks up that name).
135 void MisleadingBidirectionalCheck::registerMatchers(
136 ast_matchers::MatchFinder
*Finder
) {
137 Finder
->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);