1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "BranchCloneCheck.h"
10 #include "../utils/ASTUtils.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/ASTMatchers/ASTMatchFinder.h"
14 #include "clang/Analysis/CloneDetection.h"
15 #include "clang/Lex/Lexer.h"
16 #include "llvm/Support/Casting.h"
18 using namespace clang
;
19 using namespace clang::ast_matchers
;
22 /// A branch in a switch may consist of several statements; while a branch in
23 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
24 using SwitchBranch
= llvm::SmallVector
<const Stmt
*, 2>;
25 } // anonymous namespace
27 /// Determines if the bodies of two branches in a switch statements are Type I
28 /// clones of each other. This function only examines the body of the branch
29 /// and ignores the `case X:` or `default:` at the start of the branch.
30 static bool areSwitchBranchesIdentical(const SwitchBranch
&LHS
,
31 const SwitchBranch
&RHS
,
32 const ASTContext
&Context
) {
33 if (LHS
.size() != RHS
.size())
36 for (size_t I
= 0, Size
= LHS
.size(); I
< Size
; I
++) {
37 // NOTE: We strip goto labels and annotations in addition to stripping
38 // the `case X:` or `default:` labels, but it is very unlikely that this
39 // would cause false positives in real-world code.
40 if (!tidy::utils::areStatementsIdentical(LHS
[I
]->stripLabelLikeStatements(),
41 RHS
[I
]->stripLabelLikeStatements(),
50 static bool isFallthroughSwitchBranch(const SwitchBranch
&Branch
) {
51 struct SwitchCaseVisitor
: RecursiveASTVisitor
<SwitchCaseVisitor
> {
52 using RecursiveASTVisitor
<SwitchCaseVisitor
>::DataRecursionQueue
;
54 bool TraverseLambdaExpr(LambdaExpr
*, DataRecursionQueue
* = nullptr) {
55 return true; // Ignore lambdas
58 bool TraverseDecl(Decl
*) {
59 return true; // No need to check declarations
62 bool TraverseSwitchStmt(SwitchStmt
*, DataRecursionQueue
* = nullptr) {
63 return true; // Ignore sub-switches
66 bool TraverseSwitchCase(SwitchCase
*, DataRecursionQueue
* = nullptr) {
67 return true; // Ignore cases
70 bool TraverseDefaultStmt(DefaultStmt
*, DataRecursionQueue
* = nullptr) {
71 return true; // Ignore defaults
74 bool TraverseAttributedStmt(AttributedStmt
*S
) {
78 for (const Attr
*A
: S
->getAttrs()) {
79 if (isa
<FallThroughAttr
>(A
))
87 for (const Stmt
*Elem
: Branch
) {
88 if (!Visitor
.TraverseStmt(const_cast<Stmt
*>(Elem
)))
94 namespace clang::tidy::bugprone
{
96 void BranchCloneCheck::registerMatchers(MatchFinder
*Finder
) {
98 ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
100 hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
101 hasElse(stmt().bind("else"))),
103 Finder
->addMatcher(switchStmt().bind("switch"), this);
104 Finder
->addMatcher(conditionalOperator().bind("condOp"), this);
107 void BranchCloneCheck::check(const MatchFinder::MatchResult
&Result
) {
108 const ASTContext
&Context
= *Result
.Context
;
110 if (const auto *IS
= Result
.Nodes
.getNodeAs
<IfStmt
>("if")) {
111 const Stmt
*Then
= IS
->getThen();
112 assert(Then
&& "An IfStmt must have a `then` branch!");
114 const Stmt
*Else
= Result
.Nodes
.getNodeAs
<Stmt
>("else");
115 assert(Else
&& "We only look for `if` statements with an `else` branch!");
117 if (!isa
<IfStmt
>(Else
)) {
118 // Just a simple if with no `else if` branch.
119 if (utils::areStatementsIdentical(Then
->IgnoreContainers(),
120 Else
->IgnoreContainers(), Context
)) {
121 diag(IS
->getBeginLoc(), "if with identical then and else branches");
122 diag(IS
->getElseLoc(), "else branch starts here", DiagnosticIDs::Note
);
127 // This is the complicated case when we start an if/else if/else chain.
128 // To find all the duplicates, we collect all the branches into a vector.
129 llvm::SmallVector
<const Stmt
*, 4> Branches
;
130 const IfStmt
*Cur
= IS
;
132 // Store the `then` branch.
133 Branches
.push_back(Cur
->getThen());
135 Else
= Cur
->getElse();
136 // The chain ends if there is no `else` branch.
140 // Check if there is another `else if`...
141 Cur
= dyn_cast
<IfStmt
>(Else
);
143 // ...this is just a plain `else` branch at the end of the chain.
144 Branches
.push_back(Else
);
149 size_t N
= Branches
.size();
150 llvm::BitVector
KnownAsClone(N
);
152 for (size_t I
= 0; I
+ 1 < N
; I
++) {
153 // We have already seen Branches[i] as a clone of an earlier branch.
159 for (size_t J
= I
+ 1; J
< N
; J
++) {
160 if (KnownAsClone
[J
] || !utils::areStatementsIdentical(
161 Branches
[I
]->IgnoreContainers(),
162 Branches
[J
]->IgnoreContainers(), Context
))
166 KnownAsClone
[J
] = true;
168 if (NumCopies
== 2) {
169 // We report the first occurrence only when we find the second one.
170 diag(Branches
[I
]->getBeginLoc(),
171 "repeated branch body in conditional chain");
173 Lexer::getLocForEndOfToken(Branches
[I
]->getEndLoc(), 0,
174 *Result
.SourceManager
, getLangOpts());
176 diag(End
, "end of the original", DiagnosticIDs::Note
);
180 diag(Branches
[J
]->getBeginLoc(), "clone %0 starts here",
188 if (const auto *CO
= Result
.Nodes
.getNodeAs
<ConditionalOperator
>("condOp")) {
189 // We do not try to detect chains of ?: operators.
190 if (utils::areStatementsIdentical(CO
->getTrueExpr(), CO
->getFalseExpr(),
192 diag(CO
->getQuestionLoc(),
193 "conditional operator with identical true and false expressions");
198 if (const auto *SS
= Result
.Nodes
.getNodeAs
<SwitchStmt
>("switch")) {
199 const auto *Body
= dyn_cast_or_null
<CompoundStmt
>(SS
->getBody());
202 // switch (x) case 0: case 1: foobar();
203 // is legal and calls foobar() if and only if x is either 0 or 1;
204 // but we do not try to distinguish branches in such code.
208 // We will first collect the branches of the switch statements. For the
209 // sake of simplicity we say that branches are delimited by the SwitchCase
210 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
211 // `default:` labels embedded inside other statements and we do not follow
212 // the effects of `break` and other manipulation of the control-flow.
213 llvm::SmallVector
<SwitchBranch
, 4> Branches
;
214 for (const Stmt
*S
: Body
->body()) {
215 // If this is a `case` or `default`, we start a new, empty branch.
216 if (isa
<SwitchCase
>(S
))
217 Branches
.emplace_back();
219 // There may be code before the first branch (which can be dead code
220 // and can be code reached either through goto or through case labels
221 // that are embedded inside e.g. inner compound statements); we do not
222 // store those statements in branches.
223 if (!Branches
.empty())
224 Branches
.back().push_back(S
);
227 auto *End
= Branches
.end();
228 auto *BeginCurrent
= Branches
.begin();
229 while (BeginCurrent
< End
) {
230 if (isFallthroughSwitchBranch(*BeginCurrent
)) {
235 auto *EndCurrent
= BeginCurrent
+ 1;
236 while (EndCurrent
< End
&&
237 areSwitchBranchesIdentical(*BeginCurrent
, *EndCurrent
, Context
)) {
240 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
241 // complete family of consecutive identical branches.
243 if (EndCurrent
== (BeginCurrent
+ 1)) {
244 // No consecutive identical branches that start on BeginCurrent
245 BeginCurrent
= EndCurrent
;
249 diag(BeginCurrent
->front()->getBeginLoc(),
250 "switch has %0 consecutive identical branches")
251 << static_cast<int>(std::distance(BeginCurrent
, EndCurrent
));
253 SourceLocation EndLoc
= (EndCurrent
- 1)->back()->getEndLoc();
254 // If the case statement is generated from a macro, it's SourceLocation
255 // may be invalid, resulting in an assertion failure down the line.
256 // While not optimal, try the begin location in this case, it's still
257 // better then nothing.
258 if (EndLoc
.isInvalid())
259 EndLoc
= (EndCurrent
- 1)->back()->getBeginLoc();
260 if (EndLoc
.isMacroID())
261 EndLoc
= Context
.getSourceManager().getExpansionLoc(EndLoc
);
262 EndLoc
= Lexer::getLocForEndOfToken(EndLoc
, 0, *Result
.SourceManager
,
264 if (EndLoc
.isValid()) {
265 diag(EndLoc
, "last of these clones ends here", DiagnosticIDs::Note
);
267 BeginCurrent
= EndCurrent
;
272 llvm_unreachable("No if statement and no switch statement.");
275 } // namespace clang::tidy::bugprone