1 //===--- BranchCloneCheck.cpp - clang-tidy --------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "BranchCloneCheck.h"
10 #include "../utils/ASTUtils.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/ASTMatchers/ASTMatchFinder.h"
14 #include "clang/Analysis/CloneDetection.h"
15 #include "clang/Lex/Lexer.h"
16 #include "llvm/Support/Casting.h"
18 using namespace clang
;
19 using namespace clang::ast_matchers
;
22 /// A branch in a switch may consist of several statements; while a branch in
23 /// an if/else if/else chain is one statement (which may be a CompoundStmt).
24 using SwitchBranch
= llvm::SmallVector
<const Stmt
*, 2>;
25 } // anonymous namespace
27 /// Determines if the bodies of two branches in a switch statements are Type I
28 /// clones of each other. This function only examines the body of the branch
29 /// and ignores the `case X:` or `default:` at the start of the branch.
30 static bool areSwitchBranchesIdentical(const SwitchBranch
&LHS
,
31 const SwitchBranch
&RHS
,
32 const ASTContext
&Context
) {
33 if (LHS
.size() != RHS
.size())
36 for (size_t I
= 0, Size
= LHS
.size(); I
< Size
; I
++) {
37 // NOTE: We strip goto labels and annotations in addition to stripping
38 // the `case X:` or `default:` labels, but it is very unlikely that this
39 // would cause false positives in real-world code.
40 if (!tidy::utils::areStatementsIdentical(LHS
[I
]->stripLabelLikeStatements(),
41 RHS
[I
]->stripLabelLikeStatements(),
50 static bool isFallthroughSwitchBranch(const SwitchBranch
&Branch
) {
51 struct SwitchCaseVisitor
: RecursiveASTVisitor
<SwitchCaseVisitor
> {
52 using RecursiveASTVisitor
<SwitchCaseVisitor
>::DataRecursionQueue
;
54 bool TraverseLambdaExpr(LambdaExpr
*, DataRecursionQueue
* = nullptr) {
55 return true; // Ignore lambdas
58 bool TraverseDecl(Decl
*) {
59 return true; // No need to check declarations
62 bool TraverseSwitchStmt(SwitchStmt
*, DataRecursionQueue
* = nullptr) {
63 return true; // Ignore sub-switches
66 bool TraverseSwitchCase(SwitchCase
*, DataRecursionQueue
* = nullptr) {
67 return true; // Ignore cases
70 bool TraverseDefaultStmt(DefaultStmt
*, DataRecursionQueue
* = nullptr) {
71 return true; // Ignore defaults
74 bool TraverseAttributedStmt(AttributedStmt
*S
) {
78 for (const Attr
*A
: S
->getAttrs()) {
79 if (isa
<FallThroughAttr
>(A
))
87 for (const Stmt
*Elem
: Branch
) {
88 if (!Visitor
.TraverseStmt(const_cast<Stmt
*>(Elem
)))
94 namespace clang::tidy::bugprone
{
96 void BranchCloneCheck::registerMatchers(MatchFinder
*Finder
) {
98 ifStmt(unless(allOf(isConstexpr(), isInTemplateInstantiation())),
100 hasParent(stmt(unless(ifStmt(hasElse(equalsBoundNode("if")))))),
101 hasElse(stmt().bind("else"))),
103 Finder
->addMatcher(switchStmt().bind("switch"), this);
104 Finder
->addMatcher(conditionalOperator().bind("condOp"), this);
106 ifStmt((hasThen(hasDescendant(ifStmt())))).bind("ifWithDescendantIf"),
110 /// Determines whether two statement trees are identical regarding
111 /// operators and symbols.
113 /// Exceptions: expressions containing macros or functions with possible side
114 /// effects are never considered identical.
115 /// Limitations: (t + u) and (u + t) are not considered identical.
116 /// t*(u + t) and t*u + t*t are not considered identical.
118 static bool isIdenticalStmt(const ASTContext
&Ctx
, const Stmt
*Stmt1
,
119 const Stmt
*Stmt2
, bool IgnoreSideEffects
) {
121 if (!Stmt1
|| !Stmt2
)
122 return !Stmt1
&& !Stmt2
;
124 // If Stmt1 & Stmt2 are of different class then they are not
125 // identical statements.
126 if (Stmt1
->getStmtClass() != Stmt2
->getStmtClass())
129 const auto *Expr1
= dyn_cast
<Expr
>(Stmt1
);
130 const auto *Expr2
= dyn_cast
<Expr
>(Stmt2
);
132 if (Expr1
&& Expr2
) {
133 // If Stmt1 has side effects then don't warn even if expressions
135 if (!IgnoreSideEffects
&& Expr1
->HasSideEffects(Ctx
) &&
136 Expr2
->HasSideEffects(Ctx
))
138 // If either expression comes from a macro then don't warn even if
139 // the expressions are identical.
140 if ((Expr1
->getExprLoc().isMacroID()) || (Expr2
->getExprLoc().isMacroID()))
143 // If all children of two expressions are identical, return true.
144 Expr::const_child_iterator I1
= Expr1
->child_begin();
145 Expr::const_child_iterator I2
= Expr2
->child_begin();
146 while (I1
!= Expr1
->child_end() && I2
!= Expr2
->child_end()) {
147 if (!isIdenticalStmt(Ctx
, *I1
, *I2
, IgnoreSideEffects
))
152 // If there are different number of children in the statements, return
154 if (I1
!= Expr1
->child_end())
156 if (I2
!= Expr2
->child_end())
160 switch (Stmt1
->getStmtClass()) {
163 case Stmt::CallExprClass
:
164 case Stmt::ArraySubscriptExprClass
:
165 case Stmt::ArraySectionExprClass
:
166 case Stmt::OMPArrayShapingExprClass
:
167 case Stmt::OMPIteratorExprClass
:
168 case Stmt::ImplicitCastExprClass
:
169 case Stmt::ParenExprClass
:
170 case Stmt::BreakStmtClass
:
171 case Stmt::ContinueStmtClass
:
172 case Stmt::NullStmtClass
:
174 case Stmt::CStyleCastExprClass
: {
175 const auto *CastExpr1
= cast
<CStyleCastExpr
>(Stmt1
);
176 const auto *CastExpr2
= cast
<CStyleCastExpr
>(Stmt2
);
178 return CastExpr1
->getTypeAsWritten() == CastExpr2
->getTypeAsWritten();
180 case Stmt::ReturnStmtClass
: {
181 const auto *ReturnStmt1
= cast
<ReturnStmt
>(Stmt1
);
182 const auto *ReturnStmt2
= cast
<ReturnStmt
>(Stmt2
);
184 return isIdenticalStmt(Ctx
, ReturnStmt1
->getRetValue(),
185 ReturnStmt2
->getRetValue(), IgnoreSideEffects
);
187 case Stmt::ForStmtClass
: {
188 const auto *ForStmt1
= cast
<ForStmt
>(Stmt1
);
189 const auto *ForStmt2
= cast
<ForStmt
>(Stmt2
);
191 if (!isIdenticalStmt(Ctx
, ForStmt1
->getInit(), ForStmt2
->getInit(),
194 if (!isIdenticalStmt(Ctx
, ForStmt1
->getCond(), ForStmt2
->getCond(),
197 if (!isIdenticalStmt(Ctx
, ForStmt1
->getInc(), ForStmt2
->getInc(),
200 if (!isIdenticalStmt(Ctx
, ForStmt1
->getBody(), ForStmt2
->getBody(),
205 case Stmt::DoStmtClass
: {
206 const auto *DStmt1
= cast
<DoStmt
>(Stmt1
);
207 const auto *DStmt2
= cast
<DoStmt
>(Stmt2
);
209 if (!isIdenticalStmt(Ctx
, DStmt1
->getCond(), DStmt2
->getCond(),
212 if (!isIdenticalStmt(Ctx
, DStmt1
->getBody(), DStmt2
->getBody(),
217 case Stmt::WhileStmtClass
: {
218 const auto *WStmt1
= cast
<WhileStmt
>(Stmt1
);
219 const auto *WStmt2
= cast
<WhileStmt
>(Stmt2
);
221 if (!isIdenticalStmt(Ctx
, WStmt1
->getCond(), WStmt2
->getCond(),
224 if (!isIdenticalStmt(Ctx
, WStmt1
->getBody(), WStmt2
->getBody(),
229 case Stmt::IfStmtClass
: {
230 const auto *IStmt1
= cast
<IfStmt
>(Stmt1
);
231 const auto *IStmt2
= cast
<IfStmt
>(Stmt2
);
233 if (!isIdenticalStmt(Ctx
, IStmt1
->getCond(), IStmt2
->getCond(),
236 if (!isIdenticalStmt(Ctx
, IStmt1
->getThen(), IStmt2
->getThen(),
239 if (!isIdenticalStmt(Ctx
, IStmt1
->getElse(), IStmt2
->getElse(),
244 case Stmt::CompoundStmtClass
: {
245 const auto *CompStmt1
= cast
<CompoundStmt
>(Stmt1
);
246 const auto *CompStmt2
= cast
<CompoundStmt
>(Stmt2
);
248 if (CompStmt1
->size() != CompStmt2
->size())
251 if (!llvm::all_of(llvm::zip(CompStmt1
->body(), CompStmt2
->body()),
252 [&Ctx
, IgnoreSideEffects
](
253 std::tuple
<const Stmt
*, const Stmt
*> stmtPair
) {
254 const Stmt
*stmt0
= std::get
<0>(stmtPair
);
255 const Stmt
*stmt1
= std::get
<1>(stmtPair
);
256 return isIdenticalStmt(Ctx
, stmt0
, stmt1
,
264 case Stmt::CompoundAssignOperatorClass
:
265 case Stmt::BinaryOperatorClass
: {
266 const auto *BinOp1
= cast
<BinaryOperator
>(Stmt1
);
267 const auto *BinOp2
= cast
<BinaryOperator
>(Stmt2
);
268 return BinOp1
->getOpcode() == BinOp2
->getOpcode();
270 case Stmt::CharacterLiteralClass
: {
271 const auto *CharLit1
= cast
<CharacterLiteral
>(Stmt1
);
272 const auto *CharLit2
= cast
<CharacterLiteral
>(Stmt2
);
273 return CharLit1
->getValue() == CharLit2
->getValue();
275 case Stmt::DeclRefExprClass
: {
276 const auto *DeclRef1
= cast
<DeclRefExpr
>(Stmt1
);
277 const auto *DeclRef2
= cast
<DeclRefExpr
>(Stmt2
);
278 return DeclRef1
->getDecl() == DeclRef2
->getDecl();
280 case Stmt::IntegerLiteralClass
: {
281 const auto *IntLit1
= cast
<IntegerLiteral
>(Stmt1
);
282 const auto *IntLit2
= cast
<IntegerLiteral
>(Stmt2
);
284 llvm::APInt I1
= IntLit1
->getValue();
285 llvm::APInt I2
= IntLit2
->getValue();
286 if (I1
.getBitWidth() != I2
.getBitWidth())
290 case Stmt::FloatingLiteralClass
: {
291 const auto *FloatLit1
= cast
<FloatingLiteral
>(Stmt1
);
292 const auto *FloatLit2
= cast
<FloatingLiteral
>(Stmt2
);
293 return FloatLit1
->getValue().bitwiseIsEqual(FloatLit2
->getValue());
295 case Stmt::StringLiteralClass
: {
296 const auto *StringLit1
= cast
<StringLiteral
>(Stmt1
);
297 const auto *StringLit2
= cast
<StringLiteral
>(Stmt2
);
298 return StringLit1
->getBytes() == StringLit2
->getBytes();
300 case Stmt::MemberExprClass
: {
301 const auto *MemberStmt1
= cast
<MemberExpr
>(Stmt1
);
302 const auto *MemberStmt2
= cast
<MemberExpr
>(Stmt2
);
303 return MemberStmt1
->getMemberDecl() == MemberStmt2
->getMemberDecl();
305 case Stmt::UnaryOperatorClass
: {
306 const auto *UnaryOp1
= cast
<UnaryOperator
>(Stmt1
);
307 const auto *UnaryOp2
= cast
<UnaryOperator
>(Stmt2
);
308 return UnaryOp1
->getOpcode() == UnaryOp2
->getOpcode();
313 void BranchCloneCheck::check(const MatchFinder::MatchResult
&Result
) {
314 const ASTContext
&Context
= *Result
.Context
;
316 if (const auto *IS
= Result
.Nodes
.getNodeAs
<IfStmt
>("if")) {
317 const Stmt
*Then
= IS
->getThen();
318 assert(Then
&& "An IfStmt must have a `then` branch!");
320 const Stmt
*Else
= Result
.Nodes
.getNodeAs
<Stmt
>("else");
321 assert(Else
&& "We only look for `if` statements with an `else` branch!");
323 if (!isa
<IfStmt
>(Else
)) {
324 // Just a simple if with no `else if` branch.
325 if (utils::areStatementsIdentical(Then
->IgnoreContainers(),
326 Else
->IgnoreContainers(), Context
)) {
327 diag(IS
->getBeginLoc(), "if with identical then and else branches");
328 diag(IS
->getElseLoc(), "else branch starts here", DiagnosticIDs::Note
);
333 // This is the complicated case when we start an if/else if/else chain.
334 // To find all the duplicates, we collect all the branches into a vector.
335 llvm::SmallVector
<const Stmt
*, 4> Branches
;
336 const IfStmt
*Cur
= IS
;
338 // Store the `then` branch.
339 Branches
.push_back(Cur
->getThen());
341 Else
= Cur
->getElse();
342 // The chain ends if there is no `else` branch.
346 // Check if there is another `else if`...
347 Cur
= dyn_cast
<IfStmt
>(Else
);
349 // ...this is just a plain `else` branch at the end of the chain.
350 Branches
.push_back(Else
);
355 size_t N
= Branches
.size();
356 llvm::BitVector
KnownAsClone(N
);
358 for (size_t I
= 0; I
+ 1 < N
; I
++) {
359 // We have already seen Branches[i] as a clone of an earlier branch.
365 for (size_t J
= I
+ 1; J
< N
; J
++) {
366 if (KnownAsClone
[J
] || !utils::areStatementsIdentical(
367 Branches
[I
]->IgnoreContainers(),
368 Branches
[J
]->IgnoreContainers(), Context
))
372 KnownAsClone
[J
] = true;
374 if (NumCopies
== 2) {
375 // We report the first occurrence only when we find the second one.
376 diag(Branches
[I
]->getBeginLoc(),
377 "repeated branch body in conditional chain");
379 Lexer::getLocForEndOfToken(Branches
[I
]->getEndLoc(), 0,
380 *Result
.SourceManager
, getLangOpts());
382 diag(End
, "end of the original", DiagnosticIDs::Note
);
386 diag(Branches
[J
]->getBeginLoc(), "clone %0 starts here",
394 if (const auto *CO
= Result
.Nodes
.getNodeAs
<ConditionalOperator
>("condOp")) {
395 // We do not try to detect chains of ?: operators.
396 if (utils::areStatementsIdentical(CO
->getTrueExpr(), CO
->getFalseExpr(),
398 diag(CO
->getQuestionLoc(),
399 "conditional operator with identical true and false expressions");
404 if (const auto *SS
= Result
.Nodes
.getNodeAs
<SwitchStmt
>("switch")) {
405 const auto *Body
= dyn_cast_or_null
<CompoundStmt
>(SS
->getBody());
408 // switch (x) case 0: case 1: foobar();
409 // is legal and calls foobar() if and only if x is either 0 or 1;
410 // but we do not try to distinguish branches in such code.
414 // We will first collect the branches of the switch statements. For the
415 // sake of simplicity we say that branches are delimited by the SwitchCase
416 // (`case:` or `default:`) children of Body; that is, we ignore `case:` or
417 // `default:` labels embedded inside other statements and we do not follow
418 // the effects of `break` and other manipulation of the control-flow.
419 llvm::SmallVector
<SwitchBranch
, 4> Branches
;
420 for (const Stmt
*S
: Body
->body()) {
421 // If this is a `case` or `default`, we start a new, empty branch.
422 if (isa
<SwitchCase
>(S
))
423 Branches
.emplace_back();
425 // There may be code before the first branch (which can be dead code
426 // and can be code reached either through goto or through case labels
427 // that are embedded inside e.g. inner compound statements); we do not
428 // store those statements in branches.
429 if (!Branches
.empty())
430 Branches
.back().push_back(S
);
433 auto *End
= Branches
.end();
434 auto *BeginCurrent
= Branches
.begin();
435 while (BeginCurrent
< End
) {
436 if (isFallthroughSwitchBranch(*BeginCurrent
)) {
441 auto *EndCurrent
= BeginCurrent
+ 1;
442 while (EndCurrent
< End
&&
443 areSwitchBranchesIdentical(*BeginCurrent
, *EndCurrent
, Context
)) {
446 // At this point the iterator range {BeginCurrent, EndCurrent} contains a
447 // complete family of consecutive identical branches.
449 if (EndCurrent
== (BeginCurrent
+ 1)) {
450 // No consecutive identical branches that start on BeginCurrent
451 BeginCurrent
= EndCurrent
;
455 diag(BeginCurrent
->front()->getBeginLoc(),
456 "switch has %0 consecutive identical branches")
457 << static_cast<int>(std::distance(BeginCurrent
, EndCurrent
));
459 SourceLocation EndLoc
= (EndCurrent
- 1)->back()->getEndLoc();
460 // If the case statement is generated from a macro, it's SourceLocation
461 // may be invalid, resulting in an assertion failure down the line.
462 // While not optimal, try the begin location in this case, it's still
463 // better then nothing.
464 if (EndLoc
.isInvalid())
465 EndLoc
= (EndCurrent
- 1)->back()->getBeginLoc();
466 if (EndLoc
.isMacroID())
467 EndLoc
= Context
.getSourceManager().getExpansionLoc(EndLoc
);
468 EndLoc
= Lexer::getLocForEndOfToken(EndLoc
, 0, *Result
.SourceManager
,
470 if (EndLoc
.isValid()) {
471 diag(EndLoc
, "last of these clones ends here", DiagnosticIDs::Note
);
473 BeginCurrent
= EndCurrent
;
478 if (const auto *IS
= Result
.Nodes
.getNodeAs
<IfStmt
>("ifWithDescendantIf")) {
479 const Stmt
*Then
= IS
->getThen();
480 auto CS
= dyn_cast
<CompoundStmt
>(Then
);
481 if (CS
&& (!CS
->body_empty())) {
482 const auto *InnerIf
= dyn_cast
<IfStmt
>(*CS
->body_begin());
483 if (InnerIf
&& isIdenticalStmt(Context
, IS
->getCond(), InnerIf
->getCond(),
484 /*IgnoreSideEffects=*/false)) {
485 diag(IS
->getBeginLoc(), "if with identical inner if statement");
486 diag(InnerIf
->getBeginLoc(), "inner if starts here",
487 DiagnosticIDs::Note
);
493 llvm_unreachable("No if statement and no switch statement.");
496 } // namespace clang::tidy::bugprone