update credits
[LibreOffice.git] / compilerplugins / clang / stringadd.cxx
blob0ac4ee6d3c65cd8ee56337223cb082f3dd7f7810
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9 #ifndef LO_CLANG_SHARED_PLUGINS
11 #include <cassert>
12 #include <string>
13 #include <iostream>
14 #include <unordered_map>
15 #include <unordered_set>
17 #include "plugin.hxx"
18 #include "check.hxx"
19 #include "compat.hxx"
20 #include "config_clang.h"
21 #include "clang/AST/CXXInheritance.h"
22 #include "clang/AST/StmtVisitor.h"
/**
    Look for repeated addition to OUString/OString/OUStringBuffer/OStringBuffer.

    E.g.
      OUString x = "xxx";
      x += b;

    which can be simplified to
      x = "xxx" + b

    which is more efficient, because of the OUStringConcat magic.
*/
37 namespace
39 class StringAdd : public loplugin::FilteringPlugin<StringAdd>
41 public:
42 explicit StringAdd(loplugin::InstantiationData const& data)
43 : FilteringPlugin(data)
47 bool preRun() override
49 std::string fn(handler.getMainFileName());
50 loplugin::normalizeDotDotInFilePath(fn);
51 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustring/"))
52 return false;
53 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustringbuffer/"))
54 return false;
55 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/strings/"))
56 return false;
57 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/OStringBuffer/"))
58 return false;
59 // there is an ifdef here, but my check is not working, not sure why
60 if (fn == SRCDIR "/pyuno/source/module/pyuno_runtime.cxx")
61 return false;
62 // TODO the += depends on the result of the preceding assign, so can't merge
63 if (fn == SRCDIR "/editeng/source/misc/svxacorr.cxx")
64 return false;
65 // TODO this file has a boatload of buffer appends' and I don't feel like fixing them all now
66 if (fn == SRCDIR "/vcl/source/gdi/pdfwriter_impl.cxx")
67 return false;
68 return true;
71 virtual void run() override
73 if (!preRun())
74 return;
75 TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
78 bool VisitCompoundStmt(CompoundStmt const*);
79 bool VisitCXXOperatorCallExpr(CXXOperatorCallExpr const*);
80 bool VisitCXXMemberCallExpr(CXXMemberCallExpr const*);
82 private:
83 enum class Summands
85 OnlyCompileTimeConstants,
86 OnlySideEffectFree,
87 SideEffect
90 struct VarDeclAndSummands
92 const VarDecl* varDecl;
93 Summands summands;
96 VarDeclAndSummands findAssignOrAdd(Stmt const*);
97 bool checkForCompoundAssign(Stmt const* stmt1, Stmt const* stmt2, VarDeclAndSummands& varDecl);
99 Expr const* ignore(Expr const*);
100 bool isSideEffectFree(Expr const*);
101 bool isCompileTimeConstant(Expr const*);
104 bool StringAdd::VisitCompoundStmt(CompoundStmt const* compoundStmt)
106 if (ignoreLocation(compoundStmt))
107 return true;
109 auto it = compoundStmt->body_begin();
110 while (true)
112 if (it == compoundStmt->body_end())
113 break;
114 VarDeclAndSummands foundVar = findAssignOrAdd(*it);
115 // reference types have slightly weird behaviour
116 if (foundVar.varDecl && !foundVar.varDecl->getType()->isReferenceType())
118 auto stmt1 = *it;
119 ++it;
120 while (it != compoundStmt->body_end())
122 if (!checkForCompoundAssign(stmt1, *it, foundVar))
124 break;
126 stmt1 = *it;
127 ++it;
130 else
131 ++it;
134 return true;
137 StringAdd::VarDeclAndSummands StringAdd::findAssignOrAdd(Stmt const* stmt)
139 if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt))
140 stmt = exprCleanup->getSubExpr();
141 if (auto switchCase = dyn_cast<SwitchCase>(stmt))
142 stmt = switchCase->getSubStmt();
144 if (auto declStmt = dyn_cast<DeclStmt>(stmt))
145 if (declStmt->isSingleDecl())
146 if (auto varDeclLHS = dyn_cast_or_null<VarDecl>(declStmt->getSingleDecl()))
148 auto tc = loplugin::TypeCheck(varDeclLHS->getType());
149 if (!tc.Class("OUString").Namespace("rtl").GlobalNamespace()
150 && !tc.Class("OString").Namespace("rtl").GlobalNamespace()
151 && !tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
152 && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
153 return {};
154 if (varDeclLHS->getStorageDuration() == SD_Static)
155 return {};
156 if (!varDeclLHS->hasInit())
157 return {};
158 if (tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
159 || tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
161 // ignore the constructor that gives the buffer a default size
162 if (auto cxxConstructor = dyn_cast<CXXConstructExpr>(varDeclLHS->getInit()))
163 if (auto constructorDecl = cxxConstructor->getConstructor())
164 if ((constructorDecl->getNumParams() == 1
165 && loplugin::TypeCheck(constructorDecl->getParamDecl(0)->getType())
166 .Typedef("sal_Int32")
167 .GlobalNamespace())
168 || (constructorDecl->getNumParams() == 2
169 && constructorDecl->getParamDecl(0)->getType()->isIntegralType(
170 compiler.getASTContext())
171 && constructorDecl->getParamDecl(1)
172 ->getType()
173 ->isSpecificBuiltinType(BuiltinType::Int)))
174 return {};
176 return { varDeclLHS, (isCompileTimeConstant(varDeclLHS->getInit())
177 ? Summands::OnlyCompileTimeConstants
178 : (isSideEffectFree(varDeclLHS->getInit())
179 ? Summands::OnlySideEffectFree
180 : Summands::SideEffect)) };
182 if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(stmt))
183 if (operatorCall->getOperator() == OO_Equal || operatorCall->getOperator() == OO_PlusEqual)
184 if (auto declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(operatorCall->getArg(0))))
185 if (auto varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl()))
187 auto tc = loplugin::TypeCheck(varDeclLHS->getType());
188 if (!tc.Class("OUString").Namespace("rtl").GlobalNamespace()
189 && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
190 return {};
191 auto rhs = operatorCall->getArg(1);
192 return { varDeclLHS,
193 (isCompileTimeConstant(rhs)
194 ? Summands::OnlyCompileTimeConstants
195 : (isSideEffectFree(rhs) ? Summands::OnlySideEffectFree
196 : Summands::SideEffect)) };
198 if (auto memberCall = dyn_cast<CXXMemberCallExpr>(stmt))
199 if (auto cxxMethodDecl = dyn_cast_or_null<CXXMethodDecl>(memberCall->getDirectCallee()))
200 if (cxxMethodDecl->getIdentifier() && cxxMethodDecl->getName() == "append")
201 if (auto declRefExprLHS
202 = dyn_cast<DeclRefExpr>(ignore(memberCall->getImplicitObjectArgument())))
203 if (auto varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl()))
205 auto tc = loplugin::TypeCheck(varDeclLHS->getType());
206 if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
207 && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
208 return {};
209 auto rhs = memberCall->getArg(0);
210 return { varDeclLHS,
211 (isCompileTimeConstant(rhs)
212 ? Summands::OnlyCompileTimeConstants
213 : (isSideEffectFree(rhs) ? Summands::OnlySideEffectFree
214 : Summands::SideEffect)) };
216 return {};
219 bool StringAdd::checkForCompoundAssign(Stmt const* stmt1, Stmt const* stmt2,
220 VarDeclAndSummands& varDecl)
222 // OString additions are frequently wrapped in these
223 if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt2))
224 stmt2 = exprCleanup->getSubExpr();
225 if (auto switchCase = dyn_cast<SwitchCase>(stmt2))
226 stmt2 = switchCase->getSubStmt();
228 const DeclRefExpr* declRefExprLHS;
229 const Expr* rhs;
230 auto tc = loplugin::TypeCheck(varDecl.varDecl->getType());
231 if (tc.Class("OString") || tc.Class("OUString"))
233 auto operatorCall = dyn_cast<CXXOperatorCallExpr>(stmt2);
234 if (!operatorCall)
235 return false;
236 if (operatorCall->getOperator() != OO_PlusEqual)
237 return false;
238 declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(operatorCall->getArg(0)));
239 rhs = operatorCall->getArg(1);
241 else
243 // OUStringBuffer, OStringBuffer
244 auto memberCall = dyn_cast<CXXMemberCallExpr>(stmt2);
245 if (!memberCall)
246 return false;
247 auto cxxMethodDecl = dyn_cast_or_null<CXXMethodDecl>(memberCall->getDirectCallee());
248 if (!cxxMethodDecl)
249 return false;
250 if (!cxxMethodDecl->getIdentifier() || cxxMethodDecl->getName() != "append")
251 return false;
252 declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(memberCall->getImplicitObjectArgument()));
253 rhs = memberCall->getArg(0);
255 if (!declRefExprLHS)
256 return false;
257 if (declRefExprLHS->getDecl() != varDecl.varDecl)
258 return false;
259 // if either side is a compile-time-constant, then we don't care about
260 // side-effects
261 bool const ctcRhs = isCompileTimeConstant(rhs);
262 if (!ctcRhs)
264 auto const sefRhs = isSideEffectFree(rhs);
265 auto const oldSummands = varDecl.summands;
266 varDecl.summands = sefRhs ? Summands::OnlySideEffectFree : Summands::SideEffect;
267 if (oldSummands != Summands::OnlyCompileTimeConstants
268 && (oldSummands == Summands::SideEffect || !sefRhs))
270 return true;
273 SourceRange mergeRange(stmt1->getSourceRange().getBegin(), stmt2->getSourceRange().getEnd());
274 // if we cross a #ifdef boundary
275 if (containsPreprocessingConditionalInclusion(mergeRange))
277 varDecl.summands
278 = ctcRhs ? Summands::OnlyCompileTimeConstants
279 : isSideEffectFree(rhs) ? Summands::OnlySideEffectFree : Summands::SideEffect;
280 return true;
282 // If there is a comment between two calls, rather don't suggest merge
283 // IMO, code clarity trumps efficiency (as far as plugin warnings go, anyway).
284 if (containsComment(mergeRange))
285 return true;
286 // I don't think the OUStringAppend functionality can handle this efficiently
287 if (isa<ConditionalOperator>(ignore(rhs)))
288 return false;
289 report(DiagnosticsEngine::Warning, "simplify by merging with the preceding assign/append",
290 stmt2->getBeginLoc())
291 << stmt2->getSourceRange();
292 return true;
295 // Check for generating temporaries when adding strings
297 bool StringAdd::VisitCXXOperatorCallExpr(CXXOperatorCallExpr const* operatorCall)
299 if (ignoreLocation(operatorCall))
300 return true;
301 if (operatorCall->getOperator() != OO_Plus)
302 return true;
303 auto tc = loplugin::TypeCheck(operatorCall->getType()->getUnqualifiedDesugaredType());
304 if (!tc.Struct("StringConcat").Namespace("rtl").GlobalNamespace()
305 && !tc.Class("OUString").Namespace("rtl").GlobalNamespace()
306 && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
307 return true;
309 auto check = [operatorCall, this](unsigned arg) {
310 auto const e
311 = dyn_cast<CXXFunctionalCastExpr>(operatorCall->getArg(arg)->IgnoreParenImpCasts());
312 if (e == nullptr)
313 return;
314 auto tc3 = loplugin::TypeCheck(e->getType());
315 if (!tc3.Class("OUString").Namespace("rtl").GlobalNamespace()
316 && !tc3.Class("OString").Namespace("rtl").GlobalNamespace()
317 && !tc3.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
318 && !tc3.Class("OStringLiteral").Namespace("rtl").GlobalNamespace()
319 && !tc3.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
320 && !tc3.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
321 return;
322 report(DiagnosticsEngine::Warning,
323 ("rather use O[U]String::Concat than constructing %0 from %1 on %select{L|R}2HS of "
324 "+ (where %select{R|L}2HS is of"
325 " type %3)"),
326 e->getBeginLoc())
327 << e->getType().getLocalUnqualifiedType() << e->getSubExprAsWritten()->getType() << arg
328 << operatorCall->getArg(1 - arg)->IgnoreImpCasts()->getType() << e->getSourceRange();
331 check(0);
332 check(1);
333 return true;
336 bool StringAdd::VisitCXXMemberCallExpr(CXXMemberCallExpr const* methodCall)
338 if (ignoreLocation(methodCall))
339 return true;
341 auto methodDecl = methodCall->getMethodDecl();
342 if (!methodDecl || !methodDecl->getIdentifier() || methodDecl->getName() != "append"
343 || methodCall->getNumArgs() == 0)
344 return true;
345 auto tc1 = loplugin::TypeCheck(methodCall->getType());
346 if (!tc1.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
347 && !tc1.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
348 return true;
349 auto arg = methodCall->getArg(0);
350 // I don't think the OUStringAppend functionality can handle this efficiently
351 if (isa<ConditionalOperator>(ignore(arg)))
352 return true;
354 auto methodCall2 = dyn_cast<CXXMemberCallExpr>(ignore(methodCall->getImplicitObjectArgument()));
355 if (!methodCall2)
356 return true;
357 auto tc = loplugin::TypeCheck(methodCall2->getType());
358 if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
359 && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
360 return true;
361 auto methodDecl2 = methodCall2->getMethodDecl();
362 if (!methodDecl2->getIdentifier() || methodDecl2->getName() != "append"
363 || methodCall2->getNumArgs() == 0)
364 return true;
365 arg = methodCall2->getArg(0);
366 // I don't think the OUStringAppend functionality can handle this efficiently
367 if (isa<ConditionalOperator>(ignore(arg)))
368 return true;
369 report(DiagnosticsEngine::Warning,
370 "chained append, rather use single append call and + operator",
371 methodCall2->getBeginLoc())
372 << methodCall2->getSourceRange();
374 return true;
377 Expr const* StringAdd::ignore(Expr const* expr)
379 return expr->IgnoreImplicit()->IgnoreParens()->IgnoreImplicit();
382 bool StringAdd::isSideEffectFree(Expr const* expr)
384 expr = ignore(expr);
385 // I don't think the OUStringAppend functionality can handle this efficiently
386 if (isa<ConditionalOperator>(expr))
387 return false;
388 // Multiple statements have a well defined evaluation order (sequence points between them)
389 // but a single expression may be evaluated in arbitrary order;
390 // if there are side effects in one of the sub-expressions that have an effect on another subexpression,
391 // the result may be incorrect, and you don't necessarily notice in tests because the order is compiler-dependent.
392 // for example see commit afd743141f7a7dd05914d0872c9afe079f16fe0c where such a refactoring introduced such a bug.
393 // So only consider simple RHS expressions.
394 if (!expr->HasSideEffects(compiler.getASTContext()))
395 return true;
397 // check for chained adds which are side-effect free
398 if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(expr))
400 auto op = operatorCall->getOperator();
401 if (op == OO_PlusEqual || op == OO_Plus)
402 if (isSideEffectFree(operatorCall->getArg(0))
403 && isSideEffectFree(operatorCall->getArg(1)))
404 return true;
407 if (auto callExpr = dyn_cast<CallExpr>(expr))
409 // check for calls through OUString::number/OUString::unacquired
410 if (auto calleeMethodDecl = dyn_cast_or_null<CXXMethodDecl>(callExpr->getCalleeDecl()))
412 if (calleeMethodDecl->getIdentifier())
414 auto name = calleeMethodDecl->getName();
415 if (callExpr->getNumArgs() > 0
416 && (name == "number" || name == "unacquired" || name == "boolean"
417 || name == "copy"))
419 auto tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
420 if (tc.Class("OUString") || tc.Class("OString"))
422 if (isSideEffectFree(callExpr->getArg(0)))
423 return true;
427 else if (auto const d = dyn_cast<CXXConversionDecl>(calleeMethodDecl))
429 if (loplugin::TypeCheck(d->getConversionType())
430 .ClassOrStruct("basic_string_view")
431 .StdNamespace())
433 auto const tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
434 if (tc.Class("OUString").Namespace("rtl").GlobalNamespace()
435 || tc.Class("OString").Namespace("rtl").GlobalNamespace())
437 if (isSideEffectFree(callExpr->getCallee()))
438 return true;
442 // Aggressively assume that calls to const member functions are side effect free (if
443 // all of the call's sub-expressions are):
444 if (calleeMethodDecl->isConst())
446 auto sef = true;
447 // Other options besides CXXMemberCallExpr are e.g. CXXOperatorCallExpr which
448 // does not have such a target expression:
449 if (auto const mce = dyn_cast<CXXMemberCallExpr>(callExpr))
451 if (!isSideEffectFree(mce->getImplicitObjectArgument()))
453 sef = false;
456 if (sef)
458 for (unsigned i = 0; i != callExpr->getNumArgs(); ++i)
460 if (!isSideEffectFree(callExpr->getArg(i)))
462 sef = false;
463 break;
467 if (sef)
469 return true;
473 if (auto calleeFunctionDecl = dyn_cast_or_null<FunctionDecl>(callExpr->getCalleeDecl()))
474 if (calleeFunctionDecl && calleeFunctionDecl->getIdentifier())
476 auto name = calleeFunctionDecl->getName();
477 // check for calls through OUStringToOString
478 if (name == "OUStringToOString" || name == "OStringToOUString")
479 if (isSideEffectFree(callExpr->getArg(0)))
480 return true;
481 // allowlist some known-safe methods
482 if (compat::ends_with(name, "ResId") || name == "GetXMLToken")
483 if (isSideEffectFree(callExpr->getArg(0)))
484 return true;
488 // sometimes we have a constructor call on the RHS
489 if (auto constructExpr = dyn_cast<CXXConstructExpr>(expr))
491 auto dc = loplugin::DeclCheck(constructExpr->getConstructor());
492 if (dc.MemberFunction().Class("OUString") || dc.MemberFunction().Class("OString")
493 || dc.MemberFunction().Class("OUStringBuffer")
494 || dc.MemberFunction().Class("OStringBuffer"))
495 if (constructExpr->getNumArgs() == 0 || isSideEffectFree(constructExpr->getArg(0)))
496 return true;
497 // Expr::HasSideEffects does not like stuff that passes through OUStringLiteral
498 auto dc2 = loplugin::DeclCheck(constructExpr->getConstructor()->getParent());
499 if (dc2.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
500 || dc2.Class("OStringLiteral").Namespace("rtl").GlobalNamespace())
501 return true;
504 // when adding literals, we sometimes get this
505 if (auto functionalCastExpr = dyn_cast<CXXFunctionalCastExpr>(expr))
507 auto tc = loplugin::TypeCheck(functionalCastExpr->getType());
508 if (tc.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
509 || tc.Class("OStringLiteral").Namespace("rtl").GlobalNamespace())
510 return isSideEffectFree(functionalCastExpr->getSubExpr());
513 return false;
516 bool StringAdd::isCompileTimeConstant(Expr const* expr)
518 expr = expr->IgnoreImplicit();
519 if (auto cxxConstructExpr = dyn_cast<CXXConstructExpr>(expr))
520 if (cxxConstructExpr->getNumArgs() > 0)
521 expr = cxxConstructExpr->getArg(0);
522 return isa<clang::StringLiteral>(expr);
// File-level registration object for the StringAdd plugin, constructed with
// the string "stringadd" (presumably the name under which the plugin is
// selected -- confirm against loplugin::Plugin::Registration).
525 loplugin::Plugin::Registration<StringAdd> stringadd("stringadd");
528 #endif // LO_CLANG_SHARED_PLUGINS
530 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */