Version 24.2.2.2, tag libreoffice-24.2.2.2
[LibreOffice.git] / compilerplugins / clang / stringadd.cxx
blob022bffa804fc6d88640ad92d8d303ed6ae2ec96e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9 #ifndef LO_CLANG_SHARED_PLUGINS
11 #include <cassert>
12 #include <string>
13 #include <iostream>
14 #include <unordered_map>
15 #include <unordered_set>
17 #include "plugin.hxx"
18 #include "check.hxx"
19 #include "config_clang.h"
20 #include "clang/AST/CXXInheritance.h"
21 #include "clang/AST/StmtVisitor.h"
/**
    Look for repeated addition to OUString/OString/OUStringBuffer/OStringBuffer.

    E.g.
      OUString x = "xxx";
      x += b;

    which can be simplified to
      x = "xxx" + b

    which is more efficient, because of the OUStringConcat magic.
*/
36 namespace
38 class StringAdd : public loplugin::FilteringPlugin<StringAdd>
40 public:
41 explicit StringAdd(loplugin::InstantiationData const& data)
42 : FilteringPlugin(data)
46 bool preRun() override
48 std::string fn(handler.getMainFileName());
49 loplugin::normalizeDotDotInFilePath(fn);
50 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustring/"))
51 return false;
52 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/oustringbuffer/"))
53 return false;
54 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/rtl/strings/"))
55 return false;
56 if (loplugin::hasPathnamePrefix(fn, SRCDIR "/sal/qa/OStringBuffer/"))
57 return false;
58 // there is an ifdef here, but my check is not working, not sure why
59 if (fn == SRCDIR "/pyuno/source/module/pyuno_runtime.cxx")
60 return false;
61 // TODO the += depends on the result of the preceding assign, so can't merge
62 if (fn == SRCDIR "/editeng/source/misc/svxacorr.cxx")
63 return false;
64 // TODO this file has a boatload of buffer appends' and I don't feel like fixing them all now
65 if (fn == SRCDIR "/vcl/source/gdi/pdfwriter_impl.cxx")
66 return false;
67 return true;
70 virtual void run() override
72 if (!preRun())
73 return;
74 TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
77 bool VisitCompoundStmt(CompoundStmt const*);
78 bool VisitCXXOperatorCallExpr(CXXOperatorCallExpr const*);
79 bool VisitCXXMemberCallExpr(CXXMemberCallExpr const*);
81 private:
82 enum class Summands
84 OnlyCompileTimeConstants,
85 OnlySideEffectFree,
86 SideEffect
89 struct VarDeclAndSummands
91 const VarDecl* varDecl;
92 Summands summands;
95 VarDeclAndSummands findAssignOrAdd(Stmt const*);
96 bool checkForCompoundAssign(Stmt const* stmt1, Stmt const* stmt2, VarDeclAndSummands& varDecl);
98 Expr const* ignore(Expr const*);
99 bool isSideEffectFree(Expr const*);
100 bool isCompileTimeConstant(Expr const*);
103 bool StringAdd::VisitCompoundStmt(CompoundStmt const* compoundStmt)
105 if (ignoreLocation(compoundStmt))
106 return true;
108 auto it = compoundStmt->body_begin();
109 while (true)
111 if (it == compoundStmt->body_end())
112 break;
113 VarDeclAndSummands foundVar = findAssignOrAdd(*it);
114 // reference types have slightly weird behaviour
115 if (foundVar.varDecl && !foundVar.varDecl->getType()->isReferenceType())
117 auto stmt1 = *it;
118 ++it;
119 while (it != compoundStmt->body_end())
121 if (!checkForCompoundAssign(stmt1, *it, foundVar))
123 break;
125 stmt1 = *it;
126 ++it;
129 else
130 ++it;
133 return true;
136 StringAdd::VarDeclAndSummands StringAdd::findAssignOrAdd(Stmt const* stmt)
138 if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt))
139 stmt = exprCleanup->getSubExpr();
140 if (auto switchCase = dyn_cast<SwitchCase>(stmt))
141 stmt = switchCase->getSubStmt();
143 if (auto declStmt = dyn_cast<DeclStmt>(stmt))
144 if (declStmt->isSingleDecl())
145 if (auto varDeclLHS = dyn_cast_or_null<VarDecl>(declStmt->getSingleDecl()))
147 auto tc = loplugin::TypeCheck(varDeclLHS->getType());
148 if (!tc.Class("OUString").Namespace("rtl").GlobalNamespace()
149 && !tc.Class("OString").Namespace("rtl").GlobalNamespace()
150 && !tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
151 && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
152 return {};
153 if (varDeclLHS->getStorageDuration() == SD_Static)
154 return {};
155 if (!varDeclLHS->hasInit())
156 return {};
157 if (tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
158 || tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
160 // ignore the constructor that gives the buffer a default size
161 if (auto cxxConstructor = dyn_cast<CXXConstructExpr>(varDeclLHS->getInit()))
162 if (auto constructorDecl = cxxConstructor->getConstructor())
163 if ((constructorDecl->getNumParams() == 1
164 && loplugin::TypeCheck(constructorDecl->getParamDecl(0)->getType())
165 .Typedef("sal_Int32")
166 .GlobalNamespace())
167 || (constructorDecl->getNumParams() == 2
168 && constructorDecl->getParamDecl(0)->getType()->isIntegralType(
169 compiler.getASTContext())
170 && constructorDecl->getParamDecl(1)
171 ->getType()
172 ->isSpecificBuiltinType(BuiltinType::Int)))
173 return {};
175 return { varDeclLHS, (isCompileTimeConstant(varDeclLHS->getInit())
176 ? Summands::OnlyCompileTimeConstants
177 : (isSideEffectFree(varDeclLHS->getInit())
178 ? Summands::OnlySideEffectFree
179 : Summands::SideEffect)) };
181 if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(stmt))
182 if (operatorCall->getOperator() == OO_Equal || operatorCall->getOperator() == OO_PlusEqual)
183 if (auto declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(operatorCall->getArg(0))))
184 if (auto varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl()))
186 auto tc = loplugin::TypeCheck(varDeclLHS->getType());
187 if (!tc.Class("OUString").Namespace("rtl").GlobalNamespace()
188 && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
189 return {};
190 auto rhs = operatorCall->getArg(1);
191 return { varDeclLHS,
192 (isCompileTimeConstant(rhs)
193 ? Summands::OnlyCompileTimeConstants
194 : (isSideEffectFree(rhs) ? Summands::OnlySideEffectFree
195 : Summands::SideEffect)) };
197 if (auto memberCall = dyn_cast<CXXMemberCallExpr>(stmt))
198 if (auto cxxMethodDecl = dyn_cast_or_null<CXXMethodDecl>(memberCall->getDirectCallee()))
199 if (cxxMethodDecl->getIdentifier() && cxxMethodDecl->getName() == "append")
200 if (auto declRefExprLHS
201 = dyn_cast<DeclRefExpr>(ignore(memberCall->getImplicitObjectArgument())))
202 if (auto varDeclLHS = dyn_cast<VarDecl>(declRefExprLHS->getDecl()))
204 auto tc = loplugin::TypeCheck(varDeclLHS->getType());
205 if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
206 && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
207 return {};
208 auto rhs = memberCall->getArg(0);
209 return { varDeclLHS,
210 (isCompileTimeConstant(rhs)
211 ? Summands::OnlyCompileTimeConstants
212 : (isSideEffectFree(rhs) ? Summands::OnlySideEffectFree
213 : Summands::SideEffect)) };
215 return {};
218 bool StringAdd::checkForCompoundAssign(Stmt const* stmt1, Stmt const* stmt2,
219 VarDeclAndSummands& varDecl)
221 // OString additions are frequently wrapped in these
222 if (auto exprCleanup = dyn_cast<ExprWithCleanups>(stmt2))
223 stmt2 = exprCleanup->getSubExpr();
224 if (auto switchCase = dyn_cast<SwitchCase>(stmt2))
225 stmt2 = switchCase->getSubStmt();
227 const DeclRefExpr* declRefExprLHS;
228 const Expr* rhs;
229 auto tc = loplugin::TypeCheck(varDecl.varDecl->getType());
230 if (tc.Class("OString") || tc.Class("OUString"))
232 auto operatorCall = dyn_cast<CXXOperatorCallExpr>(stmt2);
233 if (!operatorCall)
234 return false;
235 if (operatorCall->getOperator() != OO_PlusEqual)
236 return false;
237 declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(operatorCall->getArg(0)));
238 rhs = operatorCall->getArg(1);
240 else
242 // OUStringBuffer, OStringBuffer
243 auto memberCall = dyn_cast<CXXMemberCallExpr>(stmt2);
244 if (!memberCall)
245 return false;
246 auto cxxMethodDecl = dyn_cast_or_null<CXXMethodDecl>(memberCall->getDirectCallee());
247 if (!cxxMethodDecl)
248 return false;
249 if (!cxxMethodDecl->getIdentifier() || cxxMethodDecl->getName() != "append")
250 return false;
251 declRefExprLHS = dyn_cast<DeclRefExpr>(ignore(memberCall->getImplicitObjectArgument()));
252 rhs = memberCall->getArg(0);
254 if (!declRefExprLHS)
255 return false;
256 if (declRefExprLHS->getDecl() != varDecl.varDecl)
257 return false;
258 // if either side is a compile-time-constant, then we don't care about
259 // side-effects
260 bool const ctcRhs = isCompileTimeConstant(rhs);
261 if (!ctcRhs)
263 auto const sefRhs = isSideEffectFree(rhs);
264 auto const oldSummands = varDecl.summands;
265 varDecl.summands = sefRhs ? Summands::OnlySideEffectFree : Summands::SideEffect;
266 if (oldSummands != Summands::OnlyCompileTimeConstants
267 && (oldSummands == Summands::SideEffect || !sefRhs))
269 return true;
272 SourceRange mergeRange(stmt1->getSourceRange().getBegin(), stmt2->getSourceRange().getEnd());
273 // if we cross a #ifdef boundary
274 if (containsPreprocessingConditionalInclusion(mergeRange))
276 varDecl.summands
277 = ctcRhs ? Summands::OnlyCompileTimeConstants
278 : isSideEffectFree(rhs) ? Summands::OnlySideEffectFree : Summands::SideEffect;
279 return true;
281 // If there is a comment between two calls, rather don't suggest merge
282 // IMO, code clarity trumps efficiency (as far as plugin warnings go, anyway).
283 if (containsComment(mergeRange))
284 return true;
285 // I don't think the OUStringAppend functionality can handle this efficiently
286 if (isa<ConditionalOperator>(ignore(rhs)))
287 return false;
288 report(DiagnosticsEngine::Warning, "simplify by merging with the preceding assign/append",
289 stmt2->getBeginLoc())
290 << stmt2->getSourceRange();
291 return true;
294 // Check for generating temporaries when adding strings
296 bool StringAdd::VisitCXXOperatorCallExpr(CXXOperatorCallExpr const* operatorCall)
298 if (ignoreLocation(operatorCall))
299 return true;
300 if (operatorCall->getOperator() != OO_Plus)
301 return true;
302 auto tc = loplugin::TypeCheck(operatorCall->getType()->getUnqualifiedDesugaredType());
303 if (!tc.Struct("StringConcat").Namespace("rtl").GlobalNamespace()
304 && !tc.Class("OUString").Namespace("rtl").GlobalNamespace()
305 && !tc.Class("OString").Namespace("rtl").GlobalNamespace())
306 return true;
308 auto check = [operatorCall, this](unsigned arg) {
309 auto const e
310 = dyn_cast<CXXFunctionalCastExpr>(operatorCall->getArg(arg)->IgnoreParenImpCasts());
311 if (e == nullptr)
312 return;
313 auto tc3 = loplugin::TypeCheck(e->getType());
314 if (!tc3.Class("OUString").Namespace("rtl").GlobalNamespace()
315 && !tc3.Class("OString").Namespace("rtl").GlobalNamespace()
316 && !tc3.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
317 && !tc3.Class("OStringLiteral").Namespace("rtl").GlobalNamespace()
318 && !tc3.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
319 && !tc3.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
320 return;
321 report(DiagnosticsEngine::Warning,
322 ("rather use O[U]String::Concat than constructing %0 from %1 on %select{L|R}2HS of "
323 "+ (where %select{R|L}2HS is of"
324 " type %3)"),
325 e->getBeginLoc())
326 << e->getType().getLocalUnqualifiedType() << e->getSubExprAsWritten()->getType() << arg
327 << operatorCall->getArg(1 - arg)->IgnoreImpCasts()->getType() << e->getSourceRange();
330 check(0);
331 check(1);
332 return true;
335 bool StringAdd::VisitCXXMemberCallExpr(CXXMemberCallExpr const* methodCall)
337 if (ignoreLocation(methodCall))
338 return true;
340 auto methodDecl = methodCall->getMethodDecl();
341 if (!methodDecl || !methodDecl->getIdentifier() || methodDecl->getName() != "append"
342 || methodCall->getNumArgs() == 0)
343 return true;
344 auto tc1 = loplugin::TypeCheck(methodCall->getType());
345 if (!tc1.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
346 && !tc1.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
347 return true;
348 auto arg = methodCall->getArg(0);
349 // I don't think the OUStringAppend functionality can handle this efficiently
350 if (isa<ConditionalOperator>(ignore(arg)))
351 return true;
353 auto methodCall2 = dyn_cast<CXXMemberCallExpr>(ignore(methodCall->getImplicitObjectArgument()));
354 if (!methodCall2)
355 return true;
356 auto tc = loplugin::TypeCheck(methodCall2->getType());
357 if (!tc.Class("OUStringBuffer").Namespace("rtl").GlobalNamespace()
358 && !tc.Class("OStringBuffer").Namespace("rtl").GlobalNamespace())
359 return true;
360 auto methodDecl2 = methodCall2->getMethodDecl();
361 if (!methodDecl2->getIdentifier() || methodDecl2->getName() != "append"
362 || methodCall2->getNumArgs() == 0)
363 return true;
364 arg = methodCall2->getArg(0);
365 // I don't think the OUStringAppend functionality can handle this efficiently
366 if (isa<ConditionalOperator>(ignore(arg)))
367 return true;
368 report(DiagnosticsEngine::Warning,
369 "chained append, rather use single append call and + operator",
370 methodCall2->getBeginLoc())
371 << methodCall2->getSourceRange();
373 return true;
376 Expr const* StringAdd::ignore(Expr const* expr)
378 return expr->IgnoreImplicit()->IgnoreParens()->IgnoreImplicit();
381 bool StringAdd::isSideEffectFree(Expr const* expr)
383 expr = ignore(expr);
384 // I don't think the OUStringAppend functionality can handle this efficiently
385 if (isa<ConditionalOperator>(expr))
386 return false;
387 // Multiple statements have a well defined evaluation order (sequence points between them)
388 // but a single expression may be evaluated in arbitrary order;
389 // if there are side effects in one of the sub-expressions that have an effect on another subexpression,
390 // the result may be incorrect, and you don't necessarily notice in tests because the order is compiler-dependent.
391 // for example see commit afd743141f7a7dd05914d0872c9afe079f16fe0c where such a refactoring introduced such a bug.
392 // So only consider simple RHS expressions.
393 if (!expr->HasSideEffects(compiler.getASTContext()))
394 return true;
396 // check for chained adds which are side-effect free
397 if (auto operatorCall = dyn_cast<CXXOperatorCallExpr>(expr))
399 auto op = operatorCall->getOperator();
400 if (op == OO_PlusEqual || op == OO_Plus)
401 if (isSideEffectFree(operatorCall->getArg(0))
402 && isSideEffectFree(operatorCall->getArg(1)))
403 return true;
406 if (auto callExpr = dyn_cast<CallExpr>(expr))
408 // check for calls through OUString::number/OUString::unacquired
409 if (auto calleeMethodDecl = dyn_cast_or_null<CXXMethodDecl>(callExpr->getCalleeDecl()))
411 if (calleeMethodDecl->getIdentifier())
413 auto name = calleeMethodDecl->getName();
414 if (callExpr->getNumArgs() > 0
415 && (name == "number" || name == "unacquired" || name == "boolean"
416 || name == "copy"))
418 auto tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
419 if (tc.Class("OUString") || tc.Class("OString"))
421 if (isSideEffectFree(callExpr->getArg(0)))
422 return true;
426 else if (auto const d = dyn_cast<CXXConversionDecl>(calleeMethodDecl))
428 if (loplugin::TypeCheck(d->getConversionType())
429 .ClassOrStruct("basic_string_view")
430 .StdNamespace())
432 auto const tc = loplugin::TypeCheck(calleeMethodDecl->getParent());
433 if (tc.Class("OUString").Namespace("rtl").GlobalNamespace()
434 || tc.Class("OString").Namespace("rtl").GlobalNamespace())
436 if (isSideEffectFree(callExpr->getCallee()))
437 return true;
441 // Aggressively assume that calls to const member functions are side effect free (if
442 // all of the call's sub-expressions are):
443 if (calleeMethodDecl->isConst())
445 auto sef = true;
446 // Other options besides CXXMemberCallExpr are e.g. CXXOperatorCallExpr which
447 // does not have such a target expression:
448 if (auto const mce = dyn_cast<CXXMemberCallExpr>(callExpr))
450 if (!isSideEffectFree(mce->getImplicitObjectArgument()))
452 sef = false;
455 if (sef)
457 for (unsigned i = 0; i != callExpr->getNumArgs(); ++i)
459 if (!isSideEffectFree(callExpr->getArg(i)))
461 sef = false;
462 break;
466 if (sef)
468 return true;
472 if (auto calleeFunctionDecl = dyn_cast_or_null<FunctionDecl>(callExpr->getCalleeDecl()))
473 if (calleeFunctionDecl && calleeFunctionDecl->getIdentifier())
475 auto name = calleeFunctionDecl->getName();
476 // check for calls through OUStringToOString
477 if (name == "OUStringToOString" || name == "OStringToOUString")
478 if (isSideEffectFree(callExpr->getArg(0)))
479 return true;
480 // allowlist some known-safe methods
481 if (name.endswith("ResId") || name == "GetXMLToken")
482 if (isSideEffectFree(callExpr->getArg(0)))
483 return true;
487 // sometimes we have a constructor call on the RHS
488 if (auto constructExpr = dyn_cast<CXXConstructExpr>(expr))
490 auto dc = loplugin::DeclCheck(constructExpr->getConstructor());
491 if (dc.MemberFunction().Class("OUString") || dc.MemberFunction().Class("OString")
492 || dc.MemberFunction().Class("OUStringBuffer")
493 || dc.MemberFunction().Class("OStringBuffer"))
494 if (constructExpr->getNumArgs() == 0 || isSideEffectFree(constructExpr->getArg(0)))
495 return true;
496 // Expr::HasSideEffects does not like stuff that passes through OUStringLiteral
497 auto dc2 = loplugin::DeclCheck(constructExpr->getConstructor()->getParent());
498 if (dc2.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
499 || dc2.Class("OStringLiteral").Namespace("rtl").GlobalNamespace())
500 return true;
503 // when adding literals, we sometimes get this
504 if (auto functionalCastExpr = dyn_cast<CXXFunctionalCastExpr>(expr))
506 auto tc = loplugin::TypeCheck(functionalCastExpr->getType());
507 if (tc.Class("OUStringLiteral").Namespace("rtl").GlobalNamespace()
508 || tc.Class("OStringLiteral").Namespace("rtl").GlobalNamespace())
509 return isSideEffectFree(functionalCastExpr->getSubExpr());
512 return false;
515 bool StringAdd::isCompileTimeConstant(Expr const* expr)
517 expr = expr->IgnoreImplicit();
518 if (auto cxxConstructExpr = dyn_cast<CXXConstructExpr>(expr))
519 if (cxxConstructExpr->getNumArgs() > 0)
520 expr = cxxConstructExpr->getArg(0);
521 return isa<clang::StringLiteral>(expr);
// File-level registration object for this plugin, constructed with the option name
// "stringadd" (presumably what makes the plugin selectable under that name; see
// loplugin::Plugin::Registration to confirm).
524 loplugin::Plugin::Registration<StringAdd> stringadd("stringadd");
527 #endif // LO_CLANG_SHARED_PLUGINS
529 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */