1 //===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "RawStringLiteralCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
14 using namespace clang::ast_matchers
;
16 namespace clang::tidy::modernize
{
20 bool containsEscapes(StringRef HayStack
, StringRef Escapes
) {
21 size_t BackSlash
= HayStack
.find('\\');
22 if (BackSlash
== StringRef::npos
)
25 while (BackSlash
!= StringRef::npos
) {
26 if (!Escapes
.contains(HayStack
[BackSlash
+ 1]))
28 BackSlash
= HayStack
.find('\\', BackSlash
+ 2);
34 bool isRawStringLiteral(StringRef Text
) {
35 // Already a raw string literal if R comes before ".
36 const size_t QuotePos
= Text
.find('"');
37 assert(QuotePos
!= StringRef::npos
);
38 return (QuotePos
> 0) && (Text
[QuotePos
- 1] == 'R');
41 bool containsEscapedCharacters(const MatchFinder::MatchResult
&Result
,
42 const StringLiteral
*Literal
,
43 const CharsBitSet
&DisallowedChars
) {
44 // FIXME: Handle L"", u8"", u"" and U"" literals.
45 if (!Literal
->isOrdinary())
48 for (const unsigned char C
: Literal
->getBytes())
49 if (DisallowedChars
.test(C
))
52 CharSourceRange CharRange
= Lexer::makeFileCharRange(
53 CharSourceRange::getTokenRange(Literal
->getSourceRange()),
54 *Result
.SourceManager
, Result
.Context
->getLangOpts());
55 StringRef Text
= Lexer::getSourceText(CharRange
, *Result
.SourceManager
,
56 Result
.Context
->getLangOpts());
57 if (Text
.empty() || isRawStringLiteral(Text
))
60 return containsEscapes(Text
, R
"('\"?x01
)");
63 bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
64 return Bytes.find(Delimiter.empty()
65 ? std::string(R"lit()")lit")
66 : (")" + Delimiter
+ R
"(")")) != StringRef::npos;
69 std::string asRawStringLiteral(const StringLiteral *Literal,
70 const std::string &DelimiterStem) {
71 const StringRef Bytes = Literal->getBytes();
72 std::string Delimiter;
73 for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
74 Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
77 if (Delimiter.empty())
78 return (R"(R
"()" + Bytes
+ R
"lit()")lit
").str();
80 return (R"(R
")" + Delimiter
+ "(" + Bytes
+ ")" + Delimiter
+ R
"(")").str();
85 RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
86 ClangTidyContext *Context)
87 : ClangTidyCheck(Name, Context),
88 DelimiterStem(Options.get("DelimiterStem
", "lit
")),
89 ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals
", false)) {
90 // Non-printing characters are disallowed:
92 // \010 = \b backspace
93 // \011 = \t horizontal tab
95 // \013 = \v vertical tab
96 // \014 = \f form feed
97 // \015 = \r carriage return
99 for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
100 "\b\t\n\v\f\r\016\017"
101 "\020\021\022\023\024\025\026\027"
102 "\030\031\032\033\034\035\036\037"
105 DisallowedChars.set(C);
107 // Non-ASCII are disallowed too.
108 for (unsigned int C = 0x80U; C <= 0xFFU; ++C)
109 DisallowedChars.set(static_cast<unsigned char>(C));
112 void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
113 Options.store(Opts, "DelimiterStem
", DelimiterStem);
114 Options.store(Opts, "ReplaceShorterLiterals
", ReplaceShorterLiterals);
117 void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
119 stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit
"), this);
122 void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
123 const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit
");
124 if (Literal->getBeginLoc().isMacroID())
127 if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
128 std::string Replacement = asRawStringLiteral(Literal, DelimiterStem);
129 if (ReplaceShorterLiterals ||
130 Replacement.length() <=
131 Lexer::MeasureTokenLength(Literal->getBeginLoc(),
132 *Result.SourceManager, getLangOpts()))
133 replaceWithRawStringLiteral(Result, Literal, Replacement);
137 void RawStringLiteralCheck::replaceWithRawStringLiteral(
138 const MatchFinder::MatchResult &Result, const StringLiteral *Literal,
139 StringRef Replacement) {
140 CharSourceRange CharRange = Lexer::makeFileCharRange(
141 CharSourceRange::getTokenRange(Literal->getSourceRange()),
142 *Result.SourceManager, getLangOpts());
143 diag(Literal->getBeginLoc(),
144 "escaped string literal can be written as a raw string literal
")
145 << FixItHint::CreateReplacement(CharRange, Replacement);
148 } // namespace clang::tidy::modernize