clang/lib/Format/NamespaceEndCommentsFixer.cpp

   1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
  11 /// fixes namespace end comments.
  12 ///
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "NamespaceEndCommentsFixer.h"
  16 #include "clang/Basic/TokenKinds.h"
  17 #include "llvm/Support/Debug.h"
  18 #include "llvm/Support/Regex.h"
  19
  20 #define DEBUG_TYPE "namespace-end-comments-fixer"
  21
  22 namespace clang {
  23 namespace format {
  24
  25 namespace {
  26 // Iterates all tokens starting from StartTok to EndTok and apply Fn to all
  27 // tokens between them including StartTok and EndTok. Returns the token after
  28 // EndTok.
  29 const FormatToken *
  30 processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
  31               tok::TokenKind EndTok,
  32               llvm::function_ref<void(const FormatToken *)> Fn) {
  33   if (!Tok || Tok->isNot(StartTok))
  34     return Tok;
  35   int NestLevel = 0;
  36   do {
  37     if (Tok->is(StartTok))
  38       ++NestLevel;
  39     else if (Tok->is(EndTok))
  40       --NestLevel;
  41     if (Fn)
  42       Fn(Tok);
  43     Tok = Tok->getNextNonComment();
  44   } while (Tok && NestLevel > 0);
  45   return Tok;
  46 }
  47
  48 const FormatToken *skipAttribute(const FormatToken *Tok) {
  49   if (!Tok)
  50     return nullptr;
  51   if (Tok->is(tok::kw___attribute)) {
  52     Tok = Tok->getNextNonComment();
  53     Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
  54   } else if (Tok->is(tok::l_square)) {
  55     Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
  56   }
  57   return Tok;
  58 }
  59
  60 // Computes the name of a namespace given the namespace token.
  61 // Returns "" for anonymous namespace.
  62 std::string computeName(const FormatToken *NamespaceTok) {
  63   assert(NamespaceTok &&
  64          NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
  65          "expecting a namespace token");
  66   std::string name;
  67   const FormatToken *Tok = NamespaceTok->getNextNonComment();
  68   if (NamespaceTok->is(TT_NamespaceMacro)) {
  69     // Collects all the non-comment tokens between opening parenthesis
  70     // and closing parenthesis or comma.
  71     assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
  72     Tok = Tok->getNextNonComment();
  73     while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
  74       name += Tok->TokenText;
  75       Tok = Tok->getNextNonComment();
  76     }
  77     return name;
  78   }
  79   Tok = skipAttribute(Tok);
  80
  81   std::string FirstNSName;
  82   // For `namespace [[foo]] A::B::inline C {` or
  83   // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
  84   // Peek for the first '::' (or '{' or '(')) and then return all tokens from
  85   // one token before that up until the '{'. A '(' might be a macro with
  86   // arguments.
  87   const FormatToken *FirstNSTok = nullptr;
  88   while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
  89     if (FirstNSTok)
  90       FirstNSName += FirstNSTok->TokenText;
  91     FirstNSTok = Tok;
  92     Tok = Tok->getNextNonComment();
  93   }
  94
  95   if (FirstNSTok)
  96     Tok = FirstNSTok;
  97   Tok = skipAttribute(Tok);
  98
  99   FirstNSTok = nullptr;
 100   // Add everything from '(' to ')'.
 101   auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
 102   bool IsPrevColoncolon = false;
 103   bool HasColoncolon = false;
 104   bool IsPrevInline = false;
 105   bool NameFinished = false;
 106   // If we found '::' in name, then it's the name. Otherwise, we can't tell
 107   // which one is name. For example, `namespace A B {`.
 108   while (Tok && Tok->isNot(tok::l_brace)) {
 109     if (FirstNSTok) {
 110       if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
 111         if (FirstNSTok->is(tok::l_paren)) {
 112           FirstNSTok = Tok =
 113               processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
 114           continue;
 115         }
 116         if (FirstNSTok->isNot(tok::coloncolon)) {
 117           NameFinished = true;
 118           break;
 119         }
 120       }
 121       name += FirstNSTok->TokenText;
 122       IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
 123       HasColoncolon = HasColoncolon || IsPrevColoncolon;
 124       if (FirstNSTok->is(tok::kw_inline)) {
 125         name += " ";
 126         IsPrevInline = true;
 127       }
 128     }
 129     FirstNSTok = Tok;
 130     Tok = Tok->getNextNonComment();
 131     const FormatToken *TokAfterAttr = skipAttribute(Tok);
 132     if (TokAfterAttr != Tok)
 133       FirstNSTok = Tok = TokAfterAttr;
 134   }
 135   if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
 136     name += FirstNSTok->TokenText;
 137   if (FirstNSName.empty() || HasColoncolon)
 138     return name;
 139   return name.empty() ? FirstNSName : FirstNSName + " " + name;
 140 }
 141
 142 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
 143                                   const FormatToken *NamespaceTok,
 144                                   unsigned SpacesToAdd) {
 145   std::string text = "//";
 146   text.append(SpacesToAdd, ' ');
 147   text += NamespaceTok->TokenText;
 148   if (NamespaceTok->is(TT_NamespaceMacro))
 149     text += "(";
 150   else if (!NamespaceName.empty())
 151     text += ' ';
 152   text += NamespaceName;
 153   if (NamespaceTok->is(TT_NamespaceMacro))
 154     text += ")";
 155   if (AddNewline)
 156     text += '\n';
 157   return text;
 158 }
 159
 160 bool hasEndComment(const FormatToken *RBraceTok) {
 161   return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
 162 }
 163
 164 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
 165                      const FormatToken *NamespaceTok) {
 166   assert(hasEndComment(RBraceTok));
 167   const FormatToken *Comment = RBraceTok->Next;
 168
 169   // Matches a valid namespace end comment.
 170   // Valid namespace end comments don't need to be edited.
 171   static const llvm::Regex NamespaceCommentPattern =
 172       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
 173                   "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
 174                   llvm::Regex::IgnoreCase);
 175   static const llvm::Regex NamespaceMacroCommentPattern =
 176       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
 177                   "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
 178                   llvm::Regex::IgnoreCase);
 179
 180   SmallVector<StringRef, 8> Groups;
 181   if (NamespaceTok->is(TT_NamespaceMacro) &&
 182       NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
 183     StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
 184     // The name of the macro must be used.
 185     if (NamespaceTokenText != NamespaceTok->TokenText)
 186       return false;
 187   } else if (NamespaceTok->isNot(tok::kw_namespace) ||
 188              !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
 189     // Comment does not match regex.
 190     return false;
 191   }
 192   StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
 193   // Anonymous namespace comments must not mention a namespace name.
 194   if (NamespaceName.empty() && !NamespaceNameInComment.empty())
 195     return false;
 196   StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
 197   // Named namespace comments must not mention anonymous namespace.
 198   if (!NamespaceName.empty() && !AnonymousInComment.empty())
 199     return false;
 200   if (NamespaceNameInComment == NamespaceName)
 201     return true;
 202
 203   // Has namespace comment flowed onto the next line.
 204   // } // namespace
 205   //   // verylongnamespacenamethatdidnotfitonthepreviouscommentline
 206   if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
 207     return false;
 208
 209   static const llvm::Regex CommentPattern = llvm::Regex(
 210       "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
 211
 212   // Pull out just the comment text.
 213   if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
 214     return false;
 215   NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
 216
 217   return NamespaceNameInComment == NamespaceName;
 218 }
 219
 220 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
 221                    const SourceManager &SourceMgr,
 222                    tooling::Replacements *Fixes) {
 223   auto EndLoc = RBraceTok->Tok.getEndLoc();
 224   auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
 225   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
 226   if (Err) {
 227     llvm::errs() << "Error while adding namespace end comment: "
 228                  << llvm::toString(std::move(Err)) << "\n";
 229   }
 230 }
 231
 232 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
 233                       const SourceManager &SourceMgr,
 234                       tooling::Replacements *Fixes) {
 235   assert(hasEndComment(RBraceTok));
 236   const FormatToken *Comment = RBraceTok->Next;
 237   auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
 238                                              Comment->Tok.getEndLoc());
 239   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
 240   if (Err) {
 241     llvm::errs() << "Error while updating namespace end comment: "
 242                  << llvm::toString(std::move(Err)) << "\n";
 243   }
 244 }
 245 } // namespace
 246
 247 const FormatToken *
 248 getNamespaceToken(const AnnotatedLine *Line,
 249                   const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
 250   if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
 251     return nullptr;
 252   size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
 253   if (StartLineIndex == UnwrappedLine::kInvalidIndex)
 254     return nullptr;
 255   assert(StartLineIndex < AnnotatedLines.size());
 256   const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
 257   if (NamespaceTok->is(tok::l_brace)) {
 258     // "namespace" keyword can be on the line preceding '{', e.g. in styles
 259     // where BraceWrapping.AfterNamespace is true.
 260     if (StartLineIndex > 0) {
 261       NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
 262       if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
 263         return nullptr;
 264     }
 265   }
 266
 267   return NamespaceTok->getNamespaceToken();
 268 }
 269
 270 StringRef
 271 getNamespaceTokenText(const AnnotatedLine *Line,
 272                       const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
 273   const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
 274   return NamespaceTok ? NamespaceTok->TokenText : StringRef();
 275 }
 276
 277 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
 278                                                      const FormatStyle &Style)
 279     : TokenAnalyzer(Env, Style) {}
 280
 281 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
 282     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
 283     FormatTokenLexer &Tokens) {
 284   const SourceManager &SourceMgr = Env.getSourceManager();
 285   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
 286   tooling::Replacements Fixes;
 287
 288   // Spin through the lines and ensure we have balanced braces.
 289   int Braces = 0;
 290   for (AnnotatedLine *Line : AnnotatedLines) {
 291     FormatToken *Tok = Line->First;
 292     while (Tok) {
 293       Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
 294       Tok = Tok->Next;
 295     }
 296   }
 297   // Don't attempt to comment unbalanced braces or this can
 298   // lead to comments being placed on the closing brace which isn't
 299   // the matching brace of the namespace. (occurs during incomplete editing).
 300   if (Braces != 0)
 301     return {Fixes, 0};
 302
 303   std::string AllNamespaceNames;
 304   size_t StartLineIndex = SIZE_MAX;
 305   StringRef NamespaceTokenText;
 306   unsigned int CompactedNamespacesCount = 0;
 307   for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
 308     const AnnotatedLine *EndLine = AnnotatedLines[I];
 309     const FormatToken *NamespaceTok =
 310         getNamespaceToken(EndLine, AnnotatedLines);
 311     if (!NamespaceTok)
 312       continue;
 313     FormatToken *RBraceTok = EndLine->First;
 314     if (RBraceTok->Finalized)
 315       continue;
 316     RBraceTok->Finalized = true;
 317     const FormatToken *EndCommentPrevTok = RBraceTok;
 318     // Namespaces often end with '};'. In that case, attach namespace end
 319     // comments to the semicolon tokens.
 320     if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
 321       EndCommentPrevTok = RBraceTok->Next;
 322     if (StartLineIndex == SIZE_MAX)
 323       StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
 324     std::string NamespaceName = computeName(NamespaceTok);
 325     if (Style.CompactNamespaces) {
 326       if (CompactedNamespacesCount == 0)
 327         NamespaceTokenText = NamespaceTok->TokenText;
 328       if ((I + 1 < E) &&
 329           NamespaceTokenText ==
 330               getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
 331           StartLineIndex - CompactedNamespacesCount - 1 ==
 332               AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
 333           !AnnotatedLines[I + 1]->First->Finalized) {
 334         if (hasEndComment(EndCommentPrevTok)) {
 335           // remove end comment, it will be merged in next one
 336           updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
 337         }
 338         ++CompactedNamespacesCount;
 339         if (!NamespaceName.empty())
 340           AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
 341         continue;
 342       }
 343       NamespaceName += AllNamespaceNames;
 344       CompactedNamespacesCount = 0;
 345       AllNamespaceNames = std::string();
 346     }
 347     // The next token in the token stream after the place where the end comment
 348     // token must be. This is either the next token on the current line or the
 349     // first token on the next line.
 350     const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
 351     if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
 352       EndCommentNextTok = EndCommentNextTok->Next;
 353     if (!EndCommentNextTok && I + 1 < E)
 354       EndCommentNextTok = AnnotatedLines[I + 1]->First;
 355     bool AddNewline = EndCommentNextTok &&
 356                       EndCommentNextTok->NewlinesBefore == 0 &&
 357                       EndCommentNextTok->isNot(tok::eof);
 358     const std::string EndCommentText =
 359         computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
 360                               Style.SpacesInLineCommentPrefix.Minimum);
 361     if (!hasEndComment(EndCommentPrevTok)) {
 362       bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
 363       if (!isShort)
 364         addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
 365     } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
 366                                 NamespaceTok)) {
 367       updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
 368     }
 369     StartLineIndex = SIZE_MAX;
 370   }
 371   return {Fixes, 0};
 372 }
 373
 374 } // namespace format
 375 } // namespace clang