1 //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "clang/AST/RawCommentList.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/Comment.h"
13 #include "clang/AST/CommentBriefParser.h"
14 #include "clang/AST/CommentCommandTraits.h"
15 #include "clang/AST/CommentLexer.h"
16 #include "clang/AST/CommentParser.h"
17 #include "clang/AST/CommentSema.h"
18 #include "llvm/ADT/STLExtras.h"
20 using namespace clang
;
23 /// Get comment kind and bool describing if it is a trailing comment.
24 std::pair
<RawComment::CommentKind
, bool> getCommentKind(StringRef Comment
,
25 bool ParseAllComments
) {
26 const size_t MinCommentLength
= ParseAllComments
? 2 : 3;
27 if ((Comment
.size() < MinCommentLength
) || Comment
[0] != '/')
28 return std::make_pair(RawComment::RCK_Invalid
, false);
30 RawComment::CommentKind K
;
31 if (Comment
[1] == '/') {
32 if (Comment
.size() < 3)
33 return std::make_pair(RawComment::RCK_OrdinaryBCPL
, false);
35 if (Comment
[2] == '/')
36 K
= RawComment::RCK_BCPLSlash
;
37 else if (Comment
[2] == '!')
38 K
= RawComment::RCK_BCPLExcl
;
40 return std::make_pair(RawComment::RCK_OrdinaryBCPL
, false);
42 assert(Comment
.size() >= 4);
44 // Comment lexer does not understand escapes in comment markers, so pretend
45 // that this is not a comment.
46 if (Comment
[1] != '*' ||
47 Comment
[Comment
.size() - 2] != '*' ||
48 Comment
[Comment
.size() - 1] != '/')
49 return std::make_pair(RawComment::RCK_Invalid
, false);
51 if (Comment
[2] == '*')
52 K
= RawComment::RCK_JavaDoc
;
53 else if (Comment
[2] == '!')
54 K
= RawComment::RCK_Qt
;
56 return std::make_pair(RawComment::RCK_OrdinaryC
, false);
58 const bool TrailingComment
= (Comment
.size() > 3) && (Comment
[3] == '<');
59 return std::make_pair(K
, TrailingComment
);
62 bool mergedCommentIsTrailingComment(StringRef Comment
) {
63 return (Comment
.size() > 3) && (Comment
[3] == '<');
65 } // unnamed namespace
67 RawComment::RawComment(const SourceManager
&SourceMgr
, SourceRange SR
,
68 bool Merged
, bool ParseAllComments
) :
69 Range(SR
), RawTextValid(false), BriefTextValid(false),
70 IsAttached(false), IsAlmostTrailingComment(false),
71 ParseAllComments(ParseAllComments
) {
72 // Extract raw comment text, if possible.
73 if (SR
.getBegin() == SR
.getEnd() || getRawText(SourceMgr
).empty()) {
79 // Guess comment kind.
80 std::pair
<CommentKind
, bool> K
= getCommentKind(RawText
, ParseAllComments
);
82 IsTrailingComment
= K
.second
;
84 IsAlmostTrailingComment
= RawText
.startswith("//<") ||
85 RawText
.startswith("/*<");
88 IsTrailingComment
= mergedCommentIsTrailingComment(RawText
);
92 StringRef
RawComment::getRawTextSlow(const SourceManager
&SourceMgr
) const {
98 std::tie(BeginFileID
, BeginOffset
) =
99 SourceMgr
.getDecomposedLoc(Range
.getBegin());
100 std::tie(EndFileID
, EndOffset
) = SourceMgr
.getDecomposedLoc(Range
.getEnd());
102 const unsigned Length
= EndOffset
- BeginOffset
;
106 // The comment can't begin in one file and end in another.
107 assert(BeginFileID
== EndFileID
);
109 bool Invalid
= false;
110 const char *BufferStart
= SourceMgr
.getBufferData(BeginFileID
,
115 return StringRef(BufferStart
+ BeginOffset
, Length
);
118 const char *RawComment::extractBriefText(const ASTContext
&Context
) const {
119 // Make sure that RawText is valid.
120 getRawText(Context
.getSourceManager());
122 // Since we will be copying the resulting text, all allocations made during
123 // parsing are garbage after resulting string is formed. Thus we can use
124 // a separate allocator for all temporary stuff.
125 llvm::BumpPtrAllocator Allocator
;
127 comments::Lexer
L(Allocator
, Context
.getDiagnostics(),
128 Context
.getCommentCommandTraits(),
130 RawText
.begin(), RawText
.end());
131 comments::BriefParser
P(L
, Context
.getCommentCommandTraits());
133 const std::string Result
= P
.Parse();
134 const unsigned BriefTextLength
= Result
.size();
135 char *BriefTextPtr
= new (Context
) char[BriefTextLength
+ 1];
136 memcpy(BriefTextPtr
, Result
.c_str(), BriefTextLength
+ 1);
137 BriefText
= BriefTextPtr
;
138 BriefTextValid
= true;
143 comments::FullComment
*RawComment::parse(const ASTContext
&Context
,
144 const Preprocessor
*PP
,
145 const Decl
*D
) const {
146 // Make sure that RawText is valid.
147 getRawText(Context
.getSourceManager());
149 comments::Lexer
L(Context
.getAllocator(), Context
.getDiagnostics(),
150 Context
.getCommentCommandTraits(),
151 getSourceRange().getBegin(),
152 RawText
.begin(), RawText
.end());
153 comments::Sema
S(Context
.getAllocator(), Context
.getSourceManager(),
154 Context
.getDiagnostics(),
155 Context
.getCommentCommandTraits(),
158 comments::Parser
P(L
, S
, Context
.getAllocator(), Context
.getSourceManager(),
159 Context
.getDiagnostics(),
160 Context
.getCommentCommandTraits());
162 return P
.parseFullComment();
165 static bool onlyWhitespaceBetween(SourceManager
&SM
,
166 SourceLocation Loc1
, SourceLocation Loc2
,
167 unsigned MaxNewlinesAllowed
) {
168 std::pair
<FileID
, unsigned> Loc1Info
= SM
.getDecomposedLoc(Loc1
);
169 std::pair
<FileID
, unsigned> Loc2Info
= SM
.getDecomposedLoc(Loc2
);
171 // Question does not make sense if locations are in different files.
172 if (Loc1Info
.first
!= Loc2Info
.first
)
175 bool Invalid
= false;
176 const char *Buffer
= SM
.getBufferData(Loc1Info
.first
, &Invalid
).data();
180 unsigned NumNewlines
= 0;
181 assert(Loc1Info
.second
<= Loc2Info
.second
&& "Loc1 after Loc2!");
182 // Look for non-whitespace characters and remember any newlines seen.
183 for (unsigned I
= Loc1Info
.second
; I
!= Loc2Info
.second
; ++I
) {
196 // Check if we have found more than the maximum allowed number of
198 if (NumNewlines
> MaxNewlinesAllowed
)
201 // Collapse \r\n and \n\r into a single newline.
202 if (I
+ 1 != Loc2Info
.second
&&
203 (Buffer
[I
+ 1] == '\n' || Buffer
[I
+ 1] == '\r') &&
204 Buffer
[I
] != Buffer
[I
+ 1])
213 void RawCommentList::addComment(const RawComment
&RC
,
214 llvm::BumpPtrAllocator
&Allocator
) {
218 // Check if the comments are not in source order.
219 while (!Comments
.empty() &&
220 !SourceMgr
.isBeforeInTranslationUnit(Comments
.back()->getLocStart(),
222 // If they are, just pop a few last comments that don't fit.
223 // This happens if an \#include directive contains comments.
227 // Ordinary comments are not interesting for us.
231 // If this is the first Doxygen comment, save it (because there isn't
232 // anything to merge it with).
233 if (Comments
.empty()) {
234 Comments
.push_back(new (Allocator
) RawComment(RC
));
238 const RawComment
&C1
= *Comments
.back();
239 const RawComment
&C2
= RC
;
241 // Merge comments only if there is only whitespace between them.
242 // Can't merge trailing and non-trailing comments.
243 // Merge comments if they are on same or consecutive lines.
244 if (C1
.isTrailingComment() == C2
.isTrailingComment() &&
245 onlyWhitespaceBetween(SourceMgr
, C1
.getLocEnd(), C2
.getLocStart(),
246 /*MaxNewlinesAllowed=*/1)) {
247 SourceRange
MergedRange(C1
.getLocStart(), C2
.getLocEnd());
248 *Comments
.back() = RawComment(SourceMgr
, MergedRange
, true,
249 RC
.isParseAllComments());
251 Comments
.push_back(new (Allocator
) RawComment(RC
));
255 void RawCommentList::addDeserializedComments(ArrayRef
<RawComment
*> DeserializedComments
) {
256 std::vector
<RawComment
*> MergedComments
;
257 MergedComments
.reserve(Comments
.size() + DeserializedComments
.size());
259 std::merge(Comments
.begin(), Comments
.end(),
260 DeserializedComments
.begin(), DeserializedComments
.end(),
261 std::back_inserter(MergedComments
),
262 BeforeThanCompare
<RawComment
>(SourceMgr
));
263 std::swap(Comments
, MergedComments
);