1 //===- Rewriter.cpp - Code rewriting interface ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
//  This file defines the Rewriter class, which is used for code
//  transformation.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Rewrite/Core/Rewriter.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticIDs.h"
17 #include "clang/Basic/SourceLocation.h"
18 #include "clang/Basic/SourceManager.h"
19 #include "clang/Lex/Lexer.h"
20 #include "clang/Rewrite/Core/RewriteBuffer.h"
21 #include "clang/Rewrite/Core/RewriteRope.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/raw_ostream.h"
31 using namespace clang
;
33 raw_ostream
&RewriteBuffer::write(raw_ostream
&os
) const {
34 // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the
35 // character iterator.
36 for (RopePieceBTreeIterator I
= begin(), E
= end(); I
!= E
;
/// Return true if \p c is whitespace other than a new-line, i.e. one of
/// ' ', '\\t', '\\f', '\\v', '\\r'.
static inline bool isWhitespaceExceptNL(unsigned char c) {
  return c == ' ' || c == '\t' || c == '\f' || c == '\v' || c == '\r';
}
57 void RewriteBuffer::RemoveText(unsigned OrigOffset
, unsigned Size
,
58 bool removeLineIfEmpty
) {
59 // Nothing to remove, exit early.
60 if (Size
== 0) return;
62 unsigned RealOffset
= getMappedOffset(OrigOffset
, true);
63 assert(RealOffset
+Size
<= Buffer
.size() && "Invalid location");
65 // Remove the dead characters.
66 Buffer
.erase(RealOffset
, Size
);
68 // Add a delta so that future changes are offset correctly.
69 AddReplaceDelta(OrigOffset
, -Size
);
71 if (removeLineIfEmpty
) {
72 // Find the line that the remove occurred and if it is completely empty
73 // remove the line as well.
75 iterator curLineStart
= begin();
76 unsigned curLineStartOffs
= 0;
77 iterator posI
= begin();
78 for (unsigned i
= 0; i
!= RealOffset
; ++i
) {
82 curLineStartOffs
= i
+ 1;
87 unsigned lineSize
= 0;
89 while (posI
!= end() && isWhitespaceExceptNL(*posI
)) {
93 if (posI
!= end() && *posI
== '\n') {
94 Buffer
.erase(curLineStartOffs
, lineSize
+ 1/* + '\n'*/);
95 // FIXME: Here, the offset of the start of the line is supposed to be
96 // expressed in terms of the original input not the "real" rewrite
97 // buffer. How do we compute that reliably? It might be tempting to use
98 // curLineStartOffs + OrigOffset - RealOffset, but that assumes the
99 // difference between the original and real offset is the same at the
100 // removed text and at the start of the line, but that's not true if
101 // edits were previously made earlier on the line. This bug is also
102 // documented by a FIXME on the definition of
103 // clang::Rewriter::RewriteOptions::RemoveLineIfEmpty. A reproducer for
104 // the implementation below is the test RemoveLineIfEmpty in
105 // clang/unittests/Rewrite/RewriteBufferTest.cpp.
106 AddReplaceDelta(curLineStartOffs
, -(lineSize
+ 1/* + '\n'*/));
111 void RewriteBuffer::InsertText(unsigned OrigOffset
, StringRef Str
,
113 // Nothing to insert, exit early.
114 if (Str
.empty()) return;
116 unsigned RealOffset
= getMappedOffset(OrigOffset
, InsertAfter
);
117 Buffer
.insert(RealOffset
, Str
.begin(), Str
.end());
119 // Add a delta so that future changes are offset correctly.
120 AddInsertDelta(OrigOffset
, Str
.size());
123 /// ReplaceText - This method replaces a range of characters in the input
124 /// buffer with a new string. This is effectively a combined "remove+insert"
126 void RewriteBuffer::ReplaceText(unsigned OrigOffset
, unsigned OrigLength
,
128 unsigned RealOffset
= getMappedOffset(OrigOffset
, true);
129 Buffer
.erase(RealOffset
, OrigLength
);
130 Buffer
.insert(RealOffset
, NewStr
.begin(), NewStr
.end());
131 if (OrigLength
!= NewStr
.size())
132 AddReplaceDelta(OrigOffset
, NewStr
.size() - OrigLength
);
135 //===----------------------------------------------------------------------===//
137 //===----------------------------------------------------------------------===//
139 /// getRangeSize - Return the size in bytes of the specified range if they
140 /// are in the same file. If not, this returns -1.
141 int Rewriter::getRangeSize(const CharSourceRange
&Range
,
142 RewriteOptions opts
) const {
143 if (!isRewritable(Range
.getBegin()) ||
144 !isRewritable(Range
.getEnd())) return -1;
146 FileID StartFileID
, EndFileID
;
147 unsigned StartOff
= getLocationOffsetAndFileID(Range
.getBegin(), StartFileID
);
148 unsigned EndOff
= getLocationOffsetAndFileID(Range
.getEnd(), EndFileID
);
150 if (StartFileID
!= EndFileID
)
153 // If edits have been made to this buffer, the delta between the range may
155 std::map
<FileID
, RewriteBuffer
>::const_iterator I
=
156 RewriteBuffers
.find(StartFileID
);
157 if (I
!= RewriteBuffers
.end()) {
158 const RewriteBuffer
&RB
= I
->second
;
159 EndOff
= RB
.getMappedOffset(EndOff
, opts
.IncludeInsertsAtEndOfRange
);
160 StartOff
= RB
.getMappedOffset(StartOff
, !opts
.IncludeInsertsAtBeginOfRange
);
163 // Adjust the end offset to the end of the last token, instead of being the
164 // start of the last token if this is a token range.
165 if (Range
.isTokenRange())
166 EndOff
+= Lexer::MeasureTokenLength(Range
.getEnd(), *SourceMgr
, *LangOpts
);
168 return EndOff
-StartOff
;
171 int Rewriter::getRangeSize(SourceRange Range
, RewriteOptions opts
) const {
172 return getRangeSize(CharSourceRange::getTokenRange(Range
), opts
);
175 /// getRewrittenText - Return the rewritten form of the text in the specified
176 /// range. If the start or end of the range was unrewritable or if they are
177 /// in different buffers, this returns an empty string.
179 /// Note that this method is not particularly efficient.
180 std::string
Rewriter::getRewrittenText(CharSourceRange Range
) const {
181 if (!isRewritable(Range
.getBegin()) ||
182 !isRewritable(Range
.getEnd()))
185 FileID StartFileID
, EndFileID
;
186 unsigned StartOff
, EndOff
;
187 StartOff
= getLocationOffsetAndFileID(Range
.getBegin(), StartFileID
);
188 EndOff
= getLocationOffsetAndFileID(Range
.getEnd(), EndFileID
);
190 if (StartFileID
!= EndFileID
)
191 return {}; // Start and end in different buffers.
193 // If edits have been made to this buffer, the delta between the range may
195 std::map
<FileID
, RewriteBuffer
>::const_iterator I
=
196 RewriteBuffers
.find(StartFileID
);
197 if (I
== RewriteBuffers
.end()) {
198 // If the buffer hasn't been rewritten, just return the text from the input.
199 const char *Ptr
= SourceMgr
->getCharacterData(Range
.getBegin());
201 // Adjust the end offset to the end of the last token, instead of being the
202 // start of the last token.
203 if (Range
.isTokenRange())
205 Lexer::MeasureTokenLength(Range
.getEnd(), *SourceMgr
, *LangOpts
);
206 return std::string(Ptr
, Ptr
+EndOff
-StartOff
);
209 const RewriteBuffer
&RB
= I
->second
;
210 EndOff
= RB
.getMappedOffset(EndOff
, true);
211 StartOff
= RB
.getMappedOffset(StartOff
);
213 // Adjust the end offset to the end of the last token, instead of being the
214 // start of the last token.
215 if (Range
.isTokenRange())
216 EndOff
+= Lexer::MeasureTokenLength(Range
.getEnd(), *SourceMgr
, *LangOpts
);
218 // Advance the iterators to the right spot, yay for linear time algorithms.
219 RewriteBuffer::iterator Start
= RB
.begin();
220 std::advance(Start
, StartOff
);
221 RewriteBuffer::iterator End
= Start
;
222 assert(EndOff
>= StartOff
&& "Invalid iteration distance");
223 std::advance(End
, EndOff
-StartOff
);
225 return std::string(Start
, End
);
228 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc
,
230 assert(Loc
.isValid() && "Invalid location");
231 std::pair
<FileID
, unsigned> V
= SourceMgr
->getDecomposedLoc(Loc
);
236 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
237 RewriteBuffer
&Rewriter::getEditBuffer(FileID FID
) {
238 std::map
<FileID
, RewriteBuffer
>::iterator I
=
239 RewriteBuffers
.lower_bound(FID
);
240 if (I
!= RewriteBuffers
.end() && I
->first
== FID
)
242 I
= RewriteBuffers
.insert(I
, std::make_pair(FID
, RewriteBuffer()));
244 StringRef MB
= SourceMgr
->getBufferData(FID
);
245 I
->second
.Initialize(MB
.begin(), MB
.end());
250 /// InsertText - Insert the specified string at the specified location in the
252 bool Rewriter::InsertText(SourceLocation Loc
, StringRef Str
,
253 bool InsertAfter
, bool indentNewLines
) {
254 if (!isRewritable(Loc
)) return true;
256 unsigned StartOffs
= getLocationOffsetAndFileID(Loc
, FID
);
258 SmallString
<128> indentedStr
;
259 if (indentNewLines
&& Str
.contains('\n')) {
260 StringRef MB
= SourceMgr
->getBufferData(FID
);
262 unsigned lineNo
= SourceMgr
->getLineNumber(FID
, StartOffs
) - 1;
263 const SrcMgr::ContentCache
*Content
=
264 &SourceMgr
->getSLocEntry(FID
).getFile().getContentCache();
265 unsigned lineOffs
= Content
->SourceLineCache
[lineNo
];
267 // Find the whitespace at the start of the line.
268 StringRef indentSpace
;
270 unsigned i
= lineOffs
;
271 while (isWhitespaceExceptNL(MB
[i
]))
273 indentSpace
= MB
.substr(lineOffs
, i
-lineOffs
);
276 SmallVector
<StringRef
, 4> lines
;
277 Str
.split(lines
, "\n");
279 for (unsigned i
= 0, e
= lines
.size(); i
!= e
; ++i
) {
280 indentedStr
+= lines
[i
];
283 indentedStr
+= indentSpace
;
286 Str
= indentedStr
.str();
289 getEditBuffer(FID
).InsertText(StartOffs
, Str
, InsertAfter
);
293 bool Rewriter::InsertTextAfterToken(SourceLocation Loc
, StringRef Str
) {
294 if (!isRewritable(Loc
)) return true;
296 unsigned StartOffs
= getLocationOffsetAndFileID(Loc
, FID
);
297 RewriteOptions rangeOpts
;
298 rangeOpts
.IncludeInsertsAtBeginOfRange
= false;
299 StartOffs
+= getRangeSize(SourceRange(Loc
, Loc
), rangeOpts
);
300 getEditBuffer(FID
).InsertText(StartOffs
, Str
, /*InsertAfter*/true);
304 /// RemoveText - Remove the specified text region.
305 bool Rewriter::RemoveText(SourceLocation Start
, unsigned Length
,
306 RewriteOptions opts
) {
307 if (!isRewritable(Start
)) return true;
309 unsigned StartOffs
= getLocationOffsetAndFileID(Start
, FID
);
310 getEditBuffer(FID
).RemoveText(StartOffs
, Length
, opts
.RemoveLineIfEmpty
);
314 /// ReplaceText - This method replaces a range of characters in the input
315 /// buffer with a new string. This is effectively a combined "remove/insert"
317 bool Rewriter::ReplaceText(SourceLocation Start
, unsigned OrigLength
,
319 if (!isRewritable(Start
)) return true;
321 unsigned StartOffs
= getLocationOffsetAndFileID(Start
, StartFileID
);
323 getEditBuffer(StartFileID
).ReplaceText(StartOffs
, OrigLength
, NewStr
);
327 bool Rewriter::ReplaceText(SourceRange range
, SourceRange replacementRange
) {
328 if (!isRewritable(range
.getBegin())) return true;
329 if (!isRewritable(range
.getEnd())) return true;
330 if (replacementRange
.isInvalid()) return true;
331 SourceLocation start
= range
.getBegin();
332 unsigned origLength
= getRangeSize(range
);
333 unsigned newLength
= getRangeSize(replacementRange
);
335 unsigned newOffs
= getLocationOffsetAndFileID(replacementRange
.getBegin(),
337 StringRef MB
= SourceMgr
->getBufferData(FID
);
338 return ReplaceText(start
, origLength
, MB
.substr(newOffs
, newLength
));
341 bool Rewriter::IncreaseIndentation(CharSourceRange range
,
342 SourceLocation parentIndent
) {
343 if (range
.isInvalid()) return true;
344 if (!isRewritable(range
.getBegin())) return true;
345 if (!isRewritable(range
.getEnd())) return true;
346 if (!isRewritable(parentIndent
)) return true;
348 FileID StartFileID
, EndFileID
, parentFileID
;
349 unsigned StartOff
, EndOff
, parentOff
;
351 StartOff
= getLocationOffsetAndFileID(range
.getBegin(), StartFileID
);
352 EndOff
= getLocationOffsetAndFileID(range
.getEnd(), EndFileID
);
353 parentOff
= getLocationOffsetAndFileID(parentIndent
, parentFileID
);
355 if (StartFileID
!= EndFileID
|| StartFileID
!= parentFileID
)
357 if (StartOff
> EndOff
)
360 FileID FID
= StartFileID
;
361 StringRef MB
= SourceMgr
->getBufferData(FID
);
363 unsigned parentLineNo
= SourceMgr
->getLineNumber(FID
, parentOff
) - 1;
364 unsigned startLineNo
= SourceMgr
->getLineNumber(FID
, StartOff
) - 1;
365 unsigned endLineNo
= SourceMgr
->getLineNumber(FID
, EndOff
) - 1;
367 const SrcMgr::ContentCache
*Content
=
368 &SourceMgr
->getSLocEntry(FID
).getFile().getContentCache();
370 // Find where the lines start.
371 unsigned parentLineOffs
= Content
->SourceLineCache
[parentLineNo
];
372 unsigned startLineOffs
= Content
->SourceLineCache
[startLineNo
];
374 // Find the whitespace at the start of each line.
375 StringRef parentSpace
, startSpace
;
377 unsigned i
= parentLineOffs
;
378 while (isWhitespaceExceptNL(MB
[i
]))
380 parentSpace
= MB
.substr(parentLineOffs
, i
-parentLineOffs
);
383 while (isWhitespaceExceptNL(MB
[i
]))
385 startSpace
= MB
.substr(startLineOffs
, i
-startLineOffs
);
387 if (parentSpace
.size() >= startSpace
.size())
389 if (!startSpace
.startswith(parentSpace
))
392 StringRef indent
= startSpace
.substr(parentSpace
.size());
394 // Indent the lines between start/end offsets.
395 RewriteBuffer
&RB
= getEditBuffer(FID
);
396 for (unsigned lineNo
= startLineNo
; lineNo
<= endLineNo
; ++lineNo
) {
397 unsigned offs
= Content
->SourceLineCache
[lineNo
];
399 while (isWhitespaceExceptNL(MB
[i
]))
401 StringRef origIndent
= MB
.substr(offs
, i
-offs
);
402 if (origIndent
.startswith(startSpace
))
403 RB
.InsertText(offs
, indent
, /*InsertAfter=*/false);
409 bool Rewriter::overwriteChangedFiles() {
410 bool AllWritten
= true;
411 auto& Diag
= getSourceMgr().getDiagnostics();
412 unsigned OverwriteFailure
= Diag
.getCustomDiagID(
413 DiagnosticsEngine::Error
, "unable to overwrite file %0: %1");
414 for (buffer_iterator I
= buffer_begin(), E
= buffer_end(); I
!= E
; ++I
) {
415 const FileEntry
*Entry
= getSourceMgr().getFileEntryForID(I
->first
);
417 llvm::writeToOutput(Entry
->getName(), [&](llvm::raw_ostream
&OS
) {
419 return llvm::Error::success();
421 Diag
.Report(OverwriteFailure
)
422 << Entry
->getName() << llvm::toString(std::move(Error
));