1 //===- Rewriter.cpp - Code rewriting interface ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the Rewriter class, which is used for code
12 //===----------------------------------------------------------------------===//
14 #include "clang/Rewrite/Core/Rewriter.h"
15 #include "clang/Basic/Diagnostic.h"
16 #include "clang/Basic/DiagnosticIDs.h"
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Rewrite/Core/RewriteBuffer.h"
22 #include "clang/Rewrite/Core/RewriteRope.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/raw_ostream.h"
32 #include <system_error>
35 using namespace clang
;
/// write - Walk this buffer's rope pieces from begin() to end() on behalf of
/// the output stream \p os. Declared to return a raw_ostream reference.
37 raw_ostream
&RewriteBuffer::write(raw_ostream
&os
) const {
38 // Walk RewriteRope chunks efficiently using MoveToNextPiece() instead of the
39 // character iterator.
// I and E delimit the whole rope; the loop header has no visible increment
// expression after the condition.
40 for (RopePieceBTreeIterator I
= begin(), E
= end(); I
!= E
;
46 /// Return true if this character is non-new-line whitespace:
47 /// ' ', '\\t', '\\f', '\\v', '\\r'.
/// The newline character '\\n' is deliberately not part of that set.
/// \param c the character to classify, taken as an unsigned char.
48 static inline bool isWhitespaceExceptNL(unsigned char c
) {
/// RemoveText - Remove \p Size characters starting at \p OrigOffset, which is
/// first translated through getMappedOffset() into an offset in the current
/// buffer. When \p removeLineIfEmpty is set, the line on which the removal
/// happened is erased as well if the scan below finds only non-newline
/// whitespace before its terminating '\n'.
61 void RewriteBuffer::RemoveText(unsigned OrigOffset
, unsigned Size
,
62 bool removeLineIfEmpty
) {
63 // Nothing to remove, exit early.
64 if (Size
== 0) return;
// Translate the caller's offset into a position in the current buffer.
66 unsigned RealOffset
= getMappedOffset(OrigOffset
, true);
// The removed span must lie entirely inside the current buffer.
67 assert(RealOffset
+Size
<= Buffer
.size() && "Invalid location");
69 // Remove the dead characters.
70 Buffer
.erase(RealOffset
, Size
);
72 // Add a delta so that future changes are offset correctly.
// NOTE(review): Size is unsigned, so -Size is an unsigned negation; this
// presumably relies on AddReplaceDelta taking a signed delta - confirm.
73 AddReplaceDelta(OrigOffset
, -Size
);
75 if (removeLineIfEmpty
) {
76 // Find the line on which the removal occurred and, if it is completely
77 // empty, remove the line as well.
79 iterator curLineStart
= begin();
80 unsigned curLineStartOffs
= 0;
81 iterator posI
= begin();
// Walk the buffer up to the removal point, tracking the offset at which the
// current line starts.
82 for (unsigned i
= 0; i
!= RealOffset
; ++i
) {
86 curLineStartOffs
= i
+ 1;
91 unsigned lineSize
= 0;
// Scan over non-newline whitespace, stopping at the end of the buffer.
93 while (posI
!= end() && isWhitespaceExceptNL(*posI
)) {
// The scan stopped on a newline: erase lineSize characters plus the newline
// itself, starting at the line's first character.
97 if (posI
!= end() && *posI
== '\n') {
98 Buffer
.erase(curLineStartOffs
, lineSize
+ 1/* + '\n'*/);
99 // FIXME: Here, the offset of the start of the line is supposed to be
100 // expressed in terms of the original input not the "real" rewrite
101 // buffer. How do we compute that reliably? It might be tempting to use
102 // curLineStartOffs + OrigOffset - RealOffset, but that assumes the
103 // difference between the original and real offset is the same at the
104 // removed text and at the start of the line, but that's not true if
105 // edits were previously made earlier on the line. This bug is also
106 // documented by a FIXME on the definition of
107 // clang::Rewriter::RewriteOptions::RemoveLineIfEmpty. A reproducer for
108 // the implementation below is the test RemoveLineIfEmpty in
109 // clang/unittests/Rewrite/RewriteBufferTest.cpp.
110 AddReplaceDelta(curLineStartOffs
, -(lineSize
+ 1/* + '\n'*/));
/// InsertText - Insert \p Str into the buffer at the position that
/// \p OrigOffset maps to. InsertAfter is forwarded to getMappedOffset() when
/// computing that position.
115 void RewriteBuffer::InsertText(unsigned OrigOffset
, StringRef Str
,
117 // Nothing to insert, exit early.
118 if (Str
.empty()) return;
// Translate the caller's offset into a position in the current buffer.
120 unsigned RealOffset
= getMappedOffset(OrigOffset
, InsertAfter
);
121 Buffer
.insert(RealOffset
, Str
.begin(), Str
.end());
123 // Add a delta so that future changes are offset correctly.
124 AddInsertDelta(OrigOffset
, Str
.size());
127 /// ReplaceText - This method replaces a range of characters in the input
128 /// buffer with a new string. This is effectively a combined "remove+insert"
/// operation: \p OrigLength characters are erased at the mapped offset and
/// NewStr is inserted at that same position.
130 void RewriteBuffer::ReplaceText(unsigned OrigOffset
, unsigned OrigLength
,
// Translate the caller's offset into a position in the current buffer.
132 unsigned RealOffset
= getMappedOffset(OrigOffset
, true);
133 Buffer
.erase(RealOffset
, OrigLength
);
134 Buffer
.insert(RealOffset
, NewStr
.begin(), NewStr
.end());
// A delta is recorded only when the replacement changes the text length.
// NOTE(review): the subtraction is done on unsigned operands, so a shorter
// replacement wraps; presumably AddReplaceDelta takes a signed delta - confirm.
135 if (OrigLength
!= NewStr
.size())
136 AddReplaceDelta(OrigOffset
, NewStr
.size() - OrigLength
);
139 //===----------------------------------------------------------------------===//
141 //===----------------------------------------------------------------------===//
143 /// getRangeSize - Return the size in bytes of the specified range if its
144 /// two endpoints are in the same file. If not, this returns -1.
/// -1 is also returned when either endpoint is not rewritable. \p opts
/// supplies the IncludeInsertsAtBeginOfRange / IncludeInsertsAtEndOfRange
/// flags that are passed on when the offsets are mapped.
145 int Rewriter::getRangeSize(const CharSourceRange
&Range
,
146 RewriteOptions opts
) const {
147 if (!isRewritable(Range
.getBegin()) ||
148 !isRewritable(Range
.getEnd())) return -1;
// Resolve both endpoints to (file, offset) form.
150 FileID StartFileID
, EndFileID
;
151 unsigned StartOff
= getLocationOffsetAndFileID(Range
.getBegin(), StartFileID
);
152 unsigned EndOff
= getLocationOffsetAndFileID(Range
.getEnd(), EndFileID
);
154 if (StartFileID
!= EndFileID
)
157 // If edits have been made to this buffer, the delta between the range may
// have changed, so both offsets are mapped through the file's RewriteBuffer.
159 std::map
<FileID
, RewriteBuffer
>::const_iterator I
=
160 RewriteBuffers
.find(StartFileID
);
161 if (I
!= RewriteBuffers
.end()) {
162 const RewriteBuffer
&RB
= I
->second
;
// The end offset receives the "include inserts at end" flag unchanged, while
// the start offset receives the negated "include inserts at begin" flag.
163 EndOff
= RB
.getMappedOffset(EndOff
, opts
.IncludeInsertsAtEndOfRange
);
164 StartOff
= RB
.getMappedOffset(StartOff
, !opts
.IncludeInsertsAtBeginOfRange
);
167 // Adjust the end offset to the end of the last token, instead of being the
168 // start of the last token if this is a token range.
169 if (Range
.isTokenRange())
170 EndOff
+= Lexer::MeasureTokenLength(Range
.getEnd(), *SourceMgr
, *LangOpts
);
172 return EndOff
-StartOff
;
/// getRangeSize - Convenience overload: wrap \p Range as a token range and
/// forward to the CharSourceRange overload with the same \p opts.
175 int Rewriter::getRangeSize(SourceRange Range
, RewriteOptions opts
) const {
176 return getRangeSize(CharSourceRange::getTokenRange(Range
), opts
);
179 /// getRewrittenText - Return the rewritten form of the text in the specified
180 /// range. If the start or end of the range was unrewritable or if they are
181 /// in different buffers, this returns an empty string.
183 /// Note that this method is not particularly efficient.
184 std::string
Rewriter::getRewrittenText(CharSourceRange Range
) const {
185 if (!isRewritable(Range
.getBegin()) ||
186 !isRewritable(Range
.getEnd()))
// Resolve both endpoints to (file, offset) form.
189 FileID StartFileID
, EndFileID
;
190 unsigned StartOff
, EndOff
;
191 StartOff
= getLocationOffsetAndFileID(Range
.getBegin(), StartFileID
);
192 EndOff
= getLocationOffsetAndFileID(Range
.getEnd(), EndFileID
);
194 if (StartFileID
!= EndFileID
)
195 return {}; // Start and end in different buffers.
197 // If edits have been made to this buffer, the delta between the range may
// have changed, so look for a RewriteBuffer belonging to this file.
199 std::map
<FileID
, RewriteBuffer
>::const_iterator I
=
200 RewriteBuffers
.find(StartFileID
);
201 if (I
== RewriteBuffers
.end()) {
202 // If the buffer hasn't been rewritten, just return the text from the input.
203 const char *Ptr
= SourceMgr
->getCharacterData(Range
.getBegin());
205 // Adjust the end offset to the end of the last token, instead of being the
206 // start of the last token.
207 if (Range
.isTokenRange())
209 Lexer::MeasureTokenLength(Range
.getEnd(), *SourceMgr
, *LangOpts
);
// Copy EndOff - StartOff bytes beginning at the range's first character.
210 return std::string(Ptr
, Ptr
+EndOff
-StartOff
);
// The file has been edited: translate both offsets into the rewritten buffer.
// The end offset is mapped with an explicit `true`; the start offset uses
// getMappedOffset()'s default for that argument.
213 const RewriteBuffer
&RB
= I
->second
;
214 EndOff
= RB
.getMappedOffset(EndOff
, true);
215 StartOff
= RB
.getMappedOffset(StartOff
);
217 // Adjust the end offset to the end of the last token, instead of being the
218 // start of the last token.
219 if (Range
.isTokenRange())
220 EndOff
+= Lexer::MeasureTokenLength(Range
.getEnd(), *SourceMgr
, *LangOpts
);
222 // Advance the iterators to the right spot, yay for linear time algorithms.
223 RewriteBuffer::iterator Start
= RB
.begin();
224 std::advance(Start
, StartOff
);
225 RewriteBuffer::iterator End
= Start
;
// End is advanced relative to Start, so the distance must not be negative.
226 assert(EndOff
>= StartOff
&& "Invalid iteration distance");
227 std::advance(End
, EndOff
-StartOff
);
229 return std::string(Start
, End
);
/// getLocationOffsetAndFileID - Decompose \p Loc through the SourceManager
/// into a (FileID, offset) pair. \p Loc must be valid (asserted).
/// NOTE(review): callers pass a FileID variable as the second argument and
/// use the unsigned result as an offset, so presumably V.first is stored
/// through that argument and V.second is returned - confirm.
232 unsigned Rewriter::getLocationOffsetAndFileID(SourceLocation Loc
,
234 assert(Loc
.isValid() && "Invalid location");
235 std::pair
<FileID
, unsigned> V
= SourceMgr
->getDecomposedLoc(Loc
);
240 /// getEditBuffer - Get or create a RewriteBuffer for the specified FileID.
/// A newly created buffer is initialized with the file's contents as reported
/// by SourceManager::getBufferData().
241 RewriteBuffer
&Rewriter::getEditBuffer(FileID FID
) {
// lower_bound() either finds the existing entry or yields the position that
// is reused below as the insertion hint.
242 std::map
<FileID
, RewriteBuffer
>::iterator I
=
243 RewriteBuffers
.lower_bound(FID
);
// An entry for FID already exists.
244 if (I
!= RewriteBuffers
.end() && I
->first
== FID
)
// No entry yet: insert an empty RewriteBuffer, using I as the hint.
246 I
= RewriteBuffers
.insert(I
, std::make_pair(FID
, RewriteBuffer()));
// Seed the new buffer with the file's current contents.
248 StringRef MB
= SourceMgr
->getBufferData(FID
);
249 I
->second
.Initialize(MB
.begin(), MB
.end());
254 /// InsertText - Insert the specified string at the specified location in the
/// edit buffer of the file containing \p Loc. Returns true when \p Loc is not
/// rewritable. When \p indentNewLines is set and \p Str contains a newline,
/// the string is rebuilt using the leading whitespace of the line containing
/// \p Loc before it is inserted.
256 bool Rewriter::InsertText(SourceLocation Loc
, StringRef Str
,
257 bool InsertAfter
, bool indentNewLines
) {
258 if (!isRewritable(Loc
)) return true;
260 unsigned StartOffs
= getLocationOffsetAndFileID(Loc
, FID
);
// Backing storage for the re-indented text. It is declared at function scope
// so that it is still alive when Str is used in the final InsertText call.
262 SmallString
<128> indentedStr
;
263 if (indentNewLines
&& Str
.contains('\n')) {
264 StringRef MB
= SourceMgr
->getBufferData(FID
);
// The - 1 turns the reported line number into an index into SourceLineCache,
// which is then read to get the offset at which that line begins.
266 unsigned lineNo
= SourceMgr
->getLineNumber(FID
, StartOffs
) - 1;
267 const SrcMgr::ContentCache
*Content
=
268 &SourceMgr
->getSLocEntry(FID
).getFile().getContentCache();
269 unsigned lineOffs
= Content
->SourceLineCache
[lineNo
];
271 // Find the whitespace at the start of the line.
272 StringRef indentSpace
;
274 unsigned i
= lineOffs
;
275 while (isWhitespaceExceptNL(MB
[i
]))
// indentSpace is the run [lineOffs, i) of the original buffer.
277 indentSpace
= MB
.substr(lineOffs
, i
-lineOffs
);
// Split the text on '\n' and rebuild it in indentedStr, appending the
// captured indentation.
280 SmallVector
<StringRef
, 4> lines
;
281 Str
.split(lines
, "\n");
283 for (unsigned i
= 0, e
= lines
.size(); i
!= e
; ++i
) {
284 indentedStr
+= lines
[i
];
287 indentedStr
+= indentSpace
;
// From here on Str refers to the rebuilt text held by indentedStr.
290 Str
= indentedStr
.str();
293 getEditBuffer(FID
).InsertText(StartOffs
, Str
, InsertAfter
);
/// InsertTextAfterToken - Insert \p Str after the token that starts at
/// \p Loc. Returns true when \p Loc is not rewritable.
297 bool Rewriter::InsertTextAfterToken(SourceLocation Loc
, StringRef Str
) {
298 if (!isRewritable(Loc
)) return true;
300 unsigned StartOffs
= getLocationOffsetAndFileID(Loc
, FID
);
// Measure the single-token range [Loc, Loc] with IncludeInsertsAtBeginOfRange
// switched off, and move the insertion offset past it.
// NOTE(review): getRangeSize() returns int and can return -1; that value is
// added to an unsigned offset here - confirm it cannot fail at this point.
301 RewriteOptions rangeOpts
;
302 rangeOpts
.IncludeInsertsAtBeginOfRange
= false;
303 StartOffs
+= getRangeSize(SourceRange(Loc
, Loc
), rangeOpts
);
304 getEditBuffer(FID
).InsertText(StartOffs
, Str
, /*InsertAfter*/true);
308 /// RemoveText - Remove the specified text region.
/// \p Length characters are removed starting at \p Start, in the edit buffer
/// of the file containing \p Start; opts.RemoveLineIfEmpty is forwarded to
/// RewriteBuffer::RemoveText. Returns true when \p Start is not rewritable.
309 bool Rewriter::RemoveText(SourceLocation Start
, unsigned Length
,
310 RewriteOptions opts
) {
311 if (!isRewritable(Start
)) return true;
313 unsigned StartOffs
= getLocationOffsetAndFileID(Start
, FID
);
314 getEditBuffer(FID
).RemoveText(StartOffs
, Length
, opts
.RemoveLineIfEmpty
);
318 /// ReplaceText - This method replaces a range of characters in the input
319 /// buffer with a new string. This is effectively a combined "remove/insert"
/// operation, performed on the edit buffer of the file containing \p Start.
/// Returns true when \p Start is not rewritable.
321 bool Rewriter::ReplaceText(SourceLocation Start
, unsigned OrigLength
,
323 if (!isRewritable(Start
)) return true;
325 unsigned StartOffs
= getLocationOffsetAndFileID(Start
, StartFileID
);
327 getEditBuffer(StartFileID
).ReplaceText(StartOffs
, OrigLength
, NewStr
);
/// ReplaceText - Replace the text covered by \p range with the text that
/// \p replacementRange covers in its source buffer. Returns true when either
/// endpoint of \p range is not rewritable or \p replacementRange is invalid.
331 bool Rewriter::ReplaceText(SourceRange range
, SourceRange replacementRange
) {
332 if (!isRewritable(range
.getBegin())) return true;
333 if (!isRewritable(range
.getEnd())) return true;
334 if (replacementRange
.isInvalid()) return true;
335 SourceLocation start
= range
.getBegin();
// NOTE(review): getRangeSize() returns int and returns -1 on failure; stored
// in an unsigned, that would wrap to a huge length. Confirm neither call can
// fail here (replacementRange is only checked with isInvalid() above).
336 unsigned origLength
= getRangeSize(range
);
337 unsigned newLength
= getRangeSize(replacementRange
);
339 unsigned newOffs
= getLocationOffsetAndFileID(replacementRange
.getBegin(),
// Slice the replacement text out of the source buffer and hand it to the
// (SourceLocation, length, string) overload.
341 StringRef MB
= SourceMgr
->getBufferData(FID
);
342 return ReplaceText(start
, origLength
, MB
.substr(newOffs
, newLength
));
/// IncreaseIndentation - Indent the lines spanned by \p range further. The
/// inserted text is the part of the range's first-line leading whitespace
/// that extends beyond the leading whitespace of the line containing
/// \p parentIndent. Returns true when \p range is invalid or when any of the
/// three locations involved is not rewritable.
345 bool Rewriter::IncreaseIndentation(CharSourceRange range
,
346 SourceLocation parentIndent
) {
347 if (range
.isInvalid()) return true;
348 if (!isRewritable(range
.getBegin())) return true;
349 if (!isRewritable(range
.getEnd())) return true;
350 if (!isRewritable(parentIndent
)) return true;
// Resolve all three locations to (file, offset) form.
352 FileID StartFileID
, EndFileID
, parentFileID
;
353 unsigned StartOff
, EndOff
, parentOff
;
355 StartOff
= getLocationOffsetAndFileID(range
.getBegin(), StartFileID
);
356 EndOff
= getLocationOffsetAndFileID(range
.getEnd(), EndFileID
);
357 parentOff
= getLocationOffsetAndFileID(parentIndent
, parentFileID
);
// Guard: the three locations are compared for being in one file.
359 if (StartFileID
!= EndFileID
|| StartFileID
!= parentFileID
)
// Guard: the range's start offset is compared against its end offset.
361 if (StartOff
> EndOff
)
364 FileID FID
= StartFileID
;
365 StringRef MB
= SourceMgr
->getBufferData(FID
);
// Each - 1 turns a reported line number into an index into SourceLineCache.
367 unsigned parentLineNo
= SourceMgr
->getLineNumber(FID
, parentOff
) - 1;
368 unsigned startLineNo
= SourceMgr
->getLineNumber(FID
, StartOff
) - 1;
369 unsigned endLineNo
= SourceMgr
->getLineNumber(FID
, EndOff
) - 1;
371 const SrcMgr::ContentCache
*Content
=
372 &SourceMgr
->getSLocEntry(FID
).getFile().getContentCache();
374 // Find where the lines start.
375 unsigned parentLineOffs
= Content
->SourceLineCache
[parentLineNo
];
376 unsigned startLineOffs
= Content
->SourceLineCache
[startLineNo
];
378 // Find the whitespace at the start of each line.
379 StringRef parentSpace
, startSpace
;
381 unsigned i
= parentLineOffs
;
382 while (isWhitespaceExceptNL(MB
[i
]))
384 parentSpace
= MB
.substr(parentLineOffs
, i
-parentLineOffs
);
387 while (isWhitespaceExceptNL(MB
[i
]))
389 startSpace
= MB
.substr(startLineOffs
, i
-startLineOffs
);
// Guard: the parent line's whitespace is at least as long as the start
// line's.
391 if (parentSpace
.size() >= startSpace
.size())
// Guard: the start line's whitespace does not begin with the parent's.
393 if (!startSpace
.startswith(parentSpace
))
// The extra indentation is whatever the start line has beyond the parent's
// leading whitespace.
396 StringRef indent
= startSpace
.substr(parentSpace
.size());
398 // Indent the lines between start/end offsets.
399 RewriteBuffer
&RB
= getEditBuffer(FID
);
400 for (unsigned lineNo
= startLineNo
; lineNo
<= endLineNo
; ++lineNo
) {
401 unsigned offs
= Content
->SourceLineCache
[lineNo
];
403 while (isWhitespaceExceptNL(MB
[i
]))
// Only lines whose existing indentation begins with the start line's
// indentation receive the extra indent, inserted at the line's first column.
405 StringRef origIndent
= MB
.substr(offs
, i
-offs
);
406 if (origIndent
.startswith(startSpace
))
407 RB
.InsertText(offs
, indent
, /*InsertAfter=*/false);
415 // A wrapper for a file stream that atomically overwrites the target.
417 // Creates a file output stream for a temporary file in the constructor,
418 // which is later accessible via getStream() if ok() returns true.
419 // Flushes the stream and moves the temporary file to the target location
420 // in the destructor.
421 class AtomicallyMovedFile
{
// Builds the temporary name from Filename plus the "-%%%%%%%%" pattern and
// asks createUniqueFile() for a file matching it. Failure is reported as
// err_unable_to_make_temp; on success FileStream wraps the returned
// descriptor and is told to close it (shouldClose=true).
423 AtomicallyMovedFile(DiagnosticsEngine
&Diagnostics
, StringRef Filename
,
425 : Diagnostics(Diagnostics
), Filename(Filename
), AllWritten(AllWritten
) {
426 TempFilename
= Filename
;
427 TempFilename
+= "-%%%%%%%%";
// TempFilename is both the model passed in and the buffer that receives the
// resulting path.
429 if (llvm::sys::fs::createUniqueFile(TempFilename
, FD
, TempFilename
)) {
431 Diagnostics
.Report(clang::diag::err_unable_to_make_temp
)
434 FileStream
.reset(new llvm::raw_fd_ostream(FD
, /*shouldClose=*/true));
// Renames the temporary file onto the target. If the rename fails, reports
// err_unable_to_rename_temp and removes the temporary file.
438 ~AtomicallyMovedFile() {
441 // Close (will also flush) the FileStream.
443 if (std::error_code ec
= llvm::sys::fs::rename(TempFilename
, Filename
)) {
445 Diagnostics
.Report(clang::diag::err_unable_to_rename_temp
)
446 << TempFilename
<< Filename
<< ec
.message();
447 // If the remove fails, there's not a lot we can do - this is already an
449 llvm::sys::fs::remove(TempFilename
);
// True when FileStream holds a stream.
453 bool ok() { return (bool)FileStream
; }
// Dereferences FileStream, so this is only meaningful when ok() is true.
454 raw_ostream
&getStream() { return *FileStream
; }
457 DiagnosticsEngine
&Diagnostics
;
459 SmallString
<128> TempFilename
;
460 std::unique_ptr
<llvm::raw_fd_ostream
> FileStream
;
466 bool Rewriter::overwriteChangedFiles() {
467 bool AllWritten
= true;
468 for (buffer_iterator I
= buffer_begin(), E
= buffer_end(); I
!= E
; ++I
) {
469 const FileEntry
*Entry
=
470 getSourceMgr().getFileEntryForID(I
->first
);
471 AtomicallyMovedFile
File(getSourceMgr().getDiagnostics(), Entry
->getName(),
474 I
->second
.write(File
.getStream());