1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "support/Logger.h"
10 #include "clang/Basic/SourceManager.h"
11 #include "clang/Format/Format.h"
12 #include "clang/Lex/Lexer.h"
13 #include "clang/Tooling/Core/Replacement.h"
14 #include "llvm/Support/Unicode.h"
20 /// Append closing brackets )]} to \p Code to make it well-formed.
21 /// Clang-format conservatively refuses to format files with unmatched brackets
22 /// as it isn't sure where the errors are and so can't correct.
23 /// When editing, it's reasonable to assume code before the cursor is complete.
24 void closeBrackets(std::string
&Code
, const format::FormatStyle
&Style
) {
25 SourceManagerForFile
FileSM("mock_file.cpp", Code
);
26 auto &SM
= FileSM
.get();
27 FileID FID
= SM
.getMainFileID();
28 LangOptions LangOpts
= format::getFormattingLangOpts(Style
);
29 Lexer
Lex(FID
, SM
.getBufferOrFake(FID
), SM
, LangOpts
);
31 std::vector
<char> Brackets
;
32 while (!Lex
.LexFromRawLexer(Tok
)) {
33 switch(Tok
.getKind()) {
35 Brackets
.push_back(')');
38 Brackets
.push_back('}');
41 Brackets
.push_back(']');
44 if (!Brackets
.empty() && Brackets
.back() == ')')
48 if (!Brackets
.empty() && Brackets
.back() == '}')
52 if (!Brackets
.empty() && Brackets
.back() == ']')
59 // Attempt to end any open comments first.
60 Code
.append("\n// */\n");
61 Code
.append(Brackets
.rbegin(), Brackets
.rend());
64 static StringRef
commentMarker(llvm::StringRef Line
) {
65 for (StringRef Marker
: {"///", "//"}){
66 auto I
= Line
.rfind(Marker
);
67 if (I
!= StringRef::npos
)
68 return Line
.substr(I
, Marker
.size());
73 llvm::StringRef
firstLine(llvm::StringRef Code
) {
74 return Code
.take_until([](char C
) { return C
== '\n'; });
77 llvm::StringRef
lastLine(llvm::StringRef Code
) {
78 llvm::StringRef Rest
= Code
;
79 while (!Rest
.empty() && Rest
.back() != '\n')
80 Rest
= Rest
.drop_back();
81 return Code
.substr(Rest
.size());
84 // Filename is needed for tooling::Replacement and some overloads of reformat().
85 // Its value should not affect the outcome. We use the default from reformat().
86 llvm::StringRef Filename
= "<stdin>";
88 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
89 tooling::Replacement
replacement(llvm::StringRef Code
, llvm::StringRef From
,
91 assert(From
.begin() >= Code
.begin() && From
.end() <= Code
.end());
92 // The filename is required but ignored.
93 return tooling::Replacement(Filename
, From
.data() - Code
.data(),
97 // High-level representation of incremental formatting changes.
98 // The changes are made in two steps.
99 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100 // comment markers when splitting a line comment with a newline).
101 // 2) a selective clang-format run:
102 // - the "source code" passed to clang format is the code up to the cursor,
103 // a placeholder for the cursor, and some closing brackets
104 // - the formatting is restricted to the cursor and (possibly) other ranges
105 // (e.g. the old line when inserting a newline).
106 // - changes before the cursor are applied, those after are discarded.
107 struct IncrementalChanges
{
108 // Changes that should be applied before running clang-format.
109 tooling::Replacements Changes
;
110 // Ranges of the original source code that should be clang-formatted.
111 // The CursorProxyText will also be formatted.
112 std::vector
<tooling::Range
> FormatRanges
;
113 // The source code that should stand in for the cursor when clang-formatting.
114 // e.g. after inserting a newline, a line-comment at the cursor is used to
115 // ensure that the newline is preserved.
116 std::string CursorPlaceholder
;
119 // The two functions below, columnWidth() and columnWidthWithTabs(), were
120 // adapted from similar functions in clang/lib/Format/Encoding.h.
121 // FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
123 // Helper function for columnWidthWithTabs().
124 inline unsigned columnWidth(StringRef Text
) {
125 int ContentWidth
= llvm::sys::unicode::columnWidthUTF8(Text
);
126 if (ContentWidth
< 0)
127 return Text
.size(); // fallback for unprintable characters
131 // Returns the number of columns required to display the \p Text on a terminal
132 // with the \p TabWidth.
133 inline unsigned columnWidthWithTabs(StringRef Text
, unsigned TabWidth
) {
134 unsigned TotalWidth
= 0;
135 StringRef Tail
= Text
;
137 StringRef::size_type TabPos
= Tail
.find('\t');
138 if (TabPos
== StringRef::npos
)
139 return TotalWidth
+ columnWidth(Tail
);
140 TotalWidth
+= columnWidth(Tail
.substr(0, TabPos
));
142 TotalWidth
+= TabWidth
- TotalWidth
% TabWidth
;
143 Tail
= Tail
.substr(TabPos
+ 1);
148 // - we continue any line-comment that was split
149 // - we format the old line in addition to the cursor
150 // - we represent the cursor with a line comment to preserve the newline
151 IncrementalChanges
getIncrementalChangesAfterNewline(llvm::StringRef Code
,
154 IncrementalChanges Result
;
155 // Before newline, code looked like:
157 // After newline, code looks like:
159 // indentation^trailing
160 // Where indentation was added by the editor.
161 StringRef Trailing
= firstLine(Code
.substr(Cursor
));
162 StringRef Indentation
= lastLine(Code
.take_front(Cursor
));
163 if (Indentation
.data() == Code
.data()) {
164 vlog("Typed a newline, but we're still on the first line!");
168 lastLine(Code
.take_front(Indentation
.data() - Code
.data() - 1));
169 StringRef NextLine
= firstLine(Code
.substr(Cursor
+ Trailing
.size() + 1));
171 // Strip leading whitespace on trailing line.
172 StringRef TrailingTrim
= Trailing
.ltrim();
173 if (unsigned TrailWS
= Trailing
.size() - TrailingTrim
.size())
174 cantFail(Result
.Changes
.add(
175 replacement(Code
, StringRef(Trailing
.begin(), TrailWS
), "")));
177 // If we split a comment, replace indentation with a comment marker.
178 // If the editor made the new line a comment, also respect that.
179 StringRef CommentMarker
= commentMarker(Leading
);
180 bool NewLineIsComment
= !commentMarker(Indentation
).empty();
181 if (!CommentMarker
.empty() &&
182 (NewLineIsComment
|| !commentMarker(NextLine
).empty() ||
183 (!TrailingTrim
.empty() && !TrailingTrim
.startswith("//")))) {
184 // We indent the new comment to match the previous one.
185 StringRef PreComment
=
186 Leading
.take_front(CommentMarker
.data() - Leading
.data());
187 std::string IndentAndComment
=
188 (std::string(columnWidthWithTabs(PreComment
, TabWidth
), ' ') +
192 Result
.Changes
.add(replacement(Code
, Indentation
, IndentAndComment
)));
194 // Remove any indentation and let clang-format re-add it.
195 // This prevents the cursor marker dragging e.g. an aligned comment with it.
196 cantFail(Result
.Changes
.add(replacement(Code
, Indentation
, "")));
199 // If we put a the newline inside a {} pair, put } on its own line...
200 if (CommentMarker
.empty() && Leading
.endswith("{") &&
201 Trailing
.startswith("}")) {
203 Result
.Changes
.add(replacement(Code
, Trailing
.take_front(1), "\n}")));
205 Result
.FormatRanges
.push_back(
206 tooling::Range(Trailing
.data() - Code
.data() + 1, 1));
209 // Format the whole leading line.
210 Result
.FormatRanges
.push_back(
211 tooling::Range(Leading
.data() - Code
.data(), Leading
.size()));
213 // We use a comment to represent the cursor, to preserve the newline.
214 // A trailing identifier improves parsing of e.g. for without braces.
215 // Exception: if the previous line has a trailing comment, we can't use one
216 // as the cursor (they will be aligned). But in this case we don't need to.
217 Result
.CursorPlaceholder
= !CommentMarker
.empty() ? "ident" : "//==\nident";
222 IncrementalChanges
getIncrementalChanges(llvm::StringRef Code
, unsigned Cursor
,
223 llvm::StringRef InsertedText
,
225 IncrementalChanges Result
;
226 if (InsertedText
== "\n")
227 return getIncrementalChangesAfterNewline(Code
, Cursor
, TabWidth
);
229 Result
.CursorPlaceholder
= " /**/";
233 // Returns equivalent replacements that preserve the correspondence between
234 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235 // replacement will be split.
236 std::vector
<tooling::Replacement
>
237 split(const tooling::Replacements
&Replacements
, unsigned OldCursor
,
238 unsigned NewCursor
) {
239 std::vector
<tooling::Replacement
> Result
;
240 int LengthChange
= 0;
241 for (const tooling::Replacement
&R
: Replacements
) {
242 if (R
.getOffset() + R
.getLength() <= OldCursor
) { // before cursor
244 LengthChange
+= R
.getReplacementText().size() - R
.getLength();
245 } else if (R
.getOffset() < OldCursor
) { // overlaps cursor
246 int ReplacementSplit
= NewCursor
- LengthChange
- R
.getOffset();
247 assert(ReplacementSplit
>= 0 &&
248 ReplacementSplit
<= int(R
.getReplacementText().size()) &&
249 "NewCursor incompatible with OldCursor!");
250 Result
.push_back(tooling::Replacement(
251 R
.getFilePath(), R
.getOffset(), OldCursor
- R
.getOffset(),
252 R
.getReplacementText().take_front(ReplacementSplit
)));
253 Result
.push_back(tooling::Replacement(
254 R
.getFilePath(), OldCursor
,
255 R
.getLength() - (OldCursor
- R
.getOffset()),
256 R
.getReplacementText().drop_front(ReplacementSplit
)));
257 } else if (R
.getOffset() >= OldCursor
) { // after cursor
266 // We're simulating the following sequence of changes:
267 // - apply the pre-formatting edits (see getIncrementalChanges)
268 // - insert a placeholder for the cursor
269 // - format some of the resulting code
270 // - remove the cursor placeholder again
271 // The replacements we return are produced by composing these.
273 // The text we actually pass to clang-format is slightly different from this,
274 // e.g. we have to close brackets. We ensure these differences are *after*
275 // all the regions we want to format, and discard changes in them.
276 std::vector
<tooling::Replacement
>
277 formatIncremental(llvm::StringRef OriginalCode
, unsigned OriginalCursor
,
278 llvm::StringRef InsertedText
, format::FormatStyle Style
) {
279 IncrementalChanges Incremental
= getIncrementalChanges(
280 OriginalCode
, OriginalCursor
, InsertedText
, Style
.TabWidth
);
281 // Never *remove* lines in response to pressing enter! This annoys users.
282 if (InsertedText
== "\n") {
283 Style
.MaxEmptyLinesToKeep
= 1000;
284 Style
.KeepEmptyLinesAtTheStartOfBlocks
= true;
287 // Compute the code we want to format:
288 // 1) Start with code after the pre-formatting edits.
289 std::string CodeToFormat
= cantFail(
290 tooling::applyAllReplacements(OriginalCode
, Incremental
.Changes
));
291 unsigned Cursor
= Incremental
.Changes
.getShiftedCodePosition(OriginalCursor
);
292 // 2) Truncate code after the last interesting range.
293 unsigned FormatLimit
= Cursor
;
294 for (tooling::Range
&R
: Incremental
.FormatRanges
)
295 FormatLimit
= std::max(FormatLimit
, R
.getOffset() + R
.getLength());
296 CodeToFormat
.resize(FormatLimit
);
297 // 3) Insert a placeholder for the cursor.
298 CodeToFormat
.insert(Cursor
, Incremental
.CursorPlaceholder
);
299 // 4) Append brackets after FormatLimit so the code is well-formed.
300 closeBrackets(CodeToFormat
, Style
);
302 // Determine the ranges to format:
303 std::vector
<tooling::Range
> RangesToFormat
= Incremental
.FormatRanges
;
304 // Ranges after the cursor need to be adjusted for the placeholder.
305 for (auto &R
: RangesToFormat
) {
306 if (R
.getOffset() > Cursor
)
307 R
= tooling::Range(R
.getOffset() + Incremental
.CursorPlaceholder
.size(),
310 // We also format the cursor.
311 RangesToFormat
.push_back(
312 tooling::Range(Cursor
, Incremental
.CursorPlaceholder
.size()));
313 // Also update FormatLimit for the placeholder, we'll use this later.
314 FormatLimit
+= Incremental
.CursorPlaceholder
.size();
316 // Run clang-format, and truncate changes at FormatLimit.
317 tooling::Replacements FormattingChanges
;
318 format::FormattingAttemptStatus Status
;
319 for (const tooling::Replacement
&R
: format::reformat(
320 Style
, CodeToFormat
, RangesToFormat
, Filename
, &Status
)) {
321 if (R
.getOffset() + R
.getLength() <= FormatLimit
) // Before limit.
322 cantFail(FormattingChanges
.add(R
));
323 else if(R
.getOffset() < FormatLimit
) { // Overlaps limit.
324 if (R
.getReplacementText().empty()) // Deletions are easy to handle.
325 cantFail(FormattingChanges
.add(tooling::Replacement(Filename
,
326 R
.getOffset(), FormatLimit
- R
.getOffset(), "")));
328 // Hopefully won't happen in practice?
329 elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
330 Cursor
, CodeToFormat
);
333 if (!Status
.FormatComplete
)
334 vlog("Incremental format incomplete at line {0}", Status
.Line
);
336 // Now we are ready to compose the changes relative to OriginalCode.
337 // edits -> insert placeholder -> format -> remove placeholder.
338 // We must express insert/remove as Replacements.
339 tooling::Replacements
InsertCursorPlaceholder(
340 tooling::Replacement(Filename
, Cursor
, 0, Incremental
.CursorPlaceholder
));
341 unsigned FormattedCursorStart
=
342 FormattingChanges
.getShiftedCodePosition(Cursor
),
343 FormattedCursorEnd
= FormattingChanges
.getShiftedCodePosition(
344 Cursor
+ Incremental
.CursorPlaceholder
.size());
345 tooling::Replacements
RemoveCursorPlaceholder(
346 tooling::Replacement(Filename
, FormattedCursorStart
,
347 FormattedCursorEnd
- FormattedCursorStart
, ""));
349 // We can't simply merge() and return: tooling::Replacements will combine
350 // adjacent edits left and right of the cursor. This gives the right source
351 // code, but loses information about where the cursor is!
352 // Fortunately, none of the individual passes lose information, so:
353 // - we use merge() to compute the final Replacements
354 // - we chain getShiftedCodePosition() to compute final cursor position
355 // - we split the final Replacements at the cursor position, so that
356 // each Replacement lies either before or after the cursor.
357 tooling::Replacements Final
;
358 unsigned FinalCursor
= OriginalCursor
;
360 std::string FinalCode
= std::string(OriginalCode
);
361 dlog("Initial code: {0}", FinalCode
);
364 std::vector
<std::pair
<const char *, const tooling::Replacements
*>>{
365 {"Pre-formatting changes", &Incremental
.Changes
},
366 {"Insert placeholder", &InsertCursorPlaceholder
},
367 {"clang-format", &FormattingChanges
},
368 {"Remove placeholder", &RemoveCursorPlaceholder
}}) {
369 Final
= Final
.merge(*Pass
.second
);
370 FinalCursor
= Pass
.second
->getShiftedCodePosition(FinalCursor
);
373 cantFail(tooling::applyAllReplacements(FinalCode
, *Pass
.second
));
374 dlog("After {0}:\n{1}^{2}", Pass
.first
,
375 StringRef(FinalCode
).take_front(FinalCursor
),
376 StringRef(FinalCode
).drop_front(FinalCursor
));
379 return split(Final
, OriginalCursor
, FinalCursor
);
383 transformCursorPosition(unsigned Offset
,
384 const std::vector
<tooling::Replacement
> &Replacements
) {
385 unsigned OriginalOffset
= Offset
;
386 for (const auto &R
: Replacements
) {
387 if (R
.getOffset() + R
.getLength() <= OriginalOffset
) {
388 // Replacement is before cursor.
389 Offset
+= R
.getReplacementText().size();
390 Offset
-= R
.getLength();
391 } else if (R
.getOffset() < OriginalOffset
) {
392 // Replacement overlaps cursor.
393 // Preserve position within replacement text, as far as possible.
394 unsigned PositionWithinReplacement
= Offset
- R
.getOffset();
395 if (PositionWithinReplacement
> R
.getReplacementText().size()) {
396 Offset
+= R
.getReplacementText().size();
397 Offset
-= PositionWithinReplacement
;
400 // Replacement after cursor.
401 break; // Replacements are sorted, the rest are also after the cursor.
407 } // namespace clangd