Bump version to 19.1.0git
[llvm-project.git] / clang-tools-extra / clangd / Format.cpp
blobfc56a1c8c50304abfaf6bfa607ba3c24334fd85d
1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "support/Logger.h"
10 #include "clang/Basic/SourceManager.h"
11 #include "clang/Format/Format.h"
12 #include "clang/Lex/Lexer.h"
13 #include "clang/Tooling/Core/Replacement.h"
14 #include "llvm/Support/Unicode.h"
16 namespace clang {
17 namespace clangd {
18 namespace {
20 /// Append closing brackets )]} to \p Code to make it well-formed.
21 /// Clang-format conservatively refuses to format files with unmatched brackets
22 /// as it isn't sure where the errors are and so can't correct.
23 /// When editing, it's reasonable to assume code before the cursor is complete.
24 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
25 SourceManagerForFile FileSM("mock_file.cpp", Code);
26 auto &SM = FileSM.get();
27 FileID FID = SM.getMainFileID();
28 LangOptions LangOpts = format::getFormattingLangOpts(Style);
29 Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts);
30 Token Tok;
31 std::vector<char> Brackets;
32 while (!Lex.LexFromRawLexer(Tok)) {
33 switch(Tok.getKind()) {
34 case tok::l_paren:
35 Brackets.push_back(')');
36 break;
37 case tok::l_brace:
38 Brackets.push_back('}');
39 break;
40 case tok::l_square:
41 Brackets.push_back(']');
42 break;
43 case tok::r_paren:
44 if (!Brackets.empty() && Brackets.back() == ')')
45 Brackets.pop_back();
46 break;
47 case tok::r_brace:
48 if (!Brackets.empty() && Brackets.back() == '}')
49 Brackets.pop_back();
50 break;
51 case tok::r_square:
52 if (!Brackets.empty() && Brackets.back() == ']')
53 Brackets.pop_back();
54 break;
55 default:
56 continue;
59 // Attempt to end any open comments first.
60 Code.append("\n// */\n");
61 Code.append(Brackets.rbegin(), Brackets.rend());
64 static StringRef commentMarker(llvm::StringRef Line) {
65 for (StringRef Marker : {"///", "//"}){
66 auto I = Line.rfind(Marker);
67 if (I != StringRef::npos)
68 return Line.substr(I, Marker.size());
70 return "";
73 llvm::StringRef firstLine(llvm::StringRef Code) {
74 return Code.take_until([](char C) { return C == '\n'; });
77 llvm::StringRef lastLine(llvm::StringRef Code) {
78 llvm::StringRef Rest = Code;
79 while (!Rest.empty() && Rest.back() != '\n')
80 Rest = Rest.drop_back();
81 return Code.substr(Rest.size());
84 // Filename is needed for tooling::Replacement and some overloads of reformat().
85 // Its value should not affect the outcome. We use the default from reformat().
86 llvm::StringRef Filename = "<stdin>";
88 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
89 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
90 llvm::StringRef To) {
91 assert(From.begin() >= Code.begin() && From.end() <= Code.end());
92 // The filename is required but ignored.
93 return tooling::Replacement(Filename, From.data() - Code.data(),
94 From.size(), To);
97 // High-level representation of incremental formatting changes.
98 // The changes are made in two steps.
99 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100 // comment markers when splitting a line comment with a newline).
101 // 2) a selective clang-format run:
102 // - the "source code" passed to clang format is the code up to the cursor,
103 // a placeholder for the cursor, and some closing brackets
104 // - the formatting is restricted to the cursor and (possibly) other ranges
105 // (e.g. the old line when inserting a newline).
106 // - changes before the cursor are applied, those after are discarded.
107 struct IncrementalChanges {
108 // Changes that should be applied before running clang-format.
109 tooling::Replacements Changes;
110 // Ranges of the original source code that should be clang-formatted.
111 // The CursorProxyText will also be formatted.
112 std::vector<tooling::Range> FormatRanges;
113 // The source code that should stand in for the cursor when clang-formatting.
114 // e.g. after inserting a newline, a line-comment at the cursor is used to
115 // ensure that the newline is preserved.
116 std::string CursorPlaceholder;
119 // The two functions below, columnWidth() and columnWidthWithTabs(), were
120 // adapted from similar functions in clang/lib/Format/Encoding.h.
121 // FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
123 // Helper function for columnWidthWithTabs().
124 inline unsigned columnWidth(StringRef Text) {
125 int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
126 if (ContentWidth < 0)
127 return Text.size(); // fallback for unprintable characters
128 return ContentWidth;
131 // Returns the number of columns required to display the \p Text on a terminal
132 // with the \p TabWidth.
133 inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) {
134 unsigned TotalWidth = 0;
135 StringRef Tail = Text;
136 for (;;) {
137 StringRef::size_type TabPos = Tail.find('\t');
138 if (TabPos == StringRef::npos)
139 return TotalWidth + columnWidth(Tail);
140 TotalWidth += columnWidth(Tail.substr(0, TabPos));
141 if (TabWidth)
142 TotalWidth += TabWidth - TotalWidth % TabWidth;
143 Tail = Tail.substr(TabPos + 1);
147 // After a newline:
148 // - we continue any line-comment that was split
149 // - we format the old line in addition to the cursor
150 // - we represent the cursor with a line comment to preserve the newline
151 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
152 unsigned Cursor,
153 unsigned TabWidth) {
154 IncrementalChanges Result;
155 // Before newline, code looked like:
156 // leading^trailing
157 // After newline, code looks like:
158 // leading
159 // indentation^trailing
160 // Where indentation was added by the editor.
161 StringRef Trailing = firstLine(Code.substr(Cursor));
162 StringRef Indentation = lastLine(Code.take_front(Cursor));
163 if (Indentation.data() == Code.data()) {
164 vlog("Typed a newline, but we're still on the first line!");
165 return Result;
167 StringRef Leading =
168 lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
169 StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
171 // Strip leading whitespace on trailing line.
172 StringRef TrailingTrim = Trailing.ltrim();
173 if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
174 cantFail(Result.Changes.add(
175 replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
177 // If we split a comment, replace indentation with a comment marker.
178 // If the editor made the new line a comment, also respect that.
179 StringRef CommentMarker = commentMarker(Leading);
180 bool NewLineIsComment = !commentMarker(Indentation).empty();
181 if (!CommentMarker.empty() &&
182 (NewLineIsComment || !commentMarker(NextLine).empty() ||
183 (!TrailingTrim.empty() && !TrailingTrim.starts_with("//")))) {
184 // We indent the new comment to match the previous one.
185 StringRef PreComment =
186 Leading.take_front(CommentMarker.data() - Leading.data());
187 std::string IndentAndComment =
188 (std::string(columnWidthWithTabs(PreComment, TabWidth), ' ') +
189 CommentMarker + " ")
190 .str();
191 cantFail(
192 Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
193 } else {
194 // Remove any indentation and let clang-format re-add it.
195 // This prevents the cursor marker dragging e.g. an aligned comment with it.
196 cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
199 // If we put a the newline inside a {} pair, put } on its own line...
200 if (CommentMarker.empty() && Leading.ends_with("{") &&
201 Trailing.starts_with("}")) {
202 cantFail(
203 Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
204 // ...and format it.
205 Result.FormatRanges.push_back(
206 tooling::Range(Trailing.data() - Code.data() + 1, 1));
209 // Format the whole leading line.
210 Result.FormatRanges.push_back(
211 tooling::Range(Leading.data() - Code.data(), Leading.size()));
213 // We use a comment to represent the cursor, to preserve the newline.
214 // A trailing identifier improves parsing of e.g. for without braces.
215 // Exception: if the previous line has a trailing comment, we can't use one
216 // as the cursor (they will be aligned). But in this case we don't need to.
217 Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
219 return Result;
222 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
223 llvm::StringRef InsertedText,
224 unsigned TabWidth) {
225 IncrementalChanges Result;
226 if (InsertedText == "\n")
227 return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth);
229 Result.CursorPlaceholder = " /**/";
230 return Result;
233 // Returns equivalent replacements that preserve the correspondence between
234 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235 // replacement will be split.
236 std::vector<tooling::Replacement>
237 split(const tooling::Replacements &Replacements, unsigned OldCursor,
238 unsigned NewCursor) {
239 std::vector<tooling::Replacement> Result;
240 int LengthChange = 0;
241 for (const tooling::Replacement &R : Replacements) {
242 if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
243 Result.push_back(R);
244 LengthChange += R.getReplacementText().size() - R.getLength();
245 } else if (R.getOffset() < OldCursor) { // overlaps cursor
246 int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
247 assert(ReplacementSplit >= 0 &&
248 ReplacementSplit <= int(R.getReplacementText().size()) &&
249 "NewCursor incompatible with OldCursor!");
250 Result.push_back(tooling::Replacement(
251 R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
252 R.getReplacementText().take_front(ReplacementSplit)));
253 Result.push_back(tooling::Replacement(
254 R.getFilePath(), OldCursor,
255 R.getLength() - (OldCursor - R.getOffset()),
256 R.getReplacementText().drop_front(ReplacementSplit)));
257 } else if (R.getOffset() >= OldCursor) { // after cursor
258 Result.push_back(R);
261 return Result;
264 } // namespace
266 // We're simulating the following sequence of changes:
267 // - apply the pre-formatting edits (see getIncrementalChanges)
268 // - insert a placeholder for the cursor
269 // - format some of the resulting code
270 // - remove the cursor placeholder again
271 // The replacements we return are produced by composing these.
273 // The text we actually pass to clang-format is slightly different from this,
274 // e.g. we have to close brackets. We ensure these differences are *after*
275 // all the regions we want to format, and discard changes in them.
276 std::vector<tooling::Replacement>
277 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
278 llvm::StringRef InsertedText, format::FormatStyle Style) {
279 IncrementalChanges Incremental = getIncrementalChanges(
280 OriginalCode, OriginalCursor, InsertedText, Style.TabWidth);
281 // Never *remove* lines in response to pressing enter! This annoys users.
282 if (InsertedText == "\n") {
283 Style.MaxEmptyLinesToKeep = 1000;
284 Style.KeepEmptyLines.AtStartOfBlock = true;
287 // Compute the code we want to format:
288 // 1) Start with code after the pre-formatting edits.
289 std::string CodeToFormat = cantFail(
290 tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
291 unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
292 // 2) Truncate code after the last interesting range.
293 unsigned FormatLimit = Cursor;
294 for (tooling::Range &R : Incremental.FormatRanges)
295 FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
296 CodeToFormat.resize(FormatLimit);
297 // 3) Insert a placeholder for the cursor.
298 CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
299 // 4) Append brackets after FormatLimit so the code is well-formed.
300 closeBrackets(CodeToFormat, Style);
302 // Determine the ranges to format:
303 std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
304 // Ranges after the cursor need to be adjusted for the placeholder.
305 for (auto &R : RangesToFormat) {
306 if (R.getOffset() > Cursor)
307 R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
308 R.getLength());
310 // We also format the cursor.
311 RangesToFormat.push_back(
312 tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
313 // Also update FormatLimit for the placeholder, we'll use this later.
314 FormatLimit += Incremental.CursorPlaceholder.size();
316 // Run clang-format, and truncate changes at FormatLimit.
317 tooling::Replacements FormattingChanges;
318 format::FormattingAttemptStatus Status;
319 for (const tooling::Replacement &R : format::reformat(
320 Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
321 if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
322 cantFail(FormattingChanges.add(R));
323 else if(R.getOffset() < FormatLimit) { // Overlaps limit.
324 if (R.getReplacementText().empty()) // Deletions are easy to handle.
325 cantFail(FormattingChanges.add(tooling::Replacement(Filename,
326 R.getOffset(), FormatLimit - R.getOffset(), "")));
327 else
328 // Hopefully won't happen in practice?
329 elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
330 Cursor, CodeToFormat);
333 if (!Status.FormatComplete)
334 vlog("Incremental format incomplete at line {0}", Status.Line);
336 // Now we are ready to compose the changes relative to OriginalCode.
337 // edits -> insert placeholder -> format -> remove placeholder.
338 // We must express insert/remove as Replacements.
339 tooling::Replacements InsertCursorPlaceholder(
340 tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
341 unsigned FormattedCursorStart =
342 FormattingChanges.getShiftedCodePosition(Cursor),
343 FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
344 Cursor + Incremental.CursorPlaceholder.size());
345 tooling::Replacements RemoveCursorPlaceholder(
346 tooling::Replacement(Filename, FormattedCursorStart,
347 FormattedCursorEnd - FormattedCursorStart, ""));
349 // We can't simply merge() and return: tooling::Replacements will combine
350 // adjacent edits left and right of the cursor. This gives the right source
351 // code, but loses information about where the cursor is!
352 // Fortunately, none of the individual passes lose information, so:
353 // - we use merge() to compute the final Replacements
354 // - we chain getShiftedCodePosition() to compute final cursor position
355 // - we split the final Replacements at the cursor position, so that
356 // each Replacement lies either before or after the cursor.
357 tooling::Replacements Final;
358 unsigned FinalCursor = OriginalCursor;
359 #ifndef NDEBUG
360 std::string FinalCode = std::string(OriginalCode);
361 dlog("Initial code: {0}", FinalCode);
362 #endif
363 for (auto Pass :
364 std::vector<std::pair<const char *, const tooling::Replacements *>>{
365 {"Pre-formatting changes", &Incremental.Changes},
366 {"Insert placeholder", &InsertCursorPlaceholder},
367 {"clang-format", &FormattingChanges},
368 {"Remove placeholder", &RemoveCursorPlaceholder}}) {
369 Final = Final.merge(*Pass.second);
370 FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
371 #ifndef NDEBUG
372 FinalCode =
373 cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
374 dlog("After {0}:\n{1}^{2}", Pass.first,
375 StringRef(FinalCode).take_front(FinalCursor),
376 StringRef(FinalCode).drop_front(FinalCursor));
377 #endif
379 return split(Final, OriginalCursor, FinalCursor);
382 unsigned
383 transformCursorPosition(unsigned Offset,
384 const std::vector<tooling::Replacement> &Replacements) {
385 unsigned OriginalOffset = Offset;
386 for (const auto &R : Replacements) {
387 if (R.getOffset() + R.getLength() <= OriginalOffset) {
388 // Replacement is before cursor.
389 Offset += R.getReplacementText().size();
390 Offset -= R.getLength();
391 } else if (R.getOffset() < OriginalOffset) {
392 // Replacement overlaps cursor.
393 // Preserve position within replacement text, as far as possible.
394 unsigned PositionWithinReplacement = Offset - R.getOffset();
395 if (PositionWithinReplacement > R.getReplacementText().size()) {
396 Offset += R.getReplacementText().size();
397 Offset -= PositionWithinReplacement;
399 } else {
400 // Replacement after cursor.
401 break; // Replacements are sorted, the rest are also after the cursor.
404 return Offset;
407 } // namespace clangd
408 } // namespace clang