1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Contains implementation of BreakableToken class and classes derived
14 //===----------------------------------------------------------------------===//
16 #include "BreakableToken.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/Format/Format.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/Debug.h"
23 #define DEBUG_TYPE "format-token-breaker"
/// Set of characters treated as blanks when looking for split points and when
/// trimming text around a split. Note that '\n' is not part of the set.
static const char *const Blanks = " \t\v\f\r";
/// \brief Returns true if \p C is a blank: space, horizontal tab, vertical tab,
/// form feed or carriage return (the same characters as listed in Blanks).
///
/// NOTE(review): only the signature survived in the extracted text; the body
/// was reconstructed so that it agrees with the Blanks set. Confirm against
/// the upstream file.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
42 static BreakableToken::Split
getCommentSplit(StringRef Text
,
43 unsigned ContentStartColumn
,
46 encoding::Encoding Encoding
) {
47 if (ColumnLimit
<= ContentStartColumn
+ 1)
48 return BreakableToken::Split(StringRef::npos
, 0);
50 unsigned MaxSplit
= ColumnLimit
- ContentStartColumn
+ 1;
51 unsigned MaxSplitBytes
= 0;
53 for (unsigned NumChars
= 0;
54 NumChars
< MaxSplit
&& MaxSplitBytes
< Text
.size();) {
55 unsigned BytesInChar
=
56 encoding::getCodePointNumBytes(Text
[MaxSplitBytes
], Encoding
);
58 encoding::columnWidthWithTabs(Text
.substr(MaxSplitBytes
, BytesInChar
),
59 ContentStartColumn
, TabWidth
, Encoding
);
60 MaxSplitBytes
+= BytesInChar
;
63 StringRef::size_type SpaceOffset
= Text
.find_last_of(Blanks
, MaxSplitBytes
);
64 if (SpaceOffset
== StringRef::npos
||
65 // Don't break at leading whitespace.
66 Text
.find_last_not_of(Blanks
, SpaceOffset
) == StringRef::npos
) {
67 // Make sure that we don't break at leading whitespace that
68 // reaches past MaxSplit.
69 StringRef::size_type FirstNonWhitespace
= Text
.find_first_not_of(Blanks
);
70 if (FirstNonWhitespace
== StringRef::npos
)
71 // If the comment is only whitespace, we cannot split.
72 return BreakableToken::Split(StringRef::npos
, 0);
73 SpaceOffset
= Text
.find_first_of(
74 Blanks
, std::max
<unsigned>(MaxSplitBytes
, FirstNonWhitespace
));
76 if (SpaceOffset
!= StringRef::npos
&& SpaceOffset
!= 0) {
77 StringRef BeforeCut
= Text
.substr(0, SpaceOffset
).rtrim(Blanks
);
78 StringRef AfterCut
= Text
.substr(SpaceOffset
).ltrim(Blanks
);
79 return BreakableToken::Split(BeforeCut
.size(),
80 AfterCut
.begin() - BeforeCut
.end());
82 return BreakableToken::Split(StringRef::npos
, 0);
85 static BreakableToken::Split
86 getStringSplit(StringRef Text
, unsigned UsedColumns
, unsigned ColumnLimit
,
87 unsigned TabWidth
, encoding::Encoding Encoding
) {
88 // FIXME: Reduce unit test case.
90 return BreakableToken::Split(StringRef::npos
, 0);
91 if (ColumnLimit
<= UsedColumns
)
92 return BreakableToken::Split(StringRef::npos
, 0);
93 unsigned MaxSplit
= ColumnLimit
- UsedColumns
;
94 StringRef::size_type SpaceOffset
= 0;
95 StringRef::size_type SlashOffset
= 0;
96 StringRef::size_type WordStartOffset
= 0;
97 StringRef::size_type SplitPoint
= 0;
98 for (unsigned Chars
= 0;;) {
100 if (Text
[0] == '\\') {
101 Advance
= encoding::getEscapeSequenceLength(Text
);
104 Advance
= encoding::getCodePointNumBytes(Text
[0], Encoding
);
105 Chars
+= encoding::columnWidthWithTabs(
106 Text
.substr(0, Advance
), UsedColumns
+ Chars
, TabWidth
, Encoding
);
109 if (Chars
> MaxSplit
|| Text
.size() == Advance
)
112 if (IsBlank(Text
[0]))
113 SpaceOffset
= SplitPoint
;
115 SlashOffset
= SplitPoint
;
116 if (Advance
== 1 && !isAlphanumeric(Text
[0]))
117 WordStartOffset
= SplitPoint
;
119 SplitPoint
+= Advance
;
120 Text
= Text
.substr(Advance
);
123 if (SpaceOffset
!= 0)
124 return BreakableToken::Split(SpaceOffset
+ 1, 0);
125 if (SlashOffset
!= 0)
126 return BreakableToken::Split(SlashOffset
+ 1, 0);
127 if (WordStartOffset
!= 0)
128 return BreakableToken::Split(WordStartOffset
+ 1, 0);
130 return BreakableToken::Split(SplitPoint
, 0);
131 return BreakableToken::Split(StringRef::npos
, 0);
134 unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
136 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
137 unsigned LineIndex
, unsigned Offset
, StringRef::size_type Length
) const {
138 return StartColumn
+ Prefix
.size() + Postfix
.size() +
139 encoding::columnWidthWithTabs(Line
.substr(Offset
, Length
),
140 StartColumn
+ Prefix
.size(),
141 Style
.TabWidth
, Encoding
);
144 BreakableSingleLineToken::BreakableSingleLineToken(
145 const FormatToken
&Tok
, unsigned IndentLevel
, unsigned StartColumn
,
146 StringRef Prefix
, StringRef Postfix
, bool InPPDirective
,
147 encoding::Encoding Encoding
, const FormatStyle
&Style
)
148 : BreakableToken(Tok
, IndentLevel
, InPPDirective
, Encoding
, Style
),
149 StartColumn(StartColumn
), Prefix(Prefix
), Postfix(Postfix
) {
150 assert(Tok
.TokenText
.endswith(Postfix
));
151 Line
= Tok
.TokenText
.substr(
152 Prefix
.size(), Tok
.TokenText
.size() - Prefix
.size() - Postfix
.size());
155 BreakableStringLiteral::BreakableStringLiteral(
156 const FormatToken
&Tok
, unsigned IndentLevel
, unsigned StartColumn
,
157 StringRef Prefix
, StringRef Postfix
, bool InPPDirective
,
158 encoding::Encoding Encoding
, const FormatStyle
&Style
)
159 : BreakableSingleLineToken(Tok
, IndentLevel
, StartColumn
, Prefix
, Postfix
,
160 InPPDirective
, Encoding
, Style
) {}
162 BreakableToken::Split
163 BreakableStringLiteral::getSplit(unsigned LineIndex
, unsigned TailOffset
,
164 unsigned ColumnLimit
) const {
165 return getStringSplit(Line
.substr(TailOffset
),
166 StartColumn
+ Prefix
.size() + Postfix
.size(),
167 ColumnLimit
, Style
.TabWidth
, Encoding
);
170 void BreakableStringLiteral::insertBreak(unsigned LineIndex
,
171 unsigned TailOffset
, Split Split
,
172 WhitespaceManager
&Whitespaces
) {
173 unsigned LeadingSpaces
= StartColumn
;
174 // The '@' of an ObjC string literal (@"Test") does not become part of the
176 // FIXME: It might be a cleaner solution to merge the tokens as a
177 // precomputation step.
178 if (Prefix
.startswith("@"))
180 Whitespaces
.replaceWhitespaceInToken(
181 Tok
, Prefix
.size() + TailOffset
+ Split
.first
, Split
.second
, Postfix
,
182 Prefix
, InPPDirective
, 1, IndentLevel
, LeadingSpaces
);
185 static StringRef
getLineCommentIndentPrefix(StringRef Comment
) {
186 static const char *const KnownPrefixes
[] = { "///", "//" };
187 StringRef LongestPrefix
;
188 for (StringRef KnownPrefix
: KnownPrefixes
) {
189 if (Comment
.startswith(KnownPrefix
)) {
190 size_t PrefixLength
= KnownPrefix
.size();
191 while (PrefixLength
< Comment
.size() && Comment
[PrefixLength
] == ' ')
193 if (PrefixLength
> LongestPrefix
.size())
194 LongestPrefix
= Comment
.substr(0, PrefixLength
);
197 return LongestPrefix
;
200 BreakableLineComment::BreakableLineComment(
201 const FormatToken
&Token
, unsigned IndentLevel
, unsigned StartColumn
,
202 bool InPPDirective
, encoding::Encoding Encoding
, const FormatStyle
&Style
)
203 : BreakableSingleLineToken(Token
, IndentLevel
, StartColumn
,
204 getLineCommentIndentPrefix(Token
.TokenText
), "",
205 InPPDirective
, Encoding
, Style
) {
206 OriginalPrefix
= Prefix
;
207 if (Token
.TokenText
.size() > Prefix
.size() &&
208 isAlphanumeric(Token
.TokenText
[Prefix
.size()])) {
211 else if (Prefix
== "///")
216 BreakableToken::Split
217 BreakableLineComment::getSplit(unsigned LineIndex
, unsigned TailOffset
,
218 unsigned ColumnLimit
) const {
219 return getCommentSplit(Line
.substr(TailOffset
), StartColumn
+ Prefix
.size(),
220 ColumnLimit
, Style
.TabWidth
, Encoding
);
223 void BreakableLineComment::insertBreak(unsigned LineIndex
, unsigned TailOffset
,
225 WhitespaceManager
&Whitespaces
) {
226 Whitespaces
.replaceWhitespaceInToken(
227 Tok
, OriginalPrefix
.size() + TailOffset
+ Split
.first
, Split
.second
,
228 Postfix
, Prefix
, InPPDirective
, /*Newlines=*/1, IndentLevel
, StartColumn
);
231 void BreakableLineComment::replaceWhitespace(unsigned LineIndex
,
232 unsigned TailOffset
, Split Split
,
233 WhitespaceManager
&Whitespaces
) {
234 Whitespaces
.replaceWhitespaceInToken(
235 Tok
, OriginalPrefix
.size() + TailOffset
+ Split
.first
, Split
.second
, "",
236 "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
241 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex
,
242 WhitespaceManager
&Whitespaces
) {
243 if (OriginalPrefix
!= Prefix
) {
244 Whitespaces
.replaceWhitespaceInToken(Tok
, OriginalPrefix
.size(), 0, "", "",
245 /*InPPDirective=*/false,
246 /*Newlines=*/0, /*IndentLevel=*/0,
251 BreakableBlockComment::BreakableBlockComment(
252 const FormatToken
&Token
, unsigned IndentLevel
, unsigned StartColumn
,
253 unsigned OriginalStartColumn
, bool FirstInLine
, bool InPPDirective
,
254 encoding::Encoding Encoding
, const FormatStyle
&Style
)
255 : BreakableToken(Token
, IndentLevel
, InPPDirective
, Encoding
, Style
) {
256 StringRef
TokenText(Token
.TokenText
);
257 assert(TokenText
.startswith("/*") && TokenText
.endswith("*/"));
258 TokenText
.substr(2, TokenText
.size() - 4).split(Lines
, "\n");
260 int IndentDelta
= StartColumn
- OriginalStartColumn
;
261 LeadingWhitespace
.resize(Lines
.size());
262 StartOfLineColumn
.resize(Lines
.size());
263 StartOfLineColumn
[0] = StartColumn
+ 2;
264 for (size_t i
= 1; i
< Lines
.size(); ++i
)
265 adjustWhitespace(i
, IndentDelta
);
268 if (Lines
.size() == 1 && !FirstInLine
) {
269 // Comments for which FirstInLine is false can start on arbitrary column,
270 // and available horizontal space can be too small to align consecutive
271 // lines with the first one.
272 // FIXME: We could, probably, align them to current indentation level, but
273 // now we just wrap them without stars.
276 for (size_t i
= 1, e
= Lines
.size(); i
< e
&& !Decoration
.empty(); ++i
) {
277 // If the last line is empty, the closing "*/" will have a star.
278 if (i
+ 1 == e
&& Lines
[i
].empty())
280 while (!Lines
[i
].startswith(Decoration
))
281 Decoration
= Decoration
.substr(0, Decoration
.size() - 1);
284 LastLineNeedsDecoration
= true;
285 IndentAtLineBreak
= StartOfLineColumn
[0] + 1;
286 for (size_t i
= 1; i
< Lines
.size(); ++i
) {
287 if (Lines
[i
].empty()) {
288 if (i
+ 1 == Lines
.size()) {
289 // Empty last line means that we already have a star as a part of the
290 // trailing */. We also need to preserve whitespace, so that */ is
291 // correctly indented.
292 LastLineNeedsDecoration
= false;
293 } else if (Decoration
.empty()) {
294 // For all other lines, set the start column to 0 if they're empty, so
295 // we do not insert trailing whitespace anywhere.
296 StartOfLineColumn
[i
] = 0;
300 // The first line already excludes the star.
301 // For all other lines, adjust the line to exclude the star and
302 // (optionally) the first whitespace.
303 StartOfLineColumn
[i
] += Decoration
.size();
304 Lines
[i
] = Lines
[i
].substr(Decoration
.size());
305 LeadingWhitespace
[i
] += Decoration
.size();
307 std::min
<int>(IndentAtLineBreak
, std::max(0, StartOfLineColumn
[i
]));
309 IndentAtLineBreak
= std::max
<unsigned>(IndentAtLineBreak
, Decoration
.size());
311 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak
<< "\n";
312 for (size_t i
= 0; i
< Lines
.size(); ++i
) {
313 llvm::dbgs() << i
<< " |" << Lines
[i
] << "| " << LeadingWhitespace
[i
]
319 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex
,
321 // When in a preprocessor directive, the trailing backslash in a block comment
322 // is not needed, but can serve a purpose of uniformity with necessary escaped
323 // newlines outside the comment. In this case we remove it here before
324 // trimming the trailing whitespace. The backslash will be re-added later when
325 // inserting a line break.
326 size_t EndOfPreviousLine
= Lines
[LineIndex
- 1].size();
327 if (InPPDirective
&& Lines
[LineIndex
- 1].endswith("\\"))
330 // Calculate the end of the non-whitespace text in the previous line.
332 Lines
[LineIndex
- 1].find_last_not_of(Blanks
, EndOfPreviousLine
);
333 if (EndOfPreviousLine
== StringRef::npos
)
334 EndOfPreviousLine
= 0;
337 // Calculate the start of the non-whitespace text in the current line.
338 size_t StartOfLine
= Lines
[LineIndex
].find_first_not_of(Blanks
);
339 if (StartOfLine
== StringRef::npos
)
340 StartOfLine
= Lines
[LineIndex
].size();
342 StringRef Whitespace
= Lines
[LineIndex
].substr(0, StartOfLine
);
343 // Adjust Lines to only contain relevant text.
344 Lines
[LineIndex
- 1] = Lines
[LineIndex
- 1].substr(0, EndOfPreviousLine
);
345 Lines
[LineIndex
] = Lines
[LineIndex
].substr(StartOfLine
);
346 // Adjust LeadingWhitespace to account all whitespace between the lines
347 // to the current line.
348 LeadingWhitespace
[LineIndex
] =
349 Lines
[LineIndex
].begin() - Lines
[LineIndex
- 1].end();
351 // Adjust the start column uniformly across all lines.
352 StartOfLineColumn
[LineIndex
] =
353 encoding::columnWidthWithTabs(Whitespace
, 0, Style
.TabWidth
, Encoding
) +
357 unsigned BreakableBlockComment::getLineCount() const { return Lines
.size(); }
359 unsigned BreakableBlockComment::getLineLengthAfterSplit(
360 unsigned LineIndex
, unsigned Offset
, StringRef::size_type Length
) const {
361 unsigned ContentStartColumn
= getContentStartColumn(LineIndex
, Offset
);
362 return ContentStartColumn
+
363 encoding::columnWidthWithTabs(Lines
[LineIndex
].substr(Offset
, Length
),
364 ContentStartColumn
, Style
.TabWidth
,
366 // The last line gets a "*/" postfix.
367 (LineIndex
+ 1 == Lines
.size() ? 2 : 0);
370 BreakableToken::Split
371 BreakableBlockComment::getSplit(unsigned LineIndex
, unsigned TailOffset
,
372 unsigned ColumnLimit
) const {
373 return getCommentSplit(Lines
[LineIndex
].substr(TailOffset
),
374 getContentStartColumn(LineIndex
, TailOffset
),
375 ColumnLimit
, Style
.TabWidth
, Encoding
);
378 void BreakableBlockComment::insertBreak(unsigned LineIndex
, unsigned TailOffset
,
380 WhitespaceManager
&Whitespaces
) {
381 StringRef Text
= Lines
[LineIndex
].substr(TailOffset
);
382 StringRef Prefix
= Decoration
;
383 if (LineIndex
+ 1 == Lines
.size() &&
384 Text
.size() == Split
.first
+ Split
.second
) {
385 // For the last line we need to break before "*/", but not to add "* ".
389 unsigned BreakOffsetInToken
=
390 Text
.data() - Tok
.TokenText
.data() + Split
.first
;
391 unsigned CharsToRemove
= Split
.second
;
392 assert(IndentAtLineBreak
>= Decoration
.size());
393 Whitespaces
.replaceWhitespaceInToken(
394 Tok
, BreakOffsetInToken
, CharsToRemove
, "", Prefix
, InPPDirective
, 1,
395 IndentLevel
, IndentAtLineBreak
- Decoration
.size());
398 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex
,
399 unsigned TailOffset
, Split Split
,
400 WhitespaceManager
&Whitespaces
) {
401 StringRef Text
= Lines
[LineIndex
].substr(TailOffset
);
402 unsigned BreakOffsetInToken
=
403 Text
.data() - Tok
.TokenText
.data() + Split
.first
;
404 unsigned CharsToRemove
= Split
.second
;
405 Whitespaces
.replaceWhitespaceInToken(
406 Tok
, BreakOffsetInToken
, CharsToRemove
, "", "", /*InPPDirective=*/false,
407 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
411 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex
,
412 WhitespaceManager
&Whitespaces
) {
415 StringRef Prefix
= Decoration
;
416 if (Lines
[LineIndex
].empty()) {
417 if (LineIndex
+ 1 == Lines
.size()) {
418 if (!LastLineNeedsDecoration
) {
419 // If the last line was empty, we don't need a prefix, as the */ will
420 // line up with the decoration (if it exists).
423 } else if (!Decoration
.empty()) {
424 // For other empty lines, if we do have a decoration, adapt it to not
425 // contain a trailing whitespace.
426 Prefix
= Prefix
.substr(0, 1);
429 if (StartOfLineColumn
[LineIndex
] == 1) {
430 // This line starts immediately after the decorating *.
431 Prefix
= Prefix
.substr(0, 1);
435 unsigned WhitespaceOffsetInToken
= Lines
[LineIndex
].data() -
436 Tok
.TokenText
.data() -
437 LeadingWhitespace
[LineIndex
];
438 Whitespaces
.replaceWhitespaceInToken(
439 Tok
, WhitespaceOffsetInToken
, LeadingWhitespace
[LineIndex
], "", Prefix
,
440 InPPDirective
, 1, IndentLevel
,
441 StartOfLineColumn
[LineIndex
] - Prefix
.size());
445 BreakableBlockComment::getContentStartColumn(unsigned LineIndex
,
446 unsigned TailOffset
) const {
447 // If we break, we always break at the predefined indent.
449 return IndentAtLineBreak
;
450 return std::max(0, StartOfLineColumn
[LineIndex
]);
453 } // namespace format