1 //===--- Format.cpp - Format C++ code -------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
14 //===----------------------------------------------------------------------===//
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineFormatter.h"
19 #include "UnwrappedLineParser.h"
20 #include "WhitespaceManager.h"
21 #include "clang/Basic/Diagnostic.h"
22 #include "clang/Basic/DiagnosticOptions.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
34 #define DEBUG_TYPE "format-formatter"
36 using clang::format::FormatStyle
;
38 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string
)
42 template <> struct ScalarEnumerationTraits
<FormatStyle::LanguageKind
> {
43 static void enumeration(IO
&IO
, FormatStyle::LanguageKind
&Value
) {
44 IO
.enumCase(Value
, "Cpp", FormatStyle::LK_Cpp
);
45 IO
.enumCase(Value
, "Java", FormatStyle::LK_Java
);
46 IO
.enumCase(Value
, "JavaScript", FormatStyle::LK_JavaScript
);
47 IO
.enumCase(Value
, "Proto", FormatStyle::LK_Proto
);
51 template <> struct ScalarEnumerationTraits
<FormatStyle::LanguageStandard
> {
52 static void enumeration(IO
&IO
, FormatStyle::LanguageStandard
&Value
) {
53 IO
.enumCase(Value
, "Cpp03", FormatStyle::LS_Cpp03
);
54 IO
.enumCase(Value
, "C++03", FormatStyle::LS_Cpp03
);
55 IO
.enumCase(Value
, "Cpp11", FormatStyle::LS_Cpp11
);
56 IO
.enumCase(Value
, "C++11", FormatStyle::LS_Cpp11
);
57 IO
.enumCase(Value
, "Auto", FormatStyle::LS_Auto
);
61 template <> struct ScalarEnumerationTraits
<FormatStyle::UseTabStyle
> {
62 static void enumeration(IO
&IO
, FormatStyle::UseTabStyle
&Value
) {
63 IO
.enumCase(Value
, "Never", FormatStyle::UT_Never
);
64 IO
.enumCase(Value
, "false", FormatStyle::UT_Never
);
65 IO
.enumCase(Value
, "Always", FormatStyle::UT_Always
);
66 IO
.enumCase(Value
, "true", FormatStyle::UT_Always
);
67 IO
.enumCase(Value
, "ForIndentation", FormatStyle::UT_ForIndentation
);
71 template <> struct ScalarEnumerationTraits
<FormatStyle::ShortFunctionStyle
> {
72 static void enumeration(IO
&IO
, FormatStyle::ShortFunctionStyle
&Value
) {
73 IO
.enumCase(Value
, "None", FormatStyle::SFS_None
);
74 IO
.enumCase(Value
, "false", FormatStyle::SFS_None
);
75 IO
.enumCase(Value
, "All", FormatStyle::SFS_All
);
76 IO
.enumCase(Value
, "true", FormatStyle::SFS_All
);
77 IO
.enumCase(Value
, "Inline", FormatStyle::SFS_Inline
);
78 IO
.enumCase(Value
, "Empty", FormatStyle::SFS_Empty
);
82 template <> struct ScalarEnumerationTraits
<FormatStyle::BinaryOperatorStyle
> {
83 static void enumeration(IO
&IO
, FormatStyle::BinaryOperatorStyle
&Value
) {
84 IO
.enumCase(Value
, "All", FormatStyle::BOS_All
);
85 IO
.enumCase(Value
, "true", FormatStyle::BOS_All
);
86 IO
.enumCase(Value
, "None", FormatStyle::BOS_None
);
87 IO
.enumCase(Value
, "false", FormatStyle::BOS_None
);
88 IO
.enumCase(Value
, "NonAssignment", FormatStyle::BOS_NonAssignment
);
92 template <> struct ScalarEnumerationTraits
<FormatStyle::BraceBreakingStyle
> {
93 static void enumeration(IO
&IO
, FormatStyle::BraceBreakingStyle
&Value
) {
94 IO
.enumCase(Value
, "Attach", FormatStyle::BS_Attach
);
95 IO
.enumCase(Value
, "Linux", FormatStyle::BS_Linux
);
96 IO
.enumCase(Value
, "Stroustrup", FormatStyle::BS_Stroustrup
);
97 IO
.enumCase(Value
, "Allman", FormatStyle::BS_Allman
);
98 IO
.enumCase(Value
, "GNU", FormatStyle::BS_GNU
);
103 struct ScalarEnumerationTraits
<FormatStyle::NamespaceIndentationKind
> {
104 static void enumeration(IO
&IO
,
105 FormatStyle::NamespaceIndentationKind
&Value
) {
106 IO
.enumCase(Value
, "None", FormatStyle::NI_None
);
107 IO
.enumCase(Value
, "Inner", FormatStyle::NI_Inner
);
108 IO
.enumCase(Value
, "All", FormatStyle::NI_All
);
113 struct ScalarEnumerationTraits
<FormatStyle::PointerAlignmentStyle
> {
114 static void enumeration(IO
&IO
,
115 FormatStyle::PointerAlignmentStyle
&Value
) {
116 IO
.enumCase(Value
, "Middle", FormatStyle::PAS_Middle
);
117 IO
.enumCase(Value
, "Left", FormatStyle::PAS_Left
);
118 IO
.enumCase(Value
, "Right", FormatStyle::PAS_Right
);
120 // For backward compatibility.
121 IO
.enumCase(Value
, "true", FormatStyle::PAS_Left
);
122 IO
.enumCase(Value
, "false", FormatStyle::PAS_Right
);
127 struct ScalarEnumerationTraits
<FormatStyle::SpaceBeforeParensOptions
> {
128 static void enumeration(IO
&IO
,
129 FormatStyle::SpaceBeforeParensOptions
&Value
) {
130 IO
.enumCase(Value
, "Never", FormatStyle::SBPO_Never
);
131 IO
.enumCase(Value
, "ControlStatements",
132 FormatStyle::SBPO_ControlStatements
);
133 IO
.enumCase(Value
, "Always", FormatStyle::SBPO_Always
);
135 // For backward compatibility.
136 IO
.enumCase(Value
, "false", FormatStyle::SBPO_Never
);
137 IO
.enumCase(Value
, "true", FormatStyle::SBPO_ControlStatements
);
141 template <> struct MappingTraits
<FormatStyle
> {
142 static void mapping(IO
&IO
, FormatStyle
&Style
) {
143 // When reading, read the language first, we need it for getPredefinedStyle.
144 IO
.mapOptional("Language", Style
.Language
);
146 if (IO
.outputting()) {
147 StringRef StylesArray
[] = { "LLVM", "Google", "Chromium",
148 "Mozilla", "WebKit", "GNU" };
149 ArrayRef
<StringRef
> Styles(StylesArray
);
150 for (size_t i
= 0, e
= Styles
.size(); i
< e
; ++i
) {
151 StringRef
StyleName(Styles
[i
]);
152 FormatStyle PredefinedStyle
;
153 if (getPredefinedStyle(StyleName
, Style
.Language
, &PredefinedStyle
) &&
154 Style
== PredefinedStyle
) {
155 IO
.mapOptional("# BasedOnStyle", StyleName
);
160 StringRef BasedOnStyle
;
161 IO
.mapOptional("BasedOnStyle", BasedOnStyle
);
162 if (!BasedOnStyle
.empty()) {
163 FormatStyle::LanguageKind OldLanguage
= Style
.Language
;
164 FormatStyle::LanguageKind Language
=
165 ((FormatStyle
*)IO
.getContext())->Language
;
166 if (!getPredefinedStyle(BasedOnStyle
, Language
, &Style
)) {
167 IO
.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle
));
170 Style
.Language
= OldLanguage
;
174 IO
.mapOptional("AccessModifierOffset", Style
.AccessModifierOffset
);
175 IO
.mapOptional("AlignAfterOpenBracket", Style
.AlignAfterOpenBracket
);
176 IO
.mapOptional("AlignEscapedNewlinesLeft", Style
.AlignEscapedNewlinesLeft
);
177 IO
.mapOptional("AlignOperands", Style
.AlignOperands
);
178 IO
.mapOptional("AlignTrailingComments", Style
.AlignTrailingComments
);
179 IO
.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
180 Style
.AllowAllParametersOfDeclarationOnNextLine
);
181 IO
.mapOptional("AllowShortBlocksOnASingleLine",
182 Style
.AllowShortBlocksOnASingleLine
);
183 IO
.mapOptional("AllowShortCaseLabelsOnASingleLine",
184 Style
.AllowShortCaseLabelsOnASingleLine
);
185 IO
.mapOptional("AllowShortIfStatementsOnASingleLine",
186 Style
.AllowShortIfStatementsOnASingleLine
);
187 IO
.mapOptional("AllowShortLoopsOnASingleLine",
188 Style
.AllowShortLoopsOnASingleLine
);
189 IO
.mapOptional("AllowShortFunctionsOnASingleLine",
190 Style
.AllowShortFunctionsOnASingleLine
);
191 IO
.mapOptional("AlwaysBreakAfterDefinitionReturnType",
192 Style
.AlwaysBreakAfterDefinitionReturnType
);
193 IO
.mapOptional("AlwaysBreakTemplateDeclarations",
194 Style
.AlwaysBreakTemplateDeclarations
);
195 IO
.mapOptional("AlwaysBreakBeforeMultilineStrings",
196 Style
.AlwaysBreakBeforeMultilineStrings
);
197 IO
.mapOptional("BreakBeforeBinaryOperators",
198 Style
.BreakBeforeBinaryOperators
);
199 IO
.mapOptional("BreakBeforeTernaryOperators",
200 Style
.BreakBeforeTernaryOperators
);
201 IO
.mapOptional("BreakConstructorInitializersBeforeComma",
202 Style
.BreakConstructorInitializersBeforeComma
);
203 IO
.mapOptional("BinPackParameters", Style
.BinPackParameters
);
204 IO
.mapOptional("BinPackArguments", Style
.BinPackArguments
);
205 IO
.mapOptional("ColumnLimit", Style
.ColumnLimit
);
206 IO
.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
207 Style
.ConstructorInitializerAllOnOneLineOrOnePerLine
);
208 IO
.mapOptional("ConstructorInitializerIndentWidth",
209 Style
.ConstructorInitializerIndentWidth
);
210 IO
.mapOptional("DerivePointerAlignment", Style
.DerivePointerAlignment
);
211 IO
.mapOptional("ExperimentalAutoDetectBinPacking",
212 Style
.ExperimentalAutoDetectBinPacking
);
213 IO
.mapOptional("IndentCaseLabels", Style
.IndentCaseLabels
);
214 IO
.mapOptional("IndentWrappedFunctionNames",
215 Style
.IndentWrappedFunctionNames
);
216 IO
.mapOptional("IndentFunctionDeclarationAfterType",
217 Style
.IndentWrappedFunctionNames
);
218 IO
.mapOptional("MaxEmptyLinesToKeep", Style
.MaxEmptyLinesToKeep
);
219 IO
.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
220 Style
.KeepEmptyLinesAtTheStartOfBlocks
);
221 IO
.mapOptional("NamespaceIndentation", Style
.NamespaceIndentation
);
222 IO
.mapOptional("ObjCBlockIndentWidth", Style
.ObjCBlockIndentWidth
);
223 IO
.mapOptional("ObjCSpaceAfterProperty", Style
.ObjCSpaceAfterProperty
);
224 IO
.mapOptional("ObjCSpaceBeforeProtocolList",
225 Style
.ObjCSpaceBeforeProtocolList
);
226 IO
.mapOptional("PenaltyBreakBeforeFirstCallParameter",
227 Style
.PenaltyBreakBeforeFirstCallParameter
);
228 IO
.mapOptional("PenaltyBreakComment", Style
.PenaltyBreakComment
);
229 IO
.mapOptional("PenaltyBreakString", Style
.PenaltyBreakString
);
230 IO
.mapOptional("PenaltyBreakFirstLessLess",
231 Style
.PenaltyBreakFirstLessLess
);
232 IO
.mapOptional("PenaltyExcessCharacter", Style
.PenaltyExcessCharacter
);
233 IO
.mapOptional("PenaltyReturnTypeOnItsOwnLine",
234 Style
.PenaltyReturnTypeOnItsOwnLine
);
235 IO
.mapOptional("PointerAlignment", Style
.PointerAlignment
);
236 IO
.mapOptional("SpacesBeforeTrailingComments",
237 Style
.SpacesBeforeTrailingComments
);
238 IO
.mapOptional("Cpp11BracedListStyle", Style
.Cpp11BracedListStyle
);
239 IO
.mapOptional("Standard", Style
.Standard
);
240 IO
.mapOptional("IndentWidth", Style
.IndentWidth
);
241 IO
.mapOptional("TabWidth", Style
.TabWidth
);
242 IO
.mapOptional("UseTab", Style
.UseTab
);
243 IO
.mapOptional("BreakBeforeBraces", Style
.BreakBeforeBraces
);
244 IO
.mapOptional("SpacesInParentheses", Style
.SpacesInParentheses
);
245 IO
.mapOptional("SpacesInSquareBrackets", Style
.SpacesInSquareBrackets
);
246 IO
.mapOptional("SpacesInAngles", Style
.SpacesInAngles
);
247 IO
.mapOptional("SpaceInEmptyParentheses", Style
.SpaceInEmptyParentheses
);
248 IO
.mapOptional("SpacesInCStyleCastParentheses",
249 Style
.SpacesInCStyleCastParentheses
);
250 IO
.mapOptional("SpaceAfterCStyleCast", Style
.SpaceAfterCStyleCast
);
251 IO
.mapOptional("SpacesInContainerLiterals",
252 Style
.SpacesInContainerLiterals
);
253 IO
.mapOptional("SpaceBeforeAssignmentOperators",
254 Style
.SpaceBeforeAssignmentOperators
);
255 IO
.mapOptional("ContinuationIndentWidth", Style
.ContinuationIndentWidth
);
256 IO
.mapOptional("CommentPragmas", Style
.CommentPragmas
);
257 IO
.mapOptional("ForEachMacros", Style
.ForEachMacros
);
259 // For backward compatibility.
260 if (!IO
.outputting()) {
261 IO
.mapOptional("SpaceAfterControlStatementKeyword",
262 Style
.SpaceBeforeParens
);
263 IO
.mapOptional("PointerBindsToType", Style
.PointerAlignment
);
264 IO
.mapOptional("DerivePointerBinding", Style
.DerivePointerAlignment
);
266 IO
.mapOptional("SpaceBeforeParens", Style
.SpaceBeforeParens
);
267 IO
.mapOptional("DisableFormat", Style
.DisableFormat
);
271 // Allows to read vector<FormatStyle> while keeping default values.
272 // IO.getContext() should contain a pointer to the FormatStyle structure, that
273 // will be used to get default values for missing keys.
274 // If the first element has no Language specified, it will be treated as the
275 // default one for the following elements.
276 template <> struct DocumentListTraits
<std::vector
<FormatStyle
> > {
277 static size_t size(IO
&IO
, std::vector
<FormatStyle
> &Seq
) {
280 static FormatStyle
&element(IO
&IO
, std::vector
<FormatStyle
> &Seq
,
282 if (Index
>= Seq
.size()) {
283 assert(Index
== Seq
.size());
284 FormatStyle Template
;
285 if (Seq
.size() > 0 && Seq
[0].Language
== FormatStyle::LK_None
) {
288 Template
= *((const FormatStyle
*)IO
.getContext());
289 Template
.Language
= FormatStyle::LK_None
;
291 Seq
.resize(Index
+ 1, Template
);
302 const std::error_category
&getParseCategory() {
303 static ParseErrorCategory C
;
306 std::error_code
make_error_code(ParseError e
) {
307 return std::error_code(static_cast<int>(e
), getParseCategory());
310 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT
{
311 return "clang-format.parse_error";
314 std::string
ParseErrorCategory::message(int EV
) const {
315 switch (static_cast<ParseError
>(EV
)) {
316 case ParseError::Success
:
318 case ParseError::Error
:
319 return "Invalid argument";
320 case ParseError::Unsuitable
:
323 llvm_unreachable("unexpected parse error");
326 FormatStyle
getLLVMStyle() {
327 FormatStyle LLVMStyle
;
328 LLVMStyle
.Language
= FormatStyle::LK_Cpp
;
329 LLVMStyle
.AccessModifierOffset
= -2;
330 LLVMStyle
.AlignEscapedNewlinesLeft
= false;
331 LLVMStyle
.AlignAfterOpenBracket
= true;
332 LLVMStyle
.AlignOperands
= true;
333 LLVMStyle
.AlignTrailingComments
= true;
334 LLVMStyle
.AllowAllParametersOfDeclarationOnNextLine
= true;
335 LLVMStyle
.AllowShortFunctionsOnASingleLine
= FormatStyle::SFS_All
;
336 LLVMStyle
.AllowShortBlocksOnASingleLine
= false;
337 LLVMStyle
.AllowShortCaseLabelsOnASingleLine
= false;
338 LLVMStyle
.AllowShortIfStatementsOnASingleLine
= false;
339 LLVMStyle
.AllowShortLoopsOnASingleLine
= false;
340 LLVMStyle
.AlwaysBreakAfterDefinitionReturnType
= false;
341 LLVMStyle
.AlwaysBreakBeforeMultilineStrings
= false;
342 LLVMStyle
.AlwaysBreakTemplateDeclarations
= false;
343 LLVMStyle
.BinPackParameters
= true;
344 LLVMStyle
.BinPackArguments
= true;
345 LLVMStyle
.BreakBeforeBinaryOperators
= FormatStyle::BOS_None
;
346 LLVMStyle
.BreakBeforeTernaryOperators
= true;
347 LLVMStyle
.BreakBeforeBraces
= FormatStyle::BS_Attach
;
348 LLVMStyle
.BreakConstructorInitializersBeforeComma
= false;
349 LLVMStyle
.ColumnLimit
= 80;
350 LLVMStyle
.CommentPragmas
= "^ IWYU pragma:";
351 LLVMStyle
.ConstructorInitializerAllOnOneLineOrOnePerLine
= false;
352 LLVMStyle
.ConstructorInitializerIndentWidth
= 4;
353 LLVMStyle
.ContinuationIndentWidth
= 4;
354 LLVMStyle
.Cpp11BracedListStyle
= true;
355 LLVMStyle
.DerivePointerAlignment
= false;
356 LLVMStyle
.ExperimentalAutoDetectBinPacking
= false;
357 LLVMStyle
.ForEachMacros
.push_back("foreach");
358 LLVMStyle
.ForEachMacros
.push_back("Q_FOREACH");
359 LLVMStyle
.ForEachMacros
.push_back("BOOST_FOREACH");
360 LLVMStyle
.IndentCaseLabels
= false;
361 LLVMStyle
.IndentWrappedFunctionNames
= false;
362 LLVMStyle
.IndentWidth
= 2;
363 LLVMStyle
.TabWidth
= 8;
364 LLVMStyle
.MaxEmptyLinesToKeep
= 1;
365 LLVMStyle
.KeepEmptyLinesAtTheStartOfBlocks
= true;
366 LLVMStyle
.NamespaceIndentation
= FormatStyle::NI_None
;
367 LLVMStyle
.ObjCBlockIndentWidth
= 2;
368 LLVMStyle
.ObjCSpaceAfterProperty
= false;
369 LLVMStyle
.ObjCSpaceBeforeProtocolList
= true;
370 LLVMStyle
.PointerAlignment
= FormatStyle::PAS_Right
;
371 LLVMStyle
.SpacesBeforeTrailingComments
= 1;
372 LLVMStyle
.Standard
= FormatStyle::LS_Cpp11
;
373 LLVMStyle
.UseTab
= FormatStyle::UT_Never
;
374 LLVMStyle
.SpacesInParentheses
= false;
375 LLVMStyle
.SpacesInSquareBrackets
= false;
376 LLVMStyle
.SpaceInEmptyParentheses
= false;
377 LLVMStyle
.SpacesInContainerLiterals
= true;
378 LLVMStyle
.SpacesInCStyleCastParentheses
= false;
379 LLVMStyle
.SpaceAfterCStyleCast
= false;
380 LLVMStyle
.SpaceBeforeParens
= FormatStyle::SBPO_ControlStatements
;
381 LLVMStyle
.SpaceBeforeAssignmentOperators
= true;
382 LLVMStyle
.SpacesInAngles
= false;
384 LLVMStyle
.PenaltyBreakComment
= 300;
385 LLVMStyle
.PenaltyBreakFirstLessLess
= 120;
386 LLVMStyle
.PenaltyBreakString
= 1000;
387 LLVMStyle
.PenaltyExcessCharacter
= 1000000;
388 LLVMStyle
.PenaltyReturnTypeOnItsOwnLine
= 60;
389 LLVMStyle
.PenaltyBreakBeforeFirstCallParameter
= 19;
391 LLVMStyle
.DisableFormat
= false;
396 FormatStyle
getGoogleStyle(FormatStyle::LanguageKind Language
) {
397 FormatStyle GoogleStyle
= getLLVMStyle();
398 GoogleStyle
.Language
= Language
;
400 GoogleStyle
.AccessModifierOffset
= -1;
401 GoogleStyle
.AlignEscapedNewlinesLeft
= true;
402 GoogleStyle
.AllowShortIfStatementsOnASingleLine
= true;
403 GoogleStyle
.AllowShortLoopsOnASingleLine
= true;
404 GoogleStyle
.AlwaysBreakBeforeMultilineStrings
= true;
405 GoogleStyle
.AlwaysBreakTemplateDeclarations
= true;
406 GoogleStyle
.ConstructorInitializerAllOnOneLineOrOnePerLine
= true;
407 GoogleStyle
.DerivePointerAlignment
= true;
408 GoogleStyle
.IndentCaseLabels
= true;
409 GoogleStyle
.KeepEmptyLinesAtTheStartOfBlocks
= false;
410 GoogleStyle
.ObjCSpaceAfterProperty
= false;
411 GoogleStyle
.ObjCSpaceBeforeProtocolList
= false;
412 GoogleStyle
.PointerAlignment
= FormatStyle::PAS_Left
;
413 GoogleStyle
.SpacesBeforeTrailingComments
= 2;
414 GoogleStyle
.Standard
= FormatStyle::LS_Auto
;
416 GoogleStyle
.PenaltyReturnTypeOnItsOwnLine
= 200;
417 GoogleStyle
.PenaltyBreakBeforeFirstCallParameter
= 1;
419 if (Language
== FormatStyle::LK_Java
) {
420 GoogleStyle
.AlignAfterOpenBracket
= false;
421 GoogleStyle
.AlignOperands
= false;
422 GoogleStyle
.AlignTrailingComments
= false;
423 GoogleStyle
.AllowShortFunctionsOnASingleLine
= FormatStyle::SFS_Empty
;
424 GoogleStyle
.AllowShortIfStatementsOnASingleLine
= false;
425 GoogleStyle
.AlwaysBreakBeforeMultilineStrings
= false;
426 GoogleStyle
.BreakBeforeBinaryOperators
= FormatStyle::BOS_NonAssignment
;
427 GoogleStyle
.ColumnLimit
= 100;
428 GoogleStyle
.SpaceAfterCStyleCast
= true;
429 GoogleStyle
.SpacesBeforeTrailingComments
= 1;
430 } else if (Language
== FormatStyle::LK_JavaScript
) {
431 GoogleStyle
.BreakBeforeTernaryOperators
= false;
432 GoogleStyle
.MaxEmptyLinesToKeep
= 3;
433 GoogleStyle
.SpacesInContainerLiterals
= false;
434 GoogleStyle
.AllowShortFunctionsOnASingleLine
= FormatStyle::SFS_Inline
;
435 GoogleStyle
.AlwaysBreakBeforeMultilineStrings
= false;
436 } else if (Language
== FormatStyle::LK_Proto
) {
437 GoogleStyle
.AllowShortFunctionsOnASingleLine
= FormatStyle::SFS_None
;
438 GoogleStyle
.SpacesInContainerLiterals
= false;
444 FormatStyle
getChromiumStyle(FormatStyle::LanguageKind Language
) {
445 FormatStyle ChromiumStyle
= getGoogleStyle(Language
);
446 if (Language
== FormatStyle::LK_Java
) {
447 ChromiumStyle
.AllowShortIfStatementsOnASingleLine
= true;
448 ChromiumStyle
.IndentWidth
= 4;
449 ChromiumStyle
.ContinuationIndentWidth
= 8;
451 ChromiumStyle
.AllowAllParametersOfDeclarationOnNextLine
= false;
452 ChromiumStyle
.AllowShortFunctionsOnASingleLine
= FormatStyle::SFS_Inline
;
453 ChromiumStyle
.AllowShortIfStatementsOnASingleLine
= false;
454 ChromiumStyle
.AllowShortLoopsOnASingleLine
= false;
455 ChromiumStyle
.BinPackParameters
= false;
456 ChromiumStyle
.DerivePointerAlignment
= false;
458 return ChromiumStyle
;
461 FormatStyle
getMozillaStyle() {
462 FormatStyle MozillaStyle
= getLLVMStyle();
463 MozillaStyle
.AllowAllParametersOfDeclarationOnNextLine
= false;
464 MozillaStyle
.Cpp11BracedListStyle
= false;
465 MozillaStyle
.ConstructorInitializerAllOnOneLineOrOnePerLine
= true;
466 MozillaStyle
.DerivePointerAlignment
= true;
467 MozillaStyle
.IndentCaseLabels
= true;
468 MozillaStyle
.ObjCSpaceAfterProperty
= true;
469 MozillaStyle
.ObjCSpaceBeforeProtocolList
= false;
470 MozillaStyle
.PenaltyReturnTypeOnItsOwnLine
= 200;
471 MozillaStyle
.PointerAlignment
= FormatStyle::PAS_Left
;
472 MozillaStyle
.Standard
= FormatStyle::LS_Cpp03
;
476 FormatStyle
getWebKitStyle() {
477 FormatStyle Style
= getLLVMStyle();
478 Style
.AccessModifierOffset
= -4;
479 Style
.AlignAfterOpenBracket
= false;
480 Style
.AlignOperands
= false;
481 Style
.AlignTrailingComments
= false;
482 Style
.BreakBeforeBinaryOperators
= FormatStyle::BOS_All
;
483 Style
.BreakBeforeBraces
= FormatStyle::BS_Stroustrup
;
484 Style
.BreakConstructorInitializersBeforeComma
= true;
485 Style
.Cpp11BracedListStyle
= false;
486 Style
.ColumnLimit
= 0;
487 Style
.IndentWidth
= 4;
488 Style
.NamespaceIndentation
= FormatStyle::NI_Inner
;
489 Style
.ObjCBlockIndentWidth
= 4;
490 Style
.ObjCSpaceAfterProperty
= true;
491 Style
.PointerAlignment
= FormatStyle::PAS_Left
;
492 Style
.Standard
= FormatStyle::LS_Cpp03
;
496 FormatStyle
getGNUStyle() {
497 FormatStyle Style
= getLLVMStyle();
498 Style
.AlwaysBreakAfterDefinitionReturnType
= true;
499 Style
.BreakBeforeBinaryOperators
= FormatStyle::BOS_All
;
500 Style
.BreakBeforeBraces
= FormatStyle::BS_GNU
;
501 Style
.BreakBeforeTernaryOperators
= true;
502 Style
.Cpp11BracedListStyle
= false;
503 Style
.ColumnLimit
= 79;
504 Style
.SpaceBeforeParens
= FormatStyle::SBPO_Always
;
505 Style
.Standard
= FormatStyle::LS_Cpp03
;
509 FormatStyle
getNoStyle() {
510 FormatStyle NoStyle
= getLLVMStyle();
511 NoStyle
.DisableFormat
= true;
515 bool getPredefinedStyle(StringRef Name
, FormatStyle::LanguageKind Language
,
516 FormatStyle
*Style
) {
517 if (Name
.equals_lower("llvm")) {
518 *Style
= getLLVMStyle();
519 } else if (Name
.equals_lower("chromium")) {
520 *Style
= getChromiumStyle(Language
);
521 } else if (Name
.equals_lower("mozilla")) {
522 *Style
= getMozillaStyle();
523 } else if (Name
.equals_lower("google")) {
524 *Style
= getGoogleStyle(Language
);
525 } else if (Name
.equals_lower("webkit")) {
526 *Style
= getWebKitStyle();
527 } else if (Name
.equals_lower("gnu")) {
528 *Style
= getGNUStyle();
529 } else if (Name
.equals_lower("none")) {
530 *Style
= getNoStyle();
535 Style
->Language
= Language
;
539 std::error_code
parseConfiguration(StringRef Text
, FormatStyle
*Style
) {
541 FormatStyle::LanguageKind Language
= Style
->Language
;
542 assert(Language
!= FormatStyle::LK_None
);
543 if (Text
.trim().empty())
544 return make_error_code(ParseError::Error
);
546 std::vector
<FormatStyle
> Styles
;
547 llvm::yaml::Input
Input(Text
);
548 // DocumentListTraits<vector<FormatStyle>> uses the context to get default
549 // values for the fields, keys for which are missing from the configuration.
550 // Mapping also uses the context to get the language to find the correct
552 Input
.setContext(Style
);
555 return Input
.error();
557 for (unsigned i
= 0; i
< Styles
.size(); ++i
) {
558 // Ensures that only the first configuration can skip the Language option.
559 if (Styles
[i
].Language
== FormatStyle::LK_None
&& i
!= 0)
560 return make_error_code(ParseError::Error
);
561 // Ensure that each language is configured at most once.
562 for (unsigned j
= 0; j
< i
; ++j
) {
563 if (Styles
[i
].Language
== Styles
[j
].Language
) {
565 << "Duplicate languages in the config file on positions " << j
566 << " and " << i
<< "\n");
567 return make_error_code(ParseError::Error
);
571 // Look for a suitable configuration starting from the end, so we can
572 // find the configuration for the specific language first, and the default
573 // configuration (which can only be at slot 0) after it.
574 for (int i
= Styles
.size() - 1; i
>= 0; --i
) {
575 if (Styles
[i
].Language
== Language
||
576 Styles
[i
].Language
== FormatStyle::LK_None
) {
578 Style
->Language
= Language
;
579 return make_error_code(ParseError::Success
);
582 return make_error_code(ParseError::Unsuitable
);
585 std::string
configurationAsText(const FormatStyle
&Style
) {
587 llvm::raw_string_ostream
Stream(Text
);
588 llvm::yaml::Output
Output(Stream
);
589 // We use the same mapping method for input and output, so we need a non-const
591 FormatStyle NonConstStyle
= Style
;
592 Output
<< NonConstStyle
;
598 class FormatTokenLexer
{
600 FormatTokenLexer(SourceManager
&SourceMgr
, FileID ID
, FormatStyle
&Style
,
601 encoding::Encoding Encoding
)
602 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
603 Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr
), ID(ID
),
604 Style(Style
), IdentTable(getFormattingLangOpts(Style
)),
605 Keywords(IdentTable
), Encoding(Encoding
), FirstInLineIndex(0),
606 FormattingDisabled(false) {
607 Lex
.reset(new Lexer(ID
, SourceMgr
.getBuffer(ID
), SourceMgr
,
608 getFormattingLangOpts(Style
)));
609 Lex
->SetKeepWhitespaceMode(true);
611 for (const std::string
&ForEachMacro
: Style
.ForEachMacros
)
612 ForEachMacros
.push_back(&IdentTable
.get(ForEachMacro
));
613 std::sort(ForEachMacros
.begin(), ForEachMacros
.end());
616 ArrayRef
<FormatToken
*> lex() {
617 assert(Tokens
.empty());
618 assert(FirstInLineIndex
== 0);
620 Tokens
.push_back(getNextToken());
621 tryMergePreviousTokens();
622 if (Tokens
.back()->NewlinesBefore
> 0)
623 FirstInLineIndex
= Tokens
.size() - 1;
624 } while (Tokens
.back()->Tok
.isNot(tok::eof
));
628 const AdditionalKeywords
&getKeywords() { return Keywords
; }
631 void tryMergePreviousTokens() {
632 if (tryMerge_TMacro())
634 if (tryMergeConflictMarkers())
637 if (Style
.Language
== FormatStyle::LK_JavaScript
) {
638 if (tryMergeJSRegexLiteral())
640 if (tryMergeEscapeSequence())
643 static tok::TokenKind JSIdentity
[] = { tok::equalequal
, tok::equal
};
644 static tok::TokenKind JSNotIdentity
[] = { tok::exclaimequal
, tok::equal
};
645 static tok::TokenKind JSShiftEqual
[] = { tok::greater
, tok::greater
,
647 static tok::TokenKind JSRightArrow
[] = { tok::equal
, tok::greater
};
648 // FIXME: We probably need to change token type to mimic operator with the
650 if (tryMergeTokens(JSIdentity
))
652 if (tryMergeTokens(JSNotIdentity
))
654 if (tryMergeTokens(JSShiftEqual
))
656 if (tryMergeTokens(JSRightArrow
))
661 bool tryMergeTokens(ArrayRef
<tok::TokenKind
> Kinds
) {
662 if (Tokens
.size() < Kinds
.size())
665 SmallVectorImpl
<FormatToken
*>::const_iterator First
=
666 Tokens
.end() - Kinds
.size();
667 if (!First
[0]->is(Kinds
[0]))
669 unsigned AddLength
= 0;
670 for (unsigned i
= 1; i
< Kinds
.size(); ++i
) {
671 if (!First
[i
]->is(Kinds
[i
]) || First
[i
]->WhitespaceRange
.getBegin() !=
672 First
[i
]->WhitespaceRange
.getEnd())
674 AddLength
+= First
[i
]->TokenText
.size();
676 Tokens
.resize(Tokens
.size() - Kinds
.size() + 1);
677 First
[0]->TokenText
= StringRef(First
[0]->TokenText
.data(),
678 First
[0]->TokenText
.size() + AddLength
);
679 First
[0]->ColumnWidth
+= AddLength
;
683 // Tries to merge an escape sequence, i.e. a "\\" and the following
684 // character. Use e.g. inside JavaScript regex literals.
685 bool tryMergeEscapeSequence() {
686 if (Tokens
.size() < 2)
688 FormatToken
*Previous
= Tokens
[Tokens
.size() - 2];
689 if (Previous
->isNot(tok::unknown
) || Previous
->TokenText
!= "\\")
691 ++Previous
->ColumnWidth
;
692 StringRef Text
= Previous
->TokenText
;
693 Previous
->TokenText
= StringRef(Text
.data(), Text
.size() + 1);
694 resetLexer(SourceMgr
.getFileOffset(Tokens
.back()->Tok
.getLocation()) + 1);
695 Tokens
.resize(Tokens
.size() - 1);
696 Column
= Previous
->OriginalColumn
+ Previous
->ColumnWidth
;
700 // Try to determine whether the current token ends a JavaScript regex literal.
701 // We heuristically assume that this is a regex literal if we find two
702 // unescaped slashes on a line and the token before the first slash is one of
703 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
705 bool tryMergeJSRegexLiteral() {
706 if (Tokens
.size() < 2)
708 // If a regex literal ends in "\//", this gets represented by an unknown
709 // token "\" and a comment.
710 bool MightEndWithEscapedSlash
=
711 Tokens
.back()->is(tok::comment
) &&
712 Tokens
.back()->TokenText
.startswith("//") &&
713 Tokens
[Tokens
.size() - 2]->TokenText
== "\\";
714 if (!MightEndWithEscapedSlash
&&
715 (Tokens
.back()->isNot(tok::slash
) ||
716 (Tokens
[Tokens
.size() - 2]->is(tok::unknown
) &&
717 Tokens
[Tokens
.size() - 2]->TokenText
== "\\")))
719 unsigned TokenCount
= 0;
720 unsigned LastColumn
= Tokens
.back()->OriginalColumn
;
721 for (auto I
= Tokens
.rbegin() + 1, E
= Tokens
.rend(); I
!= E
; ++I
) {
723 if (I
[0]->is(tok::slash
) && I
+ 1 != E
&&
724 (I
[1]->isOneOf(tok::l_paren
, tok::semi
, tok::l_brace
, tok::r_brace
,
725 tok::exclaim
, tok::l_square
, tok::colon
, tok::comma
,
726 tok::question
, tok::kw_return
) ||
727 I
[1]->isBinaryOperator())) {
728 if (MightEndWithEscapedSlash
) {
729 // This regex literal ends in '\//'. Skip past the '//' of the last
730 // token and re-start lexing from there.
731 SourceLocation Loc
= Tokens
.back()->Tok
.getLocation();
732 resetLexer(SourceMgr
.getFileOffset(Loc
) + 2);
734 Tokens
.resize(Tokens
.size() - TokenCount
);
735 Tokens
.back()->Tok
.setKind(tok::unknown
);
736 Tokens
.back()->Type
= TT_RegexLiteral
;
737 Tokens
.back()->ColumnWidth
+= LastColumn
- I
[0]->OriginalColumn
;
741 // There can't be a newline inside a regex literal.
742 if (I
[0]->NewlinesBefore
> 0)
748 bool tryMerge_TMacro() {
749 if (Tokens
.size() < 4)
751 FormatToken
*Last
= Tokens
.back();
752 if (!Last
->is(tok::r_paren
))
755 FormatToken
*String
= Tokens
[Tokens
.size() - 2];
756 if (!String
->is(tok::string_literal
) || String
->IsMultiline
)
759 if (!Tokens
[Tokens
.size() - 3]->is(tok::l_paren
))
762 FormatToken
*Macro
= Tokens
[Tokens
.size() - 4];
763 if (Macro
->TokenText
!= "_T")
766 const char *Start
= Macro
->TokenText
.data();
767 const char *End
= Last
->TokenText
.data() + Last
->TokenText
.size();
768 String
->TokenText
= StringRef(Start
, End
- Start
);
769 String
->IsFirst
= Macro
->IsFirst
;
770 String
->LastNewlineOffset
= Macro
->LastNewlineOffset
;
771 String
->WhitespaceRange
= Macro
->WhitespaceRange
;
772 String
->OriginalColumn
= Macro
->OriginalColumn
;
773 String
->ColumnWidth
= encoding::columnWidthWithTabs(
774 String
->TokenText
, String
->OriginalColumn
, Style
.TabWidth
, Encoding
);
779 Tokens
.back() = String
;
783 bool tryMergeConflictMarkers() {
784 if (Tokens
.back()->NewlinesBefore
== 0 && Tokens
.back()->isNot(tok::eof
))
787 // Conflict lines look like:
788 // <marker> <text from the vcs>
790 // >>>>>>> /file/in/file/system at revision 1234
792 // We merge all tokens in a line that starts with a conflict marker
793 // into a single token with a special token type that the unwrapped line
794 // parser will use to correctly rebuild the underlying code.
797 // Get the position of the first token in the line.
798 unsigned FirstInLineOffset
;
799 std::tie(ID
, FirstInLineOffset
) = SourceMgr
.getDecomposedLoc(
800 Tokens
[FirstInLineIndex
]->getStartOfNonWhitespace());
801 StringRef Buffer
= SourceMgr
.getBuffer(ID
)->getBuffer();
802 // Calculate the offset of the start of the current line.
803 auto LineOffset
= Buffer
.rfind('\n', FirstInLineOffset
);
804 if (LineOffset
== StringRef::npos
) {
810 auto FirstSpace
= Buffer
.find_first_of(" \n", LineOffset
);
812 if (FirstSpace
== StringRef::npos
) {
813 LineStart
= Buffer
.substr(LineOffset
);
815 LineStart
= Buffer
.substr(LineOffset
, FirstSpace
- LineOffset
);
818 TokenType Type
= TT_Unknown
;
819 if (LineStart
== "<<<<<<<" || LineStart
== ">>>>") {
820 Type
= TT_ConflictStart
;
821 } else if (LineStart
== "|||||||" || LineStart
== "=======" ||
822 LineStart
== "====") {
823 Type
= TT_ConflictAlternative
;
824 } else if (LineStart
== ">>>>>>>" || LineStart
== "<<<<") {
825 Type
= TT_ConflictEnd
;
828 if (Type
!= TT_Unknown
) {
829 FormatToken
*Next
= Tokens
.back();
831 Tokens
.resize(FirstInLineIndex
+ 1);
832 // We do not need to build a complete token here, as we will skip it
833 // during parsing anyway (as we must not touch whitespace around conflict
835 Tokens
.back()->Type
= Type
;
836 Tokens
.back()->Tok
.setKind(tok::kw___unknown_anytype
);
838 Tokens
.push_back(Next
);
// Produces the next FormatToken and stores it in FormatTok.
// If a previous call split a '>>' (GreaterStashed is set), a second '>' token
// is synthesized from the current one. Otherwise a raw token is lexed, any
// leading tok::unknown tokens are scanned character by character as
// whitespace, and the token's column-width fields are computed.
// NOTE(review): the switch labels, some Column updates and the return
// statements are not visible in this excerpt.
845 FormatToken
*getNextToken() {
846 if (GreaterStashed
) {
847 // Create a synthesized second '>' token.
848 // FIXME: Increment Column and set OriginalColumn.
849 Token Greater
= FormatTok
->Tok
;
850 FormatTok
= new (Allocator
.Allocate()) FormatToken
;
851 FormatTok
->Tok
= Greater
;
852 SourceLocation GreaterLocation
=
853 FormatTok
->Tok
.getLocation().getLocWithOffset(1);
854 FormatTok
->WhitespaceRange
=
855 SourceRange(GreaterLocation
, GreaterLocation
);
856 FormatTok
->TokenText
= ">";
857 FormatTok
->ColumnWidth
= 1;
858 GreaterStashed
= false;
// Regular path: allocate a fresh token and lex into it. The whitespace range
// starts TrailingWhitespace characters before the token location, so blanks
// trimmed from a preceding comment are counted as this token's whitespace.
862 FormatTok
= new (Allocator
.Allocate()) FormatToken
;
863 readRawToken(*FormatTok
);
864 SourceLocation WhitespaceStart
=
865 FormatTok
->Tok
.getLocation().getLocWithOffset(-TrailingWhitespace
);
866 FormatTok
->IsFirst
= IsFirstToken
;
867 IsFirstToken
= false;
869 // Consume and record whitespace until we find a significant token.
870 unsigned WhitespaceLength
= TrailingWhitespace
;
871 while (FormatTok
->Tok
.is(tok::unknown
)) {
872 for (int i
= 0, e
= FormatTok
->TokenText
.size(); i
!= e
; ++i
) {
873 switch (FormatTok
->TokenText
[i
]) {
875 ++FormatTok
->NewlinesBefore
;
876 // FIXME: This is technically incorrect, as it could also
877 // be a literal backslash at the end of the line.
878 if (i
== 0 || (FormatTok
->TokenText
[i
- 1] != '\\' &&
879 (FormatTok
->TokenText
[i
- 1] != '\r' || i
== 1 ||
880 FormatTok
->TokenText
[i
- 2] != '\\')))
881 FormatTok
->HasUnescapedNewline
= true;
882 FormatTok
->LastNewlineOffset
= WhitespaceLength
+ i
+ 1;
// Advance Column to the next multiple of Style.TabWidth.
894 Column
+= Style
.TabWidth
- Column
% Style
.TabWidth
;
897 if (i
+ 1 == e
|| (FormatTok
->TokenText
[i
+ 1] != '\r' &&
898 FormatTok
->TokenText
[i
+ 1] != '\n'))
899 FormatTok
->Type
= TT_ImplicitStringLiteral
;
902 FormatTok
->Type
= TT_ImplicitStringLiteral
;
908 if (FormatTok
->is(TT_ImplicitStringLiteral
))
910 WhitespaceLength
+= FormatTok
->Tok
.getLength();
912 readRawToken(*FormatTok
);
915 // In case the token starts with escaped newlines, we want to
916 // take them into account as whitespace - this pattern is quite frequent
917 // in macro definitions.
918 // FIXME: Add a more explicit test.
919 while (FormatTok
->TokenText
.size() > 1 && FormatTok
->TokenText
[0] == '\\' &&
920 FormatTok
->TokenText
[1] == '\n') {
921 ++FormatTok
->NewlinesBefore
;
922 WhitespaceLength
+= 2;
924 FormatTok
->TokenText
= FormatTok
->TokenText
.substr(2);
927 FormatTok
->WhitespaceRange
= SourceRange(
928 WhitespaceStart
, WhitespaceStart
.getLocWithOffset(WhitespaceLength
));
930 FormatTok
->OriginalColumn
= Column
;
// Comments have trailing blanks removed from TokenText. The number of removed
// characters is kept in TrailingWhitespace for the next call.
932 TrailingWhitespace
= 0;
933 if (FormatTok
->Tok
.is(tok::comment
)) {
934 // FIXME: Add the trimmed whitespace to Column.
935 StringRef UntrimmedText
= FormatTok
->TokenText
;
936 FormatTok
->TokenText
= FormatTok
->TokenText
.rtrim(" \t\v\f");
937 TrailingWhitespace
= UntrimmedText
.size() - FormatTok
->TokenText
.size();
938 } else if (FormatTok
->Tok
.is(tok::raw_identifier
)) {
// Resolve the raw identifier through IdentTable so keywords get their kind.
939 IdentifierInfo
&Info
= IdentTable
.get(FormatTok
->TokenText
);
940 FormatTok
->Tok
.setIdentifierInfo(&Info
);
941 FormatTok
->Tok
.setKind(Info
.getTokenID());
// For Java, struct/union/delete are demoted to plain identifiers.
942 if (Style
.Language
== FormatStyle::LK_Java
&&
943 FormatTok
->isOneOf(tok::kw_struct
, tok::kw_union
, tok::kw_delete
)) {
944 FormatTok
->Tok
.setKind(tok::identifier
);
945 FormatTok
->Tok
.setIdentifierInfo(nullptr);
// '>>' is split: this token becomes a single '>' and GreaterStashed makes the
// next call synthesize the second one.
947 } else if (FormatTok
->Tok
.is(tok::greatergreater
)) {
948 FormatTok
->Tok
.setKind(tok::greater
);
949 FormatTok
->TokenText
= FormatTok
->TokenText
.substr(0, 1);
950 GreaterStashed
= true;
953 // Now FormatTok is the next non-whitespace token.
955 StringRef Text
= FormatTok
->TokenText
;
// Tokens without a newline get a single width; tokens containing a newline
// are flagged IsMultiline and get separate first-line and last-line widths.
956 size_t FirstNewlinePos
= Text
.find('\n');
957 if (FirstNewlinePos
== StringRef::npos
) {
958 // FIXME: ColumnWidth actually depends on the start column, we need to
959 // take this into account when the token is moved.
960 FormatTok
->ColumnWidth
=
961 encoding::columnWidthWithTabs(Text
, Column
, Style
.TabWidth
, Encoding
);
962 Column
+= FormatTok
->ColumnWidth
;
964 FormatTok
->IsMultiline
= true;
965 // FIXME: ColumnWidth actually depends on the start column, we need to
966 // take this into account when the token is moved.
967 FormatTok
->ColumnWidth
= encoding::columnWidthWithTabs(
968 Text
.substr(0, FirstNewlinePos
), Column
, Style
.TabWidth
, Encoding
);
970 // The last line of the token always starts in column 0.
971 // Thus, the length can be precomputed even in the presence of tabs.
972 FormatTok
->LastLineColumnWidth
= encoding::columnWidthWithTabs(
973 Text
.substr(Text
.find_last_of('\n') + 1), 0, Style
.TabWidth
,
975 Column
= FormatTok
->LastLineColumnWidth
;
// std::binary_search requires ForEachMacros to be sorted.
// NOTE(review): the sort is not visible in this excerpt - confirm where
// ForEachMacros is populated.
978 FormatTok
->IsForEachMacro
=
979 std::binary_search(ForEachMacros
.begin(), ForEachMacros
.end(),
980 FormatTok
->Tok
.getIdentifierInfo());
// Data members of the lexer.
// FormatTok is the token most recently produced by getNextToken().
985 FormatToken
*FormatTok
;
// Number of blanks trimmed from the end of the previous comment token; set
// and consumed in getNextToken().
989 unsigned TrailingWhitespace
;
// Raw lexer; (re)created by resetLexer().
990 std::unique_ptr
<Lexer
> Lex
;
991 SourceManager
&SourceMgr
;
994 IdentifierTable IdentTable
;
995 AdditionalKeywords Keywords
;
996 encoding::Encoding Encoding
;
// Backs every FormatToken created with placement new in getNextToken().
997 llvm::SpecificBumpPtrAllocator
<FormatToken
> Allocator
;
998 // Index (in 'Tokens') of the last token that starts a new line.
999 unsigned FirstInLineIndex
;
1000 SmallVector
<FormatToken
*, 16> Tokens
;
// Searched with std::binary_search in getNextToken().
1001 SmallVector
<IdentifierInfo
*, 8> ForEachMacros
;
// Toggled by the "clang-format off" / "clang-format on" comments seen in
// readRawToken().
1003 bool FormattingDisabled
;
// Lexes one raw token into Tok and records its source text in Tok.TokenText.
// An unknown token starting with '"' is reclassified as a string literal and
// flagged IsUnterminatedLiteral. For JavaScript, an unknown token whose text
// is exactly "''" becomes a char_constant. Also tracks the
// "clang-format on/off" comments and sets Tok.Finalized from the current
// FormattingDisabled state.
1005 void readRawToken(FormatToken
&Tok
) {
1006 Lex
->LexFromRawLexer(Tok
.Tok
);
1007 Tok
.TokenText
= StringRef(SourceMgr
.getCharacterData(Tok
.Tok
.getLocation()),
1008 Tok
.Tok
.getLength());
1009 // For formatting, treat unterminated string literals like normal string
1011 if (Tok
.is(tok::unknown
)) {
1012 if (!Tok
.TokenText
.empty() && Tok
.TokenText
[0] == '"') {
1013 Tok
.Tok
.setKind(tok::string_literal
);
1014 Tok
.IsUnterminatedLiteral
= true;
1015 } else if (Style
.Language
== FormatStyle::LK_JavaScript
&&
1016 Tok
.TokenText
== "''") {
1017 Tok
.Tok
.setKind(tok::char_constant
);
// Ordering matters: "on" is processed before Finalized is assigned and "off"
// after it, so neither marker comment is finalized by its own directive.
1021 if (Tok
.is(tok::comment
) && (Tok
.TokenText
== "// clang-format on" ||
1022 Tok
.TokenText
== "/* clang-format on */")) {
1023 FormattingDisabled
= false;
1026 Tok
.Finalized
= FormattingDisabled
;
1028 if (Tok
.is(tok::comment
) && (Tok
.TokenText
== "// clang-format off" ||
1029 Tok
.TokenText
== "/* clang-format off */")) {
1030 FormattingDisabled
= true;
// Replaces Lex with a new Lexer over the buffer of ID, positioned at
// Buffer.begin() + Offset, and enables keep-whitespace mode on it.
1034 void resetLexer(unsigned Offset
) {
1035 StringRef Buffer
= SourceMgr
.getBufferData(ID
);
1036 Lex
.reset(new Lexer(SourceMgr
.getLocForStartOfFile(ID
),
1037 getFormattingLangOpts(Style
), Buffer
.begin(),
1038 Buffer
.begin() + Offset
, Buffer
.end()));
1039 Lex
->SetKeepWhitespaceMode(true);
// Maps a FormatStyle::LanguageKind to a display name. It is used for the
// "Language:" debug output and for the unsuitable-configuration message in
// getStyle().
// NOTE(review): only the "JavaScript" return is visible in this excerpt; the
// strings returned for the other cases are not shown here.
1043 static StringRef
getLanguageName(FormatStyle::LanguageKind Language
) {
1045 case FormatStyle::LK_Cpp
:
1047 case FormatStyle::LK_Java
:
1049 case FormatStyle::LK_JavaScript
:
1050 return "JavaScript";
1051 case FormatStyle::LK_Proto
:
// Formatter receives unwrapped lines (it derives from UnwrappedLineConsumer)
// and formats one file identified by ID.
// The constructor copies Ranges, starts UnwrappedLines with a single run, and
// derives the CRLF setting and the encoding from the buffer contents of ID.
1058 class Formatter
: public UnwrappedLineConsumer
{
1060 Formatter(const FormatStyle
&Style
, SourceManager
&SourceMgr
, FileID ID
,
1061 ArrayRef
<CharSourceRange
> Ranges
)
1062 : Style(Style
), ID(ID
), SourceMgr(SourceMgr
),
1063 Whitespaces(SourceMgr
, Style
,
1064 inputUsesCRLF(SourceMgr
.getBufferData(ID
))),
1065 Ranges(Ranges
.begin(), Ranges
.end()), UnwrappedLines(1),
1066 Encoding(encoding::detectEncoding(SourceMgr
.getBufferData(ID
))) {
1067 DEBUG(llvm::dbgs() << "File encoding: "
1068 << (Encoding
== encoding::Encoding_UTF8
? "UTF8"
1071 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style
.Language
)
// Lexes and parses the file, then formats every run of unwrapped lines and
// merges the replacements of each run into Result.
// The loop stops before the last run, which the assert requires to be empty.
// AnnotatedLine objects are allocated with new for each run and deleted once
// the run has been formatted; Whitespaces is reset after each run.
1075 tooling::Replacements
format() {
1076 tooling::Replacements Result
;
1077 FormatTokenLexer
Tokens(SourceMgr
, ID
, Style
, Encoding
);
1079 UnwrappedLineParser
Parser(Style
, Tokens
.getKeywords(), Tokens
.lex(),
1081 bool StructuralError
= Parser
.parse();
1082 assert(UnwrappedLines
.rbegin()->empty());
1083 for (unsigned Run
= 0, RunE
= UnwrappedLines
.size(); Run
+ 1 != RunE
;
1085 DEBUG(llvm::dbgs() << "Run " << Run
<< "...\n");
1086 SmallVector
<AnnotatedLine
*, 16> AnnotatedLines
;
1087 for (unsigned i
= 0, e
= UnwrappedLines
[Run
].size(); i
!= e
; ++i
) {
1088 AnnotatedLines
.push_back(new AnnotatedLine(UnwrappedLines
[Run
][i
]));
1090 tooling::Replacements RunResult
=
1091 format(AnnotatedLines
, StructuralError
, Tokens
);
1093 llvm::dbgs() << "Replacements for run " << Run
<< ":\n";
1094 for (tooling::Replacements::iterator I
= RunResult
.begin(),
1095 E
= RunResult
.end();
1097 llvm::dbgs() << I
->toString() << "\n";
// Release the lines allocated for this run.
1100 for (unsigned i
= 0, e
= AnnotatedLines
.size(); i
!= e
; ++i
) {
1101 delete AnnotatedLines
[i
];
1103 Result
.insert(RunResult
.begin(), RunResult
.end());
1104 Whitespaces
.reset();
// Formats one run of annotated lines and returns the resulting replacements.
// Steps, in order: annotate every line, derive style settings from the
// annotated input, calculate formatting information per line, compute which
// lines are affected by the requested ranges, set comment line levels, then
// run UnwrappedLineFormatter with DryRun disabled.
// NOTE(review): StructuralError is not referenced in the visible lines.
1109 tooling::Replacements
format(SmallVectorImpl
<AnnotatedLine
*> &AnnotatedLines
,
1110 bool StructuralError
, FormatTokenLexer
&Tokens
) {
1111 TokenAnnotator
Annotator(Style
, Tokens
.getKeywords());
1112 for (unsigned i
= 0, e
= AnnotatedLines
.size(); i
!= e
; ++i
) {
1113 Annotator
.annotate(*AnnotatedLines
[i
]);
1115 deriveLocalStyle(AnnotatedLines
);
1116 for (unsigned i
= 0, e
= AnnotatedLines
.size(); i
!= e
; ++i
) {
1117 Annotator
.calculateFormattingInformation(*AnnotatedLines
[i
]);
1119 computeAffectedLines(AnnotatedLines
.begin(), AnnotatedLines
.end());
1121 Annotator
.setCommentLineLevels(AnnotatedLines
);
1122 ContinuationIndenter
Indenter(Style
, Tokens
.getKeywords(), SourceMgr
,
1123 Whitespaces
, Encoding
,
1124 BinPackInconclusiveFunctions
);
1125 UnwrappedLineFormatter
Formatter(&Indenter
, &Whitespaces
, Style
);
1126 Formatter
.format(AnnotatedLines
, /*DryRun=*/false);
1127 return Whitespaces
.generateReplacements();
1131 // Determines which lines are affected by the SourceRanges given as input.
1132 // Returns \c true if at least one line between I and E or one of their
1133 // children is affected.
// As a side effect, sets LeadingEmptyLinesAffected on each visited line.
// NOTE(review): the loop header and the iterator advance are not visible in
// this excerpt.
1134 bool computeAffectedLines(SmallVectorImpl
<AnnotatedLine
*>::iterator I
,
1135 SmallVectorImpl
<AnnotatedLine
*>::iterator E
) {
1136 bool SomeLineAffected
= false;
1137 const AnnotatedLine
*PreviousLine
= nullptr;
1139 AnnotatedLine
*Line
= *I
;
1140 Line
->LeadingEmptyLinesAffected
= affectsLeadingEmptyLines(*Line
->First
);
1142 // If a line is part of a preprocessor directive, it needs to be formatted
1143 // if any token within the directive is affected.
1144 if (Line
->InPPDirective
) {
1145 FormatToken
*Last
= Line
->Last
;
// Extend the directive over following lines whose first token is not
// preceded by an unescaped newline.
1146 SmallVectorImpl
<AnnotatedLine
*>::iterator PPEnd
= I
+ 1;
1147 while (PPEnd
!= E
&& !(*PPEnd
)->First
->HasUnescapedNewline
) {
1148 Last
= (*PPEnd
)->Last
;
1152 if (affectsTokenRange(*Line
->First
, *Last
,
1153 /*IncludeLeadingNewlines=*/false)) {
1154 SomeLineAffected
= true;
1155 markAllAsAffected(I
, PPEnd
);
1161 if (nonPPLineAffected(Line
, PreviousLine
))
1162 SomeLineAffected
= true;
1164 PreviousLine
= Line
;
1167 return SomeLineAffected
;
1170 // Determines whether 'Line' is affected by the SourceRanges given as input.
1171 // Returns \c true if line or one if its children is affected.
// Sets Line->ChildrenAffected, and sets Line->Affected when a token, a first
// child line, a moved line or a continued comment makes the line affected.
1172 bool nonPPLineAffected(AnnotatedLine
*Line
,
1173 const AnnotatedLine
*PreviousLine
) {
1174 bool SomeLineAffected
= false;
1175 Line
->ChildrenAffected
=
1176 computeAffectedLines(Line
->Children
.begin(), Line
->Children
.end());
1177 if (Line
->ChildrenAffected
)
1178 SomeLineAffected
= true;
1180 // Stores whether one of the line's tokens is directly affected.
1181 bool SomeTokenAffected
= false;
1182 // Stores whether we need to look at the leading newlines of the next token
1183 // in order to determine whether it was affected.
1184 bool IncludeLeadingNewlines
= false;
1186 // Stores whether the first child line of any of this line's tokens is
1188 bool SomeFirstChildAffected
= false;
1190 for (FormatToken
*Tok
= Line
->First
; Tok
; Tok
= Tok
->Next
) {
1191 // Determine whether 'Tok' was affected.
1192 if (affectsTokenRange(*Tok
, *Tok
, IncludeLeadingNewlines
))
1193 SomeTokenAffected
= true;
1195 // Determine whether the first child of 'Tok' was affected.
1196 if (!Tok
->Children
.empty() && Tok
->Children
.front()->Affected
)
1197 SomeFirstChildAffected
= true;
// Leading newlines are excluded for the first token and for any token that
// follows a token with children.
1199 IncludeLeadingNewlines
= Tok
->Children
.empty();
1202 // Was this line moved, i.e. has it previously been on the same line as an
1204 bool LineMoved
= PreviousLine
&& PreviousLine
->Affected
&&
1205 Line
->First
->NewlinesBefore
== 0;
// A line holding a single comment token, separated by fewer than two
// newlines from an affected previous line that ends in a comment.
1207 bool IsContinuedComment
=
1208 Line
->First
->is(tok::comment
) && Line
->First
->Next
== nullptr &&
1209 Line
->First
->NewlinesBefore
< 2 && PreviousLine
&&
1210 PreviousLine
->Affected
&& PreviousLine
->Last
->is(tok::comment
);
1212 if (SomeTokenAffected
|| SomeFirstChildAffected
|| LineMoved
||
1213 IsContinuedComment
) {
1214 Line
->Affected
= true;
1215 SomeLineAffected
= true;
1217 return SomeLineAffected
;
1220 // Marks all lines between I and E as well as all their children as affected.
// Children are handled by recursing into each line's Children range.
1221 void markAllAsAffected(SmallVectorImpl
<AnnotatedLine
*>::iterator I
,
1222 SmallVectorImpl
<AnnotatedLine
*>::iterator E
) {
1224 (*I
)->Affected
= true;
1225 markAllAsAffected((*I
)->Children
.begin(), (*I
)->Children
.end());
1230 // Returns true if the range from 'First' to 'Last' intersects with one of the
// The tested range begins at the start of First's whitespace, or at
// First.LastNewlineOffset into that whitespace when IncludeLeadingNewlines is
// false. It ends directly after the text of Last.
1232 bool affectsTokenRange(const FormatToken
&First
, const FormatToken
&Last
,
1233 bool IncludeLeadingNewlines
) {
1234 SourceLocation Start
= First
.WhitespaceRange
.getBegin();
1235 if (!IncludeLeadingNewlines
)
1236 Start
= Start
.getLocWithOffset(First
.LastNewlineOffset
);
1237 SourceLocation End
= Last
.getStartOfNonWhitespace();
1238 End
= End
.getLocWithOffset(Last
.TokenText
.size());
1239 CharSourceRange Range
= CharSourceRange::getCharRange(Start
, End
);
1240 return affectsCharSourceRange(Range
);
1243 // Returns true if one of the input ranges intersect the leading empty lines
// The tested range runs from the start of Tok's whitespace to
// Tok.LastNewlineOffset characters past that start.
1245 bool affectsLeadingEmptyLines(const FormatToken
&Tok
) {
1246 CharSourceRange EmptyLineRange
= CharSourceRange::getCharRange(
1247 Tok
.WhitespaceRange
.getBegin(),
1248 Tok
.WhitespaceRange
.getBegin().getLocWithOffset(Tok
.LastNewlineOffset
));
1249 return affectsCharSourceRange(EmptyLineRange
);
1252 // Returns true if 'Range' intersects with one of the input ranges.
// An input range matches when neither range ends before the other begins,
// as reported by SourceMgr.isBeforeInTranslationUnit.
1253 bool affectsCharSourceRange(const CharSourceRange
&Range
) {
1254 for (SmallVectorImpl
<CharSourceRange
>::const_iterator I
= Ranges
.begin(),
1257 if (!SourceMgr
.isBeforeInTranslationUnit(Range
.getEnd(), I
->getBegin()) &&
1258 !SourceMgr
.isBeforeInTranslationUnit(I
->getEnd(), Range
.getBegin()))
// Heuristic: returns true when the number of '\r' characters in Text is more
// than half the number of '\n' characters.
1264 static bool inputUsesCRLF(StringRef Text
) {
1265 return Text
.count('\r') * 2 > Text
.count('\n');
// Inspects the annotated input to derive style settings from the existing
// code. Starting at the second token of each line, it counts how
// pointer/reference tokens are spaced, detects spacing that is only valid
// after C++03 ('<::' and '>>' with no whitespace between the tokens), and
// records the observed argument packing kinds. The results then overwrite
// Style.PointerAlignment (when Style.DerivePointerAlignment is set),
// Style.Standard (when it is LS_Auto), and BinPackInconclusiveFunctions.
// NOTE(review): the token loop header and some lines are not visible in this
// excerpt.
1269 deriveLocalStyle(const SmallVectorImpl
<AnnotatedLine
*> &AnnotatedLines
) {
1270 unsigned CountBoundToVariable
= 0;
1271 unsigned CountBoundToType
= 0;
1272 bool HasCpp03IncompatibleFormat
= false;
1273 bool HasBinPackedFunction
= false;
1274 bool HasOnePerLineFunction
= false;
1275 for (unsigned i
= 0, e
= AnnotatedLines
.size(); i
!= e
; ++i
) {
1276 if (!AnnotatedLines
[i
]->First
->Next
)
1278 FormatToken
*Tok
= AnnotatedLines
[i
]->First
->Next
;
1280 if (Tok
->is(TT_PointerOrReference
)) {
1282 Tok
->WhitespaceRange
.getBegin() != Tok
->WhitespaceRange
.getEnd();
1283 bool SpacesAfter
= Tok
->Next
->WhitespaceRange
.getBegin() !=
1284 Tok
->Next
->WhitespaceRange
.getEnd();
// Whitespace only before the pointer token counts as bound to the variable.
1285 if (SpacesBefore
&& !SpacesAfter
)
1286 ++CountBoundToVariable
;
1287 else if (!SpacesBefore
&& SpacesAfter
)
// The checks below apply only to tokens with an empty whitespace range.
1291 if (Tok
->WhitespaceRange
.getBegin() == Tok
->WhitespaceRange
.getEnd()) {
1292 if (Tok
->is(tok::coloncolon
) && Tok
->Previous
->is(TT_TemplateOpener
))
1293 HasCpp03IncompatibleFormat
= true;
1294 if (Tok
->is(TT_TemplateCloser
) &&
1295 Tok
->Previous
->is(TT_TemplateCloser
))
1296 HasCpp03IncompatibleFormat
= true;
1299 if (Tok
->PackingKind
== PPK_BinPacked
)
1300 HasBinPackedFunction
= true;
1301 if (Tok
->PackingKind
== PPK_OnePerLine
)
1302 HasOnePerLineFunction
= true;
// On a tie between the two counts, PointerAlignment is left unchanged.
1307 if (Style
.DerivePointerAlignment
) {
1308 if (CountBoundToType
> CountBoundToVariable
)
1309 Style
.PointerAlignment
= FormatStyle::PAS_Left
;
1310 else if (CountBoundToType
< CountBoundToVariable
)
1311 Style
.PointerAlignment
= FormatStyle::PAS_Right
;
1313 if (Style
.Standard
== FormatStyle::LS_Auto
) {
1314 Style
.Standard
= HasCpp03IncompatibleFormat
? FormatStyle::LS_Cpp11
1315 : FormatStyle::LS_Cpp03
;
// Bin packing is assumed unless only one-per-line packing was observed.
1317 BinPackInconclusiveFunctions
=
1318 HasBinPackedFunction
|| !HasOnePerLineFunction
;
// UnwrappedLineConsumer override: appends TheLine to the current (last) run
// in UnwrappedLines. Asserts that at least one run exists.
1321 void consumeUnwrappedLine(const UnwrappedLine
&TheLine
) override
{
1322 assert(!UnwrappedLines
.empty());
1323 UnwrappedLines
.back().push_back(TheLine
);
// UnwrappedLineConsumer override: closes the current run by appending a new,
// empty run to UnwrappedLines.
1326 void finishRun() override
{
1327 UnwrappedLines
.push_back(SmallVector
<UnwrappedLine
, 16>());
// Data members of Formatter.
1332 SourceManager
&SourceMgr
;
// Collects whitespace changes; format() resets it after every run.
1333 WhitespaceManager Whitespaces
;
// Source ranges requested for formatting; consulted by
// affectsCharSourceRange().
1334 SmallVector
<CharSourceRange
, 8> Ranges
;
// One inner vector per run; filled by consumeUnwrappedLine() and finishRun().
1335 SmallVector
<SmallVector
<UnwrappedLine
, 16>, 2> UnwrappedLines
;
1337 encoding::Encoding Encoding
;
// Computed by deriveLocalStyle() and passed to ContinuationIndenter.
1338 bool BinPackInconclusiveFunctions
;
1341 } // end anonymous namespace
// Lexer-based overload: returns no replacements when Style.DisableFormat is
// set; otherwise forwards to the FileID overload using the file that
// contains Lex's current source location.
1343 tooling::Replacements
reformat(const FormatStyle
&Style
, Lexer
&Lex
,
1344 SourceManager
&SourceMgr
,
1345 ArrayRef
<CharSourceRange
> Ranges
) {
1346 if (Style
.DisableFormat
)
1347 return tooling::Replacements();
1348 return reformat(Style
, SourceMgr
,
1349 SourceMgr
.getFileID(Lex
.getSourceLocation()), Ranges
);
// FileID-based overload: returns no replacements when Style.DisableFormat is
// set; otherwise constructs a Formatter for the file and returns the result
// of Formatter::format().
1352 tooling::Replacements
reformat(const FormatStyle
&Style
,
1353 SourceManager
&SourceMgr
, FileID ID
,
1354 ArrayRef
<CharSourceRange
> Ranges
) {
1355 if (Style
.DisableFormat
)
1356 return tooling::Replacements();
1357 Formatter
formatter(Style
, SourceMgr
, ID
, Ranges
);
1358 return formatter
.format();
// String-based overload: formats Code as if it were the contents of FileName.
// Returns no replacements when Style.DisableFormat is set. Otherwise it builds
// a local SourceManager, registers Code as the contents of a virtual file
// named FileName, converts each offset/length Range into a CharSourceRange
// relative to the start of that file, and forwards to the FileID overload.
// NOTE(review): the declaration of ID is not visible in this excerpt.
1361 tooling::Replacements
reformat(const FormatStyle
&Style
, StringRef Code
,
1362 ArrayRef
<tooling::Range
> Ranges
,
1363 StringRef FileName
) {
1364 if (Style
.DisableFormat
)
1365 return tooling::Replacements();
1367 FileManager
Files((FileSystemOptions()));
1368 DiagnosticsEngine
Diagnostics(
1369 IntrusiveRefCntPtr
<DiagnosticIDs
>(new DiagnosticIDs
),
1370 new DiagnosticOptions
);
1371 SourceManager
SourceMgr(Diagnostics
, Files
);
1372 std::unique_ptr
<llvm::MemoryBuffer
> Buf
=
1373 llvm::MemoryBuffer::getMemBuffer(Code
, FileName
);
1374 const clang::FileEntry
*Entry
=
1375 Files
.getVirtualFile(FileName
, Buf
->getBufferSize(), 0);
// Ownership of Buf is moved into the SourceManager here.
1376 SourceMgr
.overrideFileContents(Entry
, std::move(Buf
));
1378 SourceMgr
.createFileID(Entry
, SourceLocation(), clang::SrcMgr::C_User
);
1379 SourceLocation StartOfFile
= SourceMgr
.getLocForStartOfFile(ID
);
1380 std::vector
<CharSourceRange
> CharRanges
;
1381 for (const tooling::Range
&Range
: Ranges
) {
1382 SourceLocation Start
= StartOfFile
.getLocWithOffset(Range
.getOffset());
1383 SourceLocation End
= Start
.getLocWithOffset(Range
.getLength());
1384 CharRanges
.push_back(CharSourceRange::getCharRange(Start
, End
));
1386 return reformat(Style
, SourceMgr
, ID
, CharRanges
);
// Builds the LangOptions used by the lexer for formatting.
// C++ is always enabled; C++11 and C++14 are enabled unless Style.Standard is
// LS_Cpp03; line comments are enabled; C++ operator names (alternative
// operator spellings) are enabled for every language except JavaScript and
// Java.
// NOTE(review): the end of this function is not visible in this excerpt.
1389 LangOptions
getFormattingLangOpts(const FormatStyle
&Style
) {
1390 LangOptions LangOpts
;
1391 LangOpts
.CPlusPlus
= 1;
1392 LangOpts
.CPlusPlus11
= Style
.Standard
== FormatStyle::LS_Cpp03
? 0 : 1;
1393 LangOpts
.CPlusPlus14
= Style
.Standard
== FormatStyle::LS_Cpp03
? 0 : 1;
1394 LangOpts
.LineComment
= 1;
1395 bool AlternativeOperators
= Style
.Language
!= FormatStyle::LK_JavaScript
&&
1396 Style
.Language
!= FormatStyle::LK_Java
;
1397 LangOpts
.CXXOperatorNames
= AlternativeOperators
? 1 : 0;
// Help text describing the values accepted by the -style option: the
// predefined style names, "file", and an inline "{key: value, ...}"
// configuration.
1404 const char *StyleOptionHelpDescription
=
1405 "Coding style, currently supports:\n"
1406 " LLVM, Google, Chromium, Mozilla, WebKit.\n"
1407 "Use -style=file to load style configuration from\n"
1408 ".clang-format file located in one of the parent\n"
1409 "directories of the source file (or current\n"
1410 "directory for stdin).\n"
1411 "Use -style=\"{key: value, ...}\" to set specific\n"
1412 "parameters, e.g.:\n"
1413 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
// Chooses the language from the file name extension: ".java" -> Java,
// ".js" -> JavaScript, ".proto"/".protodevel" -> Proto, anything else -> C++.
// NOTE(review): ".java" is matched case-sensitively (endswith) while the
// other extensions use endswith_lower - confirm whether this is intended.
1415 static FormatStyle::LanguageKind
getLanguageByFileName(StringRef FileName
) {
1416 if (FileName
.endswith(".java")) {
1417 return FormatStyle::LK_Java
;
1418 } else if (FileName
.endswith_lower(".js")) {
1419 return FormatStyle::LK_JavaScript
;
1420 } else if (FileName
.endswith_lower(".proto") ||
1421 FileName
.endswith_lower(".protodevel")) {
1422 return FormatStyle::LK_Proto
;
1424 return FormatStyle::LK_Cpp
;
// Resolves the FormatStyle to use for FileName.
// Starts from getLLVMStyle() with the language inferred from the file name,
// then applies FallbackStyle. StyleName is then interpreted as follows:
//  - text starting with "{" is parsed as an inline configuration;
//  - any value other than "file" (compared case-insensitively) is looked up
//    as a predefined style name;
//  - otherwise ".clang-format", then "_clang-format", is searched for,
//    starting at the absolute path of FileName and walking up through parent
//    paths.
// Problems are reported on llvm::errs().
// NOTE(review): the return statements and several branches are not visible
// in this excerpt.
1427 FormatStyle
getStyle(StringRef StyleName
, StringRef FileName
,
1428 StringRef FallbackStyle
) {
1429 FormatStyle Style
= getLLVMStyle();
1430 Style
.Language
= getLanguageByFileName(FileName
);
1431 if (!getPredefinedStyle(FallbackStyle
, Style
.Language
, &Style
)) {
1432 llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1433 << "\" using LLVM style\n";
1437 if (StyleName
.startswith("{")) {
1438 // Parse YAML/JSON style from the command line.
1439 if (std::error_code ec
= parseConfiguration(StyleName
, &Style
)) {
1440 llvm::errs() << "Error parsing -style: " << ec
.message() << ", using "
1441 << FallbackStyle
<< " style\n";
1446 if (!StyleName
.equals_lower("file")) {
1447 if (!getPredefinedStyle(StyleName
, Style
.Language
, &Style
))
1448 llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1453 // Look for .clang-format/_clang-format file in the file's parent directories.
// Names of configuration files that were rejected as unsuitable; reported at
// the end.
1454 SmallString
<128> UnsuitableConfigFiles
;
1455 SmallString
<128> Path(FileName
);
// The result of make_absolute is not checked here.
1456 llvm::sys::fs::make_absolute(Path
);
1457 for (StringRef Directory
= Path
; !Directory
.empty();
1458 Directory
= llvm::sys::path::parent_path(Directory
)) {
1459 if (!llvm::sys::fs::is_directory(Directory
))
1461 SmallString
<128> ConfigFile(Directory
);
1463 llvm::sys::path::append(ConfigFile
, ".clang-format");
1464 DEBUG(llvm::dbgs() << "Trying " << ConfigFile
<< "...\n");
1465 bool IsFile
= false;
1466 // Ignore errors from is_regular_file: we only need to know if we can read
1468 llvm::sys::fs::is_regular_file(Twine(ConfigFile
), IsFile
);
1471 // Try _clang-format too, since dotfiles are not commonly used on Windows.
1472 ConfigFile
= Directory
;
1473 llvm::sys::path::append(ConfigFile
, "_clang-format");
1474 DEBUG(llvm::dbgs() << "Trying " << ConfigFile
<< "...\n");
1475 llvm::sys::fs::is_regular_file(Twine(ConfigFile
), IsFile
);
1479 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> Text
=
1480 llvm::MemoryBuffer::getFile(ConfigFile
.c_str());
1481 if (std::error_code EC
= Text
.getError()) {
1482 llvm::errs() << EC
.message() << "\n";
1485 if (std::error_code ec
=
1486 parseConfiguration(Text
.get()->getBuffer(), &Style
)) {
// ParseError::Unsuitable results are collected into UnsuitableConfigFiles.
1487 if (ec
== ParseError::Unsuitable
) {
1488 if (!UnsuitableConfigFiles
.empty())
1489 UnsuitableConfigFiles
.append(", ");
1490 UnsuitableConfigFiles
.append(ConfigFile
);
1493 llvm::errs() << "Error reading " << ConfigFile
<< ": " << ec
.message()
1497 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile
<< "\n");
1501 llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1503 if (!UnsuitableConfigFiles
.empty()) {
1504 llvm::errs() << "Configuration file(s) do(es) not support "
1505 << getLanguageName(Style
.Language
) << ": "
1506 << UnsuitableConfigFiles
<< "\n";
1511 } // namespace format
1512 } // namespace clang