etc/services - sync with NetBSD-8
[minix.git] / external / bsd / llvm / dist / clang / lib / Format / Format.cpp
blob2a4721f2b3b7dbda84e6a00b46e3ceefa653bace
1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 //===----------------------------------------------------------------------===//
16 #include "ContinuationIndenter.h"
17 #include "TokenAnnotator.h"
18 #include "UnwrappedLineFormatter.h"
19 #include "UnwrappedLineParser.h"
20 #include "WhitespaceManager.h"
21 #include "clang/Basic/Diagnostic.h"
22 #include "clang/Basic/DiagnosticOptions.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/Lexer.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/Support/Allocator.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/YAMLTraits.h"
31 #include <queue>
32 #include <string>
34 #define DEBUG_TYPE "format-formatter"
36 using clang::format::FormatStyle;
38 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
40 namespace llvm {
41 namespace yaml {
42 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
43 static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
44 IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
45 IO.enumCase(Value, "Java", FormatStyle::LK_Java);
46 IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
47 IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
51 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> {
52 static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) {
53 IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03);
54 IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03);
55 IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11);
56 IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11);
57 IO.enumCase(Value, "Auto", FormatStyle::LS_Auto);
61 template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> {
62 static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) {
63 IO.enumCase(Value, "Never", FormatStyle::UT_Never);
64 IO.enumCase(Value, "false", FormatStyle::UT_Never);
65 IO.enumCase(Value, "Always", FormatStyle::UT_Always);
66 IO.enumCase(Value, "true", FormatStyle::UT_Always);
67 IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation);
71 template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> {
72 static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) {
73 IO.enumCase(Value, "None", FormatStyle::SFS_None);
74 IO.enumCase(Value, "false", FormatStyle::SFS_None);
75 IO.enumCase(Value, "All", FormatStyle::SFS_All);
76 IO.enumCase(Value, "true", FormatStyle::SFS_All);
77 IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline);
78 IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty);
82 template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> {
83 static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) {
84 IO.enumCase(Value, "All", FormatStyle::BOS_All);
85 IO.enumCase(Value, "true", FormatStyle::BOS_All);
86 IO.enumCase(Value, "None", FormatStyle::BOS_None);
87 IO.enumCase(Value, "false", FormatStyle::BOS_None);
88 IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment);
92 template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> {
93 static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) {
94 IO.enumCase(Value, "Attach", FormatStyle::BS_Attach);
95 IO.enumCase(Value, "Linux", FormatStyle::BS_Linux);
96 IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup);
97 IO.enumCase(Value, "Allman", FormatStyle::BS_Allman);
98 IO.enumCase(Value, "GNU", FormatStyle::BS_GNU);
102 template <>
103 struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> {
104 static void enumeration(IO &IO,
105 FormatStyle::NamespaceIndentationKind &Value) {
106 IO.enumCase(Value, "None", FormatStyle::NI_None);
107 IO.enumCase(Value, "Inner", FormatStyle::NI_Inner);
108 IO.enumCase(Value, "All", FormatStyle::NI_All);
112 template <>
113 struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> {
114 static void enumeration(IO &IO,
115 FormatStyle::PointerAlignmentStyle &Value) {
116 IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle);
117 IO.enumCase(Value, "Left", FormatStyle::PAS_Left);
118 IO.enumCase(Value, "Right", FormatStyle::PAS_Right);
120 // For backward compatibility.
121 IO.enumCase(Value, "true", FormatStyle::PAS_Left);
122 IO.enumCase(Value, "false", FormatStyle::PAS_Right);
126 template <>
127 struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> {
128 static void enumeration(IO &IO,
129 FormatStyle::SpaceBeforeParensOptions &Value) {
130 IO.enumCase(Value, "Never", FormatStyle::SBPO_Never);
131 IO.enumCase(Value, "ControlStatements",
132 FormatStyle::SBPO_ControlStatements);
133 IO.enumCase(Value, "Always", FormatStyle::SBPO_Always);
135 // For backward compatibility.
136 IO.enumCase(Value, "false", FormatStyle::SBPO_Never);
137 IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements);
141 template <> struct MappingTraits<FormatStyle> {
142 static void mapping(IO &IO, FormatStyle &Style) {
143 // When reading, read the language first, we need it for getPredefinedStyle.
144 IO.mapOptional("Language", Style.Language);
146 if (IO.outputting()) {
147 StringRef StylesArray[] = { "LLVM", "Google", "Chromium",
148 "Mozilla", "WebKit", "GNU" };
149 ArrayRef<StringRef> Styles(StylesArray);
150 for (size_t i = 0, e = Styles.size(); i < e; ++i) {
151 StringRef StyleName(Styles[i]);
152 FormatStyle PredefinedStyle;
153 if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) &&
154 Style == PredefinedStyle) {
155 IO.mapOptional("# BasedOnStyle", StyleName);
156 break;
159 } else {
160 StringRef BasedOnStyle;
161 IO.mapOptional("BasedOnStyle", BasedOnStyle);
162 if (!BasedOnStyle.empty()) {
163 FormatStyle::LanguageKind OldLanguage = Style.Language;
164 FormatStyle::LanguageKind Language =
165 ((FormatStyle *)IO.getContext())->Language;
166 if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) {
167 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
168 return;
170 Style.Language = OldLanguage;
174 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
175 IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket);
176 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
177 IO.mapOptional("AlignOperands", Style.AlignOperands);
178 IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
179 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
180 Style.AllowAllParametersOfDeclarationOnNextLine);
181 IO.mapOptional("AllowShortBlocksOnASingleLine",
182 Style.AllowShortBlocksOnASingleLine);
183 IO.mapOptional("AllowShortCaseLabelsOnASingleLine",
184 Style.AllowShortCaseLabelsOnASingleLine);
185 IO.mapOptional("AllowShortIfStatementsOnASingleLine",
186 Style.AllowShortIfStatementsOnASingleLine);
187 IO.mapOptional("AllowShortLoopsOnASingleLine",
188 Style.AllowShortLoopsOnASingleLine);
189 IO.mapOptional("AllowShortFunctionsOnASingleLine",
190 Style.AllowShortFunctionsOnASingleLine);
191 IO.mapOptional("AlwaysBreakAfterDefinitionReturnType",
192 Style.AlwaysBreakAfterDefinitionReturnType);
193 IO.mapOptional("AlwaysBreakTemplateDeclarations",
194 Style.AlwaysBreakTemplateDeclarations);
195 IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
196 Style.AlwaysBreakBeforeMultilineStrings);
197 IO.mapOptional("BreakBeforeBinaryOperators",
198 Style.BreakBeforeBinaryOperators);
199 IO.mapOptional("BreakBeforeTernaryOperators",
200 Style.BreakBeforeTernaryOperators);
201 IO.mapOptional("BreakConstructorInitializersBeforeComma",
202 Style.BreakConstructorInitializersBeforeComma);
203 IO.mapOptional("BinPackParameters", Style.BinPackParameters);
204 IO.mapOptional("BinPackArguments", Style.BinPackArguments);
205 IO.mapOptional("ColumnLimit", Style.ColumnLimit);
206 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
207 Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
208 IO.mapOptional("ConstructorInitializerIndentWidth",
209 Style.ConstructorInitializerIndentWidth);
210 IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment);
211 IO.mapOptional("ExperimentalAutoDetectBinPacking",
212 Style.ExperimentalAutoDetectBinPacking);
213 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
214 IO.mapOptional("IndentWrappedFunctionNames",
215 Style.IndentWrappedFunctionNames);
216 IO.mapOptional("IndentFunctionDeclarationAfterType",
217 Style.IndentWrappedFunctionNames);
218 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
219 IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks",
220 Style.KeepEmptyLinesAtTheStartOfBlocks);
221 IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
222 IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth);
223 IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty);
224 IO.mapOptional("ObjCSpaceBeforeProtocolList",
225 Style.ObjCSpaceBeforeProtocolList);
226 IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
227 Style.PenaltyBreakBeforeFirstCallParameter);
228 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
229 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
230 IO.mapOptional("PenaltyBreakFirstLessLess",
231 Style.PenaltyBreakFirstLessLess);
232 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
233 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
234 Style.PenaltyReturnTypeOnItsOwnLine);
235 IO.mapOptional("PointerAlignment", Style.PointerAlignment);
236 IO.mapOptional("SpacesBeforeTrailingComments",
237 Style.SpacesBeforeTrailingComments);
238 IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
239 IO.mapOptional("Standard", Style.Standard);
240 IO.mapOptional("IndentWidth", Style.IndentWidth);
241 IO.mapOptional("TabWidth", Style.TabWidth);
242 IO.mapOptional("UseTab", Style.UseTab);
243 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
244 IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
245 IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets);
246 IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
247 IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
248 IO.mapOptional("SpacesInCStyleCastParentheses",
249 Style.SpacesInCStyleCastParentheses);
250 IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast);
251 IO.mapOptional("SpacesInContainerLiterals",
252 Style.SpacesInContainerLiterals);
253 IO.mapOptional("SpaceBeforeAssignmentOperators",
254 Style.SpaceBeforeAssignmentOperators);
255 IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
256 IO.mapOptional("CommentPragmas", Style.CommentPragmas);
257 IO.mapOptional("ForEachMacros", Style.ForEachMacros);
259 // For backward compatibility.
260 if (!IO.outputting()) {
261 IO.mapOptional("SpaceAfterControlStatementKeyword",
262 Style.SpaceBeforeParens);
263 IO.mapOptional("PointerBindsToType", Style.PointerAlignment);
264 IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment);
266 IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens);
267 IO.mapOptional("DisableFormat", Style.DisableFormat);
271 // Allows to read vector<FormatStyle> while keeping default values.
272 // IO.getContext() should contain a pointer to the FormatStyle structure, that
273 // will be used to get default values for missing keys.
274 // If the first element has no Language specified, it will be treated as the
275 // default one for the following elements.
276 template <> struct DocumentListTraits<std::vector<FormatStyle> > {
277 static size_t size(IO &IO, std::vector<FormatStyle> &Seq) {
278 return Seq.size();
280 static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq,
281 size_t Index) {
282 if (Index >= Seq.size()) {
283 assert(Index == Seq.size());
284 FormatStyle Template;
285 if (Seq.size() > 0 && Seq[0].Language == FormatStyle::LK_None) {
286 Template = Seq[0];
287 } else {
288 Template = *((const FormatStyle *)IO.getContext());
289 Template.Language = FormatStyle::LK_None;
291 Seq.resize(Index + 1, Template);
293 return Seq[Index];
299 namespace clang {
300 namespace format {
302 const std::error_category &getParseCategory() {
303 static ParseErrorCategory C;
304 return C;
306 std::error_code make_error_code(ParseError e) {
307 return std::error_code(static_cast<int>(e), getParseCategory());
310 const char *ParseErrorCategory::name() const LLVM_NOEXCEPT {
311 return "clang-format.parse_error";
314 std::string ParseErrorCategory::message(int EV) const {
315 switch (static_cast<ParseError>(EV)) {
316 case ParseError::Success:
317 return "Success";
318 case ParseError::Error:
319 return "Invalid argument";
320 case ParseError::Unsuitable:
321 return "Unsuitable";
323 llvm_unreachable("unexpected parse error");
326 FormatStyle getLLVMStyle() {
327 FormatStyle LLVMStyle;
328 LLVMStyle.Language = FormatStyle::LK_Cpp;
329 LLVMStyle.AccessModifierOffset = -2;
330 LLVMStyle.AlignEscapedNewlinesLeft = false;
331 LLVMStyle.AlignAfterOpenBracket = true;
332 LLVMStyle.AlignOperands = true;
333 LLVMStyle.AlignTrailingComments = true;
334 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
335 LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All;
336 LLVMStyle.AllowShortBlocksOnASingleLine = false;
337 LLVMStyle.AllowShortCaseLabelsOnASingleLine = false;
338 LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
339 LLVMStyle.AllowShortLoopsOnASingleLine = false;
340 LLVMStyle.AlwaysBreakAfterDefinitionReturnType = false;
341 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
342 LLVMStyle.AlwaysBreakTemplateDeclarations = false;
343 LLVMStyle.BinPackParameters = true;
344 LLVMStyle.BinPackArguments = true;
345 LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None;
346 LLVMStyle.BreakBeforeTernaryOperators = true;
347 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
348 LLVMStyle.BreakConstructorInitializersBeforeComma = false;
349 LLVMStyle.ColumnLimit = 80;
350 LLVMStyle.CommentPragmas = "^ IWYU pragma:";
351 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
352 LLVMStyle.ConstructorInitializerIndentWidth = 4;
353 LLVMStyle.ContinuationIndentWidth = 4;
354 LLVMStyle.Cpp11BracedListStyle = true;
355 LLVMStyle.DerivePointerAlignment = false;
356 LLVMStyle.ExperimentalAutoDetectBinPacking = false;
357 LLVMStyle.ForEachMacros.push_back("foreach");
358 LLVMStyle.ForEachMacros.push_back("Q_FOREACH");
359 LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH");
360 LLVMStyle.IndentCaseLabels = false;
361 LLVMStyle.IndentWrappedFunctionNames = false;
362 LLVMStyle.IndentWidth = 2;
363 LLVMStyle.TabWidth = 8;
364 LLVMStyle.MaxEmptyLinesToKeep = 1;
365 LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true;
366 LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
367 LLVMStyle.ObjCBlockIndentWidth = 2;
368 LLVMStyle.ObjCSpaceAfterProperty = false;
369 LLVMStyle.ObjCSpaceBeforeProtocolList = true;
370 LLVMStyle.PointerAlignment = FormatStyle::PAS_Right;
371 LLVMStyle.SpacesBeforeTrailingComments = 1;
372 LLVMStyle.Standard = FormatStyle::LS_Cpp11;
373 LLVMStyle.UseTab = FormatStyle::UT_Never;
374 LLVMStyle.SpacesInParentheses = false;
375 LLVMStyle.SpacesInSquareBrackets = false;
376 LLVMStyle.SpaceInEmptyParentheses = false;
377 LLVMStyle.SpacesInContainerLiterals = true;
378 LLVMStyle.SpacesInCStyleCastParentheses = false;
379 LLVMStyle.SpaceAfterCStyleCast = false;
380 LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements;
381 LLVMStyle.SpaceBeforeAssignmentOperators = true;
382 LLVMStyle.SpacesInAngles = false;
384 LLVMStyle.PenaltyBreakComment = 300;
385 LLVMStyle.PenaltyBreakFirstLessLess = 120;
386 LLVMStyle.PenaltyBreakString = 1000;
387 LLVMStyle.PenaltyExcessCharacter = 1000000;
388 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
389 LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
391 LLVMStyle.DisableFormat = false;
393 return LLVMStyle;
396 FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) {
397 FormatStyle GoogleStyle = getLLVMStyle();
398 GoogleStyle.Language = Language;
400 GoogleStyle.AccessModifierOffset = -1;
401 GoogleStyle.AlignEscapedNewlinesLeft = true;
402 GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
403 GoogleStyle.AllowShortLoopsOnASingleLine = true;
404 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
405 GoogleStyle.AlwaysBreakTemplateDeclarations = true;
406 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
407 GoogleStyle.DerivePointerAlignment = true;
408 GoogleStyle.IndentCaseLabels = true;
409 GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false;
410 GoogleStyle.ObjCSpaceAfterProperty = false;
411 GoogleStyle.ObjCSpaceBeforeProtocolList = false;
412 GoogleStyle.PointerAlignment = FormatStyle::PAS_Left;
413 GoogleStyle.SpacesBeforeTrailingComments = 2;
414 GoogleStyle.Standard = FormatStyle::LS_Auto;
416 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
417 GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
419 if (Language == FormatStyle::LK_Java) {
420 GoogleStyle.AlignAfterOpenBracket = false;
421 GoogleStyle.AlignOperands = false;
422 GoogleStyle.AlignTrailingComments = false;
423 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty;
424 GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
425 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
426 GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment;
427 GoogleStyle.ColumnLimit = 100;
428 GoogleStyle.SpaceAfterCStyleCast = true;
429 GoogleStyle.SpacesBeforeTrailingComments = 1;
430 } else if (Language == FormatStyle::LK_JavaScript) {
431 GoogleStyle.BreakBeforeTernaryOperators = false;
432 GoogleStyle.MaxEmptyLinesToKeep = 3;
433 GoogleStyle.SpacesInContainerLiterals = false;
434 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
435 GoogleStyle.AlwaysBreakBeforeMultilineStrings = false;
436 } else if (Language == FormatStyle::LK_Proto) {
437 GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
438 GoogleStyle.SpacesInContainerLiterals = false;
441 return GoogleStyle;
444 FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) {
445 FormatStyle ChromiumStyle = getGoogleStyle(Language);
446 if (Language == FormatStyle::LK_Java) {
447 ChromiumStyle.AllowShortIfStatementsOnASingleLine = true;
448 ChromiumStyle.IndentWidth = 4;
449 ChromiumStyle.ContinuationIndentWidth = 8;
450 } else {
451 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
452 ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline;
453 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
454 ChromiumStyle.AllowShortLoopsOnASingleLine = false;
455 ChromiumStyle.BinPackParameters = false;
456 ChromiumStyle.DerivePointerAlignment = false;
458 return ChromiumStyle;
461 FormatStyle getMozillaStyle() {
462 FormatStyle MozillaStyle = getLLVMStyle();
463 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
464 MozillaStyle.Cpp11BracedListStyle = false;
465 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
466 MozillaStyle.DerivePointerAlignment = true;
467 MozillaStyle.IndentCaseLabels = true;
468 MozillaStyle.ObjCSpaceAfterProperty = true;
469 MozillaStyle.ObjCSpaceBeforeProtocolList = false;
470 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
471 MozillaStyle.PointerAlignment = FormatStyle::PAS_Left;
472 MozillaStyle.Standard = FormatStyle::LS_Cpp03;
473 return MozillaStyle;
476 FormatStyle getWebKitStyle() {
477 FormatStyle Style = getLLVMStyle();
478 Style.AccessModifierOffset = -4;
479 Style.AlignAfterOpenBracket = false;
480 Style.AlignOperands = false;
481 Style.AlignTrailingComments = false;
482 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
483 Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
484 Style.BreakConstructorInitializersBeforeComma = true;
485 Style.Cpp11BracedListStyle = false;
486 Style.ColumnLimit = 0;
487 Style.IndentWidth = 4;
488 Style.NamespaceIndentation = FormatStyle::NI_Inner;
489 Style.ObjCBlockIndentWidth = 4;
490 Style.ObjCSpaceAfterProperty = true;
491 Style.PointerAlignment = FormatStyle::PAS_Left;
492 Style.Standard = FormatStyle::LS_Cpp03;
493 return Style;
496 FormatStyle getGNUStyle() {
497 FormatStyle Style = getLLVMStyle();
498 Style.AlwaysBreakAfterDefinitionReturnType = true;
499 Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
500 Style.BreakBeforeBraces = FormatStyle::BS_GNU;
501 Style.BreakBeforeTernaryOperators = true;
502 Style.Cpp11BracedListStyle = false;
503 Style.ColumnLimit = 79;
504 Style.SpaceBeforeParens = FormatStyle::SBPO_Always;
505 Style.Standard = FormatStyle::LS_Cpp03;
506 return Style;
509 FormatStyle getNoStyle() {
510 FormatStyle NoStyle = getLLVMStyle();
511 NoStyle.DisableFormat = true;
512 return NoStyle;
515 bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language,
516 FormatStyle *Style) {
517 if (Name.equals_lower("llvm")) {
518 *Style = getLLVMStyle();
519 } else if (Name.equals_lower("chromium")) {
520 *Style = getChromiumStyle(Language);
521 } else if (Name.equals_lower("mozilla")) {
522 *Style = getMozillaStyle();
523 } else if (Name.equals_lower("google")) {
524 *Style = getGoogleStyle(Language);
525 } else if (Name.equals_lower("webkit")) {
526 *Style = getWebKitStyle();
527 } else if (Name.equals_lower("gnu")) {
528 *Style = getGNUStyle();
529 } else if (Name.equals_lower("none")) {
530 *Style = getNoStyle();
531 } else {
532 return false;
535 Style->Language = Language;
536 return true;
539 std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
540 assert(Style);
541 FormatStyle::LanguageKind Language = Style->Language;
542 assert(Language != FormatStyle::LK_None);
543 if (Text.trim().empty())
544 return make_error_code(ParseError::Error);
546 std::vector<FormatStyle> Styles;
547 llvm::yaml::Input Input(Text);
548 // DocumentListTraits<vector<FormatStyle>> uses the context to get default
549 // values for the fields, keys for which are missing from the configuration.
550 // Mapping also uses the context to get the language to find the correct
551 // base style.
552 Input.setContext(Style);
553 Input >> Styles;
554 if (Input.error())
555 return Input.error();
557 for (unsigned i = 0; i < Styles.size(); ++i) {
558 // Ensures that only the first configuration can skip the Language option.
559 if (Styles[i].Language == FormatStyle::LK_None && i != 0)
560 return make_error_code(ParseError::Error);
561 // Ensure that each language is configured at most once.
562 for (unsigned j = 0; j < i; ++j) {
563 if (Styles[i].Language == Styles[j].Language) {
564 DEBUG(llvm::dbgs()
565 << "Duplicate languages in the config file on positions " << j
566 << " and " << i << "\n");
567 return make_error_code(ParseError::Error);
571 // Look for a suitable configuration starting from the end, so we can
572 // find the configuration for the specific language first, and the default
573 // configuration (which can only be at slot 0) after it.
574 for (int i = Styles.size() - 1; i >= 0; --i) {
575 if (Styles[i].Language == Language ||
576 Styles[i].Language == FormatStyle::LK_None) {
577 *Style = Styles[i];
578 Style->Language = Language;
579 return make_error_code(ParseError::Success);
582 return make_error_code(ParseError::Unsuitable);
585 std::string configurationAsText(const FormatStyle &Style) {
586 std::string Text;
587 llvm::raw_string_ostream Stream(Text);
588 llvm::yaml::Output Output(Stream);
589 // We use the same mapping method for input and output, so we need a non-const
590 // reference here.
591 FormatStyle NonConstStyle = Style;
592 Output << NonConstStyle;
593 return Stream.str();
596 namespace {
598 class FormatTokenLexer {
599 public:
600 FormatTokenLexer(SourceManager &SourceMgr, FileID ID, FormatStyle &Style,
601 encoding::Encoding Encoding)
602 : FormatTok(nullptr), IsFirstToken(true), GreaterStashed(false),
603 Column(0), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID),
604 Style(Style), IdentTable(getFormattingLangOpts(Style)),
605 Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0),
606 FormattingDisabled(false) {
607 Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr,
608 getFormattingLangOpts(Style)));
609 Lex->SetKeepWhitespaceMode(true);
611 for (const std::string &ForEachMacro : Style.ForEachMacros)
612 ForEachMacros.push_back(&IdentTable.get(ForEachMacro));
613 std::sort(ForEachMacros.begin(), ForEachMacros.end());
616 ArrayRef<FormatToken *> lex() {
617 assert(Tokens.empty());
618 assert(FirstInLineIndex == 0);
619 do {
620 Tokens.push_back(getNextToken());
621 tryMergePreviousTokens();
622 if (Tokens.back()->NewlinesBefore > 0)
623 FirstInLineIndex = Tokens.size() - 1;
624 } while (Tokens.back()->Tok.isNot(tok::eof));
625 return Tokens;
628 const AdditionalKeywords &getKeywords() { return Keywords; }
630 private:
631 void tryMergePreviousTokens() {
632 if (tryMerge_TMacro())
633 return;
634 if (tryMergeConflictMarkers())
635 return;
637 if (Style.Language == FormatStyle::LK_JavaScript) {
638 if (tryMergeJSRegexLiteral())
639 return;
640 if (tryMergeEscapeSequence())
641 return;
643 static tok::TokenKind JSIdentity[] = { tok::equalequal, tok::equal };
644 static tok::TokenKind JSNotIdentity[] = { tok::exclaimequal, tok::equal };
645 static tok::TokenKind JSShiftEqual[] = { tok::greater, tok::greater,
646 tok::greaterequal };
647 static tok::TokenKind JSRightArrow[] = { tok::equal, tok::greater };
648 // FIXME: We probably need to change token type to mimic operator with the
649 // correct priority.
650 if (tryMergeTokens(JSIdentity))
651 return;
652 if (tryMergeTokens(JSNotIdentity))
653 return;
654 if (tryMergeTokens(JSShiftEqual))
655 return;
656 if (tryMergeTokens(JSRightArrow))
657 return;
661 bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds) {
662 if (Tokens.size() < Kinds.size())
663 return false;
665 SmallVectorImpl<FormatToken *>::const_iterator First =
666 Tokens.end() - Kinds.size();
667 if (!First[0]->is(Kinds[0]))
668 return false;
669 unsigned AddLength = 0;
670 for (unsigned i = 1; i < Kinds.size(); ++i) {
671 if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
672 First[i]->WhitespaceRange.getEnd())
673 return false;
674 AddLength += First[i]->TokenText.size();
676 Tokens.resize(Tokens.size() - Kinds.size() + 1);
677 First[0]->TokenText = StringRef(First[0]->TokenText.data(),
678 First[0]->TokenText.size() + AddLength);
679 First[0]->ColumnWidth += AddLength;
680 return true;
683 // Tries to merge an escape sequence, i.e. a "\\" and the following
684 // character. Use e.g. inside JavaScript regex literals.
685 bool tryMergeEscapeSequence() {
686 if (Tokens.size() < 2)
687 return false;
688 FormatToken *Previous = Tokens[Tokens.size() - 2];
689 if (Previous->isNot(tok::unknown) || Previous->TokenText != "\\")
690 return false;
691 ++Previous->ColumnWidth;
692 StringRef Text = Previous->TokenText;
693 Previous->TokenText = StringRef(Text.data(), Text.size() + 1);
694 resetLexer(SourceMgr.getFileOffset(Tokens.back()->Tok.getLocation()) + 1);
695 Tokens.resize(Tokens.size() - 1);
696 Column = Previous->OriginalColumn + Previous->ColumnWidth;
697 return true;
700 // Try to determine whether the current token ends a JavaScript regex literal.
701 // We heuristically assume that this is a regex literal if we find two
702 // unescaped slashes on a line and the token before the first slash is one of
703 // "(;,{}![:?", a binary operator or 'return', as those cannot be followed by
704 // a division.
705 bool tryMergeJSRegexLiteral() {
706 if (Tokens.size() < 2)
707 return false;
708 // If a regex literal ends in "\//", this gets represented by an unknown
709 // token "\" and a comment.
710 bool MightEndWithEscapedSlash =
711 Tokens.back()->is(tok::comment) &&
712 Tokens.back()->TokenText.startswith("//") &&
713 Tokens[Tokens.size() - 2]->TokenText == "\\";
714 if (!MightEndWithEscapedSlash &&
715 (Tokens.back()->isNot(tok::slash) ||
716 (Tokens[Tokens.size() - 2]->is(tok::unknown) &&
717 Tokens[Tokens.size() - 2]->TokenText == "\\")))
718 return false;
719 unsigned TokenCount = 0;
720 unsigned LastColumn = Tokens.back()->OriginalColumn;
721 for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) {
722 ++TokenCount;
723 if (I[0]->is(tok::slash) && I + 1 != E &&
724 (I[1]->isOneOf(tok::l_paren, tok::semi, tok::l_brace, tok::r_brace,
725 tok::exclaim, tok::l_square, tok::colon, tok::comma,
726 tok::question, tok::kw_return) ||
727 I[1]->isBinaryOperator())) {
728 if (MightEndWithEscapedSlash) {
729 // This regex literal ends in '\//'. Skip past the '//' of the last
730 // token and re-start lexing from there.
731 SourceLocation Loc = Tokens.back()->Tok.getLocation();
732 resetLexer(SourceMgr.getFileOffset(Loc) + 2);
734 Tokens.resize(Tokens.size() - TokenCount);
735 Tokens.back()->Tok.setKind(tok::unknown);
736 Tokens.back()->Type = TT_RegexLiteral;
737 Tokens.back()->ColumnWidth += LastColumn - I[0]->OriginalColumn;
738 return true;
741 // There can't be a newline inside a regex literal.
742 if (I[0]->NewlinesBefore > 0)
743 return false;
745 return false;
748 bool tryMerge_TMacro() {
749 if (Tokens.size() < 4)
750 return false;
751 FormatToken *Last = Tokens.back();
752 if (!Last->is(tok::r_paren))
753 return false;
755 FormatToken *String = Tokens[Tokens.size() - 2];
756 if (!String->is(tok::string_literal) || String->IsMultiline)
757 return false;
759 if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
760 return false;
762 FormatToken *Macro = Tokens[Tokens.size() - 4];
763 if (Macro->TokenText != "_T")
764 return false;
766 const char *Start = Macro->TokenText.data();
767 const char *End = Last->TokenText.data() + Last->TokenText.size();
768 String->TokenText = StringRef(Start, End - Start);
769 String->IsFirst = Macro->IsFirst;
770 String->LastNewlineOffset = Macro->LastNewlineOffset;
771 String->WhitespaceRange = Macro->WhitespaceRange;
772 String->OriginalColumn = Macro->OriginalColumn;
773 String->ColumnWidth = encoding::columnWidthWithTabs(
774 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
776 Tokens.pop_back();
777 Tokens.pop_back();
778 Tokens.pop_back();
779 Tokens.back() = String;
780 return true;
783 bool tryMergeConflictMarkers() {
784 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))
785 return false;
787 // Conflict lines look like:
788 // <marker> <text from the vcs>
789 // For example:
790 // >>>>>>> /file/in/file/system at revision 1234
792 // We merge all tokens in a line that starts with a conflict marker
793 // into a single token with a special token type that the unwrapped line
794 // parser will use to correctly rebuild the underlying code.
796 FileID ID;
797 // Get the position of the first token in the line.
798 unsigned FirstInLineOffset;
799 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(
800 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());
801 StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer();
802 // Calculate the offset of the start of the current line.
803 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);
804 if (LineOffset == StringRef::npos) {
805 LineOffset = 0;
806 } else {
807 ++LineOffset;
810 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);
811 StringRef LineStart;
812 if (FirstSpace == StringRef::npos) {
813 LineStart = Buffer.substr(LineOffset);
814 } else {
815 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);
818 TokenType Type = TT_Unknown;
819 if (LineStart == "<<<<<<<" || LineStart == ">>>>") {
820 Type = TT_ConflictStart;
821 } else if (LineStart == "|||||||" || LineStart == "=======" ||
822 LineStart == "====") {
823 Type = TT_ConflictAlternative;
824 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {
825 Type = TT_ConflictEnd;
828 if (Type != TT_Unknown) {
829 FormatToken *Next = Tokens.back();
831 Tokens.resize(FirstInLineIndex + 1);
832 // We do not need to build a complete token here, as we will skip it
833 // during parsing anyway (as we must not touch whitespace around conflict
834 // markers).
835 Tokens.back()->Type = Type;
836 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);
838 Tokens.push_back(Next);
839 return true;
842 return false;
845 FormatToken *getNextToken() {
846 if (GreaterStashed) {
847 // Create a synthesized second '>' token.
848 // FIXME: Increment Column and set OriginalColumn.
849 Token Greater = FormatTok->Tok;
850 FormatTok = new (Allocator.Allocate()) FormatToken;
851 FormatTok->Tok = Greater;
852 SourceLocation GreaterLocation =
853 FormatTok->Tok.getLocation().getLocWithOffset(1);
854 FormatTok->WhitespaceRange =
855 SourceRange(GreaterLocation, GreaterLocation);
856 FormatTok->TokenText = ">";
857 FormatTok->ColumnWidth = 1;
858 GreaterStashed = false;
859 return FormatTok;
862 FormatTok = new (Allocator.Allocate()) FormatToken;
863 readRawToken(*FormatTok);
864 SourceLocation WhitespaceStart =
865 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
866 FormatTok->IsFirst = IsFirstToken;
867 IsFirstToken = false;
869 // Consume and record whitespace until we find a significant token.
870 unsigned WhitespaceLength = TrailingWhitespace;
871 while (FormatTok->Tok.is(tok::unknown)) {
872 for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
873 switch (FormatTok->TokenText[i]) {
874 case '\n':
875 ++FormatTok->NewlinesBefore;
876 // FIXME: This is technically incorrect, as it could also
877 // be a literal backslash at the end of the line.
878 if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
879 (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
880 FormatTok->TokenText[i - 2] != '\\')))
881 FormatTok->HasUnescapedNewline = true;
882 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
883 Column = 0;
884 break;
885 case '\r':
886 case '\f':
887 case '\v':
888 Column = 0;
889 break;
890 case ' ':
891 ++Column;
892 break;
893 case '\t':
894 Column += Style.TabWidth - Column % Style.TabWidth;
895 break;
896 case '\\':
897 if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
898 FormatTok->TokenText[i + 1] != '\n'))
899 FormatTok->Type = TT_ImplicitStringLiteral;
900 break;
901 default:
902 FormatTok->Type = TT_ImplicitStringLiteral;
903 ++Column;
904 break;
908 if (FormatTok->is(TT_ImplicitStringLiteral))
909 break;
910 WhitespaceLength += FormatTok->Tok.getLength();
912 readRawToken(*FormatTok);
915 // In case the token starts with escaped newlines, we want to
916 // take them into account as whitespace - this pattern is quite frequent
917 // in macro definitions.
918 // FIXME: Add a more explicit test.
919 while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
920 FormatTok->TokenText[1] == '\n') {
921 ++FormatTok->NewlinesBefore;
922 WhitespaceLength += 2;
923 Column = 0;
924 FormatTok->TokenText = FormatTok->TokenText.substr(2);
927 FormatTok->WhitespaceRange = SourceRange(
928 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
930 FormatTok->OriginalColumn = Column;
932 TrailingWhitespace = 0;
933 if (FormatTok->Tok.is(tok::comment)) {
934 // FIXME: Add the trimmed whitespace to Column.
935 StringRef UntrimmedText = FormatTok->TokenText;
936 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
937 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
938 } else if (FormatTok->Tok.is(tok::raw_identifier)) {
939 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
940 FormatTok->Tok.setIdentifierInfo(&Info);
941 FormatTok->Tok.setKind(Info.getTokenID());
942 if (Style.Language == FormatStyle::LK_Java &&
943 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete)) {
944 FormatTok->Tok.setKind(tok::identifier);
945 FormatTok->Tok.setIdentifierInfo(nullptr);
947 } else if (FormatTok->Tok.is(tok::greatergreater)) {
948 FormatTok->Tok.setKind(tok::greater);
949 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
950 GreaterStashed = true;
953 // Now FormatTok is the next non-whitespace token.
955 StringRef Text = FormatTok->TokenText;
956 size_t FirstNewlinePos = Text.find('\n');
957 if (FirstNewlinePos == StringRef::npos) {
958 // FIXME: ColumnWidth actually depends on the start column, we need to
959 // take this into account when the token is moved.
960 FormatTok->ColumnWidth =
961 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
962 Column += FormatTok->ColumnWidth;
963 } else {
964 FormatTok->IsMultiline = true;
965 // FIXME: ColumnWidth actually depends on the start column, we need to
966 // take this into account when the token is moved.
967 FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
968 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
970 // The last line of the token always starts in column 0.
971 // Thus, the length can be precomputed even in the presence of tabs.
972 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
973 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
974 Encoding);
975 Column = FormatTok->LastLineColumnWidth;
978 FormatTok->IsForEachMacro =
979 std::binary_search(ForEachMacros.begin(), ForEachMacros.end(),
980 FormatTok->Tok.getIdentifierInfo());
982 return FormatTok;
// Token most recently created by getNextToken().
985 FormatToken *FormatTok;
// Copied into the IsFirst flag of the next token read, then cleared.
986 bool IsFirstToken;
// Set when a '>>' token was split; the next getNextToken() call then
// synthesizes the second '>'.
987 bool GreaterStashed;
// Running column maintained by getNextToken(); reset to 0 at line breaks.
988 unsigned Column;
// Number of characters trimmed from the end of the previous comment token;
// getNextToken() counts them as leading whitespace of the following token.
989 unsigned TrailingWhitespace;
// Raw lexer; (re)created by resetLexer().
990 std::unique_ptr<Lexer> Lex;
991 SourceManager &SourceMgr;
// File whose buffer resetLexer() hands to the lexer.
992 FileID ID;
993 FormatStyle &Style;
// Used by getNextToken() to turn raw identifiers into IdentifierInfo entries.
994 IdentifierTable IdentTable;
995 AdditionalKeywords Keywords;
// Passed to encoding::columnWidthWithTabs() when measuring token text.
996 encoding::Encoding Encoding;
// Provides the storage for every FormatToken created by getNextToken().
997 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
998 // Index (in 'Tokens') of the last token that starts a new line.
999 unsigned FirstInLineIndex;
// Token sequence that the tryMerge* helpers rewrite in place.
1000 SmallVector<FormatToken *, 16> Tokens;
// Searched with std::binary_search in getNextToken(), so it has to be sorted.
1001 SmallVector<IdentifierInfo *, 8> ForEachMacros;
// True after a "clang-format off" comment until the matching "clang-format on"
// comment; readRawToken() copies it into each token's Finalized flag.
1003 bool FormattingDisabled;
1005 void readRawToken(FormatToken &Tok) {
1006 Lex->LexFromRawLexer(Tok.Tok);
1007 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1008 Tok.Tok.getLength());
1009 // For formatting, treat unterminated string literals like normal string
1010 // literals.
1011 if (Tok.is(tok::unknown)) {
1012 if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') {
1013 Tok.Tok.setKind(tok::string_literal);
1014 Tok.IsUnterminatedLiteral = true;
1015 } else if (Style.Language == FormatStyle::LK_JavaScript &&
1016 Tok.TokenText == "''") {
1017 Tok.Tok.setKind(tok::char_constant);
1021 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" ||
1022 Tok.TokenText == "/* clang-format on */")) {
1023 FormattingDisabled = false;
1026 Tok.Finalized = FormattingDisabled;
1028 if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" ||
1029 Tok.TokenText == "/* clang-format off */")) {
1030 FormattingDisabled = true;
1034 void resetLexer(unsigned Offset) {
1035 StringRef Buffer = SourceMgr.getBufferData(ID);
1036 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID),
1037 getFormattingLangOpts(Style), Buffer.begin(),
1038 Buffer.begin() + Offset, Buffer.end()));
1039 Lex->SetKeepWhitespaceMode(true);
1043 static StringRef getLanguageName(FormatStyle::LanguageKind Language) {
1044 switch (Language) {
1045 case FormatStyle::LK_Cpp:
1046 return "C++";
1047 case FormatStyle::LK_Java:
1048 return "Java";
1049 case FormatStyle::LK_JavaScript:
1050 return "JavaScript";
1051 case FormatStyle::LK_Proto:
1052 return "Proto";
1053 default:
1054 return "Unknown";
1058 class Formatter : public UnwrappedLineConsumer {
1059 public:
1060 Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
1061 ArrayRef<CharSourceRange> Ranges)
1062 : Style(Style), ID(ID), SourceMgr(SourceMgr),
1063 Whitespaces(SourceMgr, Style,
1064 inputUsesCRLF(SourceMgr.getBufferData(ID))),
1065 Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1066 Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
1067 DEBUG(llvm::dbgs() << "File encoding: "
1068 << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1069 : "unknown")
1070 << "\n");
1071 DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
1072 << "\n");
1075 tooling::Replacements format() {
1076 tooling::Replacements Result;
1077 FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
1079 UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
1080 *this);
1081 bool StructuralError = Parser.parse();
1082 assert(UnwrappedLines.rbegin()->empty());
1083 for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1084 ++Run) {
1085 DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1086 SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1087 for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1088 AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1090 tooling::Replacements RunResult =
1091 format(AnnotatedLines, StructuralError, Tokens);
1092 DEBUG({
1093 llvm::dbgs() << "Replacements for run " << Run << ":\n";
1094 for (tooling::Replacements::iterator I = RunResult.begin(),
1095 E = RunResult.end();
1096 I != E; ++I) {
1097 llvm::dbgs() << I->toString() << "\n";
1100 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1101 delete AnnotatedLines[i];
1103 Result.insert(RunResult.begin(), RunResult.end());
1104 Whitespaces.reset();
1106 return Result;
1109 tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1110 bool StructuralError, FormatTokenLexer &Tokens) {
1111 TokenAnnotator Annotator(Style, Tokens.getKeywords());
1112 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1113 Annotator.annotate(*AnnotatedLines[i]);
1115 deriveLocalStyle(AnnotatedLines);
1116 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1117 Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1119 computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
1121 Annotator.setCommentLineLevels(AnnotatedLines);
1122 ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
1123 Whitespaces, Encoding,
1124 BinPackInconclusiveFunctions);
1125 UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style);
1126 Formatter.format(AnnotatedLines, /*DryRun=*/false);
1127 return Whitespaces.generateReplacements();
1130 private:
1131 // Determines which lines are affected by the SourceRanges given as input.
1132 // Returns \c true if at least one line between I and E or one of their
1133 // children is affected.
1134 bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *>::iterator I,
1135 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1136 bool SomeLineAffected = false;
1137 const AnnotatedLine *PreviousLine = nullptr;
1138 while (I != E) {
1139 AnnotatedLine *Line = *I;
1140 Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
1142 // If a line is part of a preprocessor directive, it needs to be formatted
1143 // if any token within the directive is affected.
1144 if (Line->InPPDirective) {
1145 FormatToken *Last = Line->Last;
1146 SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1;
1147 while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) {
1148 Last = (*PPEnd)->Last;
1149 ++PPEnd;
1152 if (affectsTokenRange(*Line->First, *Last,
1153 /*IncludeLeadingNewlines=*/false)) {
1154 SomeLineAffected = true;
1155 markAllAsAffected(I, PPEnd);
1157 I = PPEnd;
1158 continue;
1161 if (nonPPLineAffected(Line, PreviousLine))
1162 SomeLineAffected = true;
1164 PreviousLine = Line;
1165 ++I;
1167 return SomeLineAffected;
1170 // Determines whether 'Line' is affected by the SourceRanges given as input.
1171 // Returns \c true if line or one if its children is affected.
1172 bool nonPPLineAffected(AnnotatedLine *Line,
1173 const AnnotatedLine *PreviousLine) {
1174 bool SomeLineAffected = false;
1175 Line->ChildrenAffected =
1176 computeAffectedLines(Line->Children.begin(), Line->Children.end());
1177 if (Line->ChildrenAffected)
1178 SomeLineAffected = true;
1180 // Stores whether one of the line's tokens is directly affected.
1181 bool SomeTokenAffected = false;
1182 // Stores whether we need to look at the leading newlines of the next token
1183 // in order to determine whether it was affected.
1184 bool IncludeLeadingNewlines = false;
1186 // Stores whether the first child line of any of this line's tokens is
1187 // affected.
1188 bool SomeFirstChildAffected = false;
1190 for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
1191 // Determine whether 'Tok' was affected.
1192 if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
1193 SomeTokenAffected = true;
1195 // Determine whether the first child of 'Tok' was affected.
1196 if (!Tok->Children.empty() && Tok->Children.front()->Affected)
1197 SomeFirstChildAffected = true;
1199 IncludeLeadingNewlines = Tok->Children.empty();
1202 // Was this line moved, i.e. has it previously been on the same line as an
1203 // affected line?
1204 bool LineMoved = PreviousLine && PreviousLine->Affected &&
1205 Line->First->NewlinesBefore == 0;
1207 bool IsContinuedComment =
1208 Line->First->is(tok::comment) && Line->First->Next == nullptr &&
1209 Line->First->NewlinesBefore < 2 && PreviousLine &&
1210 PreviousLine->Affected && PreviousLine->Last->is(tok::comment);
1212 if (SomeTokenAffected || SomeFirstChildAffected || LineMoved ||
1213 IsContinuedComment) {
1214 Line->Affected = true;
1215 SomeLineAffected = true;
1217 return SomeLineAffected;
1220 // Marks all lines between I and E as well as all their children as affected.
1221 void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
1222 SmallVectorImpl<AnnotatedLine *>::iterator E) {
1223 while (I != E) {
1224 (*I)->Affected = true;
1225 markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
1226 ++I;
1230 // Returns true if the range from 'First' to 'Last' intersects with one of the
1231 // input ranges.
1232 bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
1233 bool IncludeLeadingNewlines) {
1234 SourceLocation Start = First.WhitespaceRange.getBegin();
1235 if (!IncludeLeadingNewlines)
1236 Start = Start.getLocWithOffset(First.LastNewlineOffset);
1237 SourceLocation End = Last.getStartOfNonWhitespace();
1238 End = End.getLocWithOffset(Last.TokenText.size());
1239 CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
1240 return affectsCharSourceRange(Range);
1243 // Returns true if one of the input ranges intersect the leading empty lines
1244 // before 'Tok'.
1245 bool affectsLeadingEmptyLines(const FormatToken &Tok) {
1246 CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
1247 Tok.WhitespaceRange.getBegin(),
1248 Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
1249 return affectsCharSourceRange(EmptyLineRange);
1252 // Returns true if 'Range' intersects with one of the input ranges.
1253 bool affectsCharSourceRange(const CharSourceRange &Range) {
1254 for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
1255 E = Ranges.end();
1256 I != E; ++I) {
1257 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
1258 !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
1259 return true;
1261 return false;
1264 static bool inputUsesCRLF(StringRef Text) {
1265 return Text.count('\r') * 2 > Text.count('\n');
1268 void
1269 deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1270 unsigned CountBoundToVariable = 0;
1271 unsigned CountBoundToType = 0;
1272 bool HasCpp03IncompatibleFormat = false;
1273 bool HasBinPackedFunction = false;
1274 bool HasOnePerLineFunction = false;
1275 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1276 if (!AnnotatedLines[i]->First->Next)
1277 continue;
1278 FormatToken *Tok = AnnotatedLines[i]->First->Next;
1279 while (Tok->Next) {
1280 if (Tok->is(TT_PointerOrReference)) {
1281 bool SpacesBefore =
1282 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1283 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1284 Tok->Next->WhitespaceRange.getEnd();
1285 if (SpacesBefore && !SpacesAfter)
1286 ++CountBoundToVariable;
1287 else if (!SpacesBefore && SpacesAfter)
1288 ++CountBoundToType;
1291 if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1292 if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
1293 HasCpp03IncompatibleFormat = true;
1294 if (Tok->is(TT_TemplateCloser) &&
1295 Tok->Previous->is(TT_TemplateCloser))
1296 HasCpp03IncompatibleFormat = true;
1299 if (Tok->PackingKind == PPK_BinPacked)
1300 HasBinPackedFunction = true;
1301 if (Tok->PackingKind == PPK_OnePerLine)
1302 HasOnePerLineFunction = true;
1304 Tok = Tok->Next;
1307 if (Style.DerivePointerAlignment) {
1308 if (CountBoundToType > CountBoundToVariable)
1309 Style.PointerAlignment = FormatStyle::PAS_Left;
1310 else if (CountBoundToType < CountBoundToVariable)
1311 Style.PointerAlignment = FormatStyle::PAS_Right;
1313 if (Style.Standard == FormatStyle::LS_Auto) {
1314 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1315 : FormatStyle::LS_Cpp03;
1317 BinPackInconclusiveFunctions =
1318 HasBinPackedFunction || !HasOnePerLineFunction;
1321 void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
1322 assert(!UnwrappedLines.empty());
1323 UnwrappedLines.back().push_back(TheLine);
1326 void finishRun() override {
1327 UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
// Private copy of the style; deriveLocalStyle() may change its
// PointerAlignment and Standard.
1330 FormatStyle Style;
// File being formatted.
1331 FileID ID;
1332 SourceManager &SourceMgr;
// Produces the replacements returned by format(); reset after every run.
1333 WhitespaceManager Whitespaces;
// Input ranges that affectsCharSourceRange() tests against.
1334 SmallVector<CharSourceRange, 8> Ranges;
// One inner vector per run: consumeUnwrappedLine() appends to the last one,
// finishRun() starts a new one.
1335 SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
// Detected from the file's buffer in the constructor.
1337 encoding::Encoding Encoding;
// Computed by deriveLocalStyle() and handed to the ContinuationIndenter.
1338 bool BinPackInconclusiveFunctions;
1341 } // end anonymous namespace
1343 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1344 SourceManager &SourceMgr,
1345 ArrayRef<CharSourceRange> Ranges) {
1346 if (Style.DisableFormat)
1347 return tooling::Replacements();
1348 return reformat(Style, SourceMgr,
1349 SourceMgr.getFileID(Lex.getSourceLocation()), Ranges);
1352 tooling::Replacements reformat(const FormatStyle &Style,
1353 SourceManager &SourceMgr, FileID ID,
1354 ArrayRef<CharSourceRange> Ranges) {
1355 if (Style.DisableFormat)
1356 return tooling::Replacements();
1357 Formatter formatter(Style, SourceMgr, ID, Ranges);
1358 return formatter.format();
1361 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1362 ArrayRef<tooling::Range> Ranges,
1363 StringRef FileName) {
1364 if (Style.DisableFormat)
1365 return tooling::Replacements();
1367 FileManager Files((FileSystemOptions()));
1368 DiagnosticsEngine Diagnostics(
1369 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1370 new DiagnosticOptions);
1371 SourceManager SourceMgr(Diagnostics, Files);
1372 std::unique_ptr<llvm::MemoryBuffer> Buf =
1373 llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1374 const clang::FileEntry *Entry =
1375 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1376 SourceMgr.overrideFileContents(Entry, std::move(Buf));
1377 FileID ID =
1378 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1379 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1380 std::vector<CharSourceRange> CharRanges;
1381 for (const tooling::Range &Range : Ranges) {
1382 SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
1383 SourceLocation End = Start.getLocWithOffset(Range.getLength());
1384 CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1386 return reformat(Style, SourceMgr, ID, CharRanges);
1389 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
1390 LangOptions LangOpts;
1391 LangOpts.CPlusPlus = 1;
1392 LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1393 LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1394 LangOpts.LineComment = 1;
1395 bool AlternativeOperators = Style.Language != FormatStyle::LK_JavaScript &&
1396 Style.Language != FormatStyle::LK_Java;
1397 LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0;
1398 LangOpts.Bool = 1;
1399 LangOpts.ObjC1 = 1;
1400 LangOpts.ObjC2 = 1;
1401 return LangOpts;
// Help text describing the values accepted by the -style option: a predefined
// style name, "file" to search for a .clang-format file, or an inline
// "{key: value, ...}" configuration.
1404 const char *StyleOptionHelpDescription =
1405 "Coding style, currently supports:\n"
1406 " LLVM, Google, Chromium, Mozilla, WebKit.\n"
1407 "Use -style=file to load style configuration from\n"
1408 ".clang-format file located in one of the parent\n"
1409 "directories of the source file (or current\n"
1410 "directory for stdin).\n"
1411 "Use -style=\"{key: value, ...}\" to set specific\n"
1412 "parameters, e.g.:\n"
1413 " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1415 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
1416 if (FileName.endswith(".java")) {
1417 return FormatStyle::LK_Java;
1418 } else if (FileName.endswith_lower(".js")) {
1419 return FormatStyle::LK_JavaScript;
1420 } else if (FileName.endswith_lower(".proto") ||
1421 FileName.endswith_lower(".protodevel")) {
1422 return FormatStyle::LK_Proto;
1424 return FormatStyle::LK_Cpp;
1427 FormatStyle getStyle(StringRef StyleName, StringRef FileName,
1428 StringRef FallbackStyle) {
1429 FormatStyle Style = getLLVMStyle();
1430 Style.Language = getLanguageByFileName(FileName);
1431 if (!getPredefinedStyle(FallbackStyle, Style.Language, &Style)) {
1432 llvm::errs() << "Invalid fallback style \"" << FallbackStyle
1433 << "\" using LLVM style\n";
1434 return Style;
1437 if (StyleName.startswith("{")) {
1438 // Parse YAML/JSON style from the command line.
1439 if (std::error_code ec = parseConfiguration(StyleName, &Style)) {
1440 llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1441 << FallbackStyle << " style\n";
1443 return Style;
1446 if (!StyleName.equals_lower("file")) {
1447 if (!getPredefinedStyle(StyleName, Style.Language, &Style))
1448 llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1449 << " style\n";
1450 return Style;
1453 // Look for .clang-format/_clang-format file in the file's parent directories.
1454 SmallString<128> UnsuitableConfigFiles;
1455 SmallString<128> Path(FileName);
1456 llvm::sys::fs::make_absolute(Path);
1457 for (StringRef Directory = Path; !Directory.empty();
1458 Directory = llvm::sys::path::parent_path(Directory)) {
1459 if (!llvm::sys::fs::is_directory(Directory))
1460 continue;
1461 SmallString<128> ConfigFile(Directory);
1463 llvm::sys::path::append(ConfigFile, ".clang-format");
1464 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1465 bool IsFile = false;
1466 // Ignore errors from is_regular_file: we only need to know if we can read
1467 // the file or not.
1468 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1470 if (!IsFile) {
1471 // Try _clang-format too, since dotfiles are not commonly used on Windows.
1472 ConfigFile = Directory;
1473 llvm::sys::path::append(ConfigFile, "_clang-format");
1474 DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1475 llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1478 if (IsFile) {
1479 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1480 llvm::MemoryBuffer::getFile(ConfigFile.c_str());
1481 if (std::error_code EC = Text.getError()) {
1482 llvm::errs() << EC.message() << "\n";
1483 break;
1485 if (std::error_code ec =
1486 parseConfiguration(Text.get()->getBuffer(), &Style)) {
1487 if (ec == ParseError::Unsuitable) {
1488 if (!UnsuitableConfigFiles.empty())
1489 UnsuitableConfigFiles.append(", ");
1490 UnsuitableConfigFiles.append(ConfigFile);
1491 continue;
1493 llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1494 << "\n";
1495 break;
1497 DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1498 return Style;
1501 llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1502 << " style\n";
1503 if (!UnsuitableConfigFiles.empty()) {
1504 llvm::errs() << "Configuration file(s) do(es) not support "
1505 << getLanguageName(Style.Language) << ": "
1506 << UnsuitableConfigFiles << "\n";
1508 return Style;
1511 } // namespace format
1512 } // namespace clang