1 //===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements an abstract TokenAnalyzer and associated helper
11 /// classes. TokenAnalyzer can be extended to generate replacements based on
12 /// an annotated and pre-processed token stream.
14 //===----------------------------------------------------------------------===//
16 #include "TokenAnalyzer.h"
17 #include "AffectedRangeManager.h"
19 #include "FormatToken.h"
20 #include "FormatTokenLexer.h"
21 #include "TokenAnnotator.h"
22 #include "UnwrappedLineParser.h"
23 #include "clang/Basic/Diagnostic.h"
24 #include "clang/Basic/DiagnosticOptions.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Format/Format.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Debug.h"
31 #include <type_traits>
// Debug category string for this file.
// NOTE(review): presumably the tag that the LLVM_DEBUG(...) statements further
// down are filtered by — confirm against llvm/Support/Debug.h.
33 #define DEBUG_TYPE "format-formatter"
38 // FIXME: Instead of printing the diagnostic we should store it and have a
39 // better way to return errors through the format APIs.
/// DiagnosticConsumer that reacts to diagnostics whose level is
/// DiagnosticsEngine::Fatal by formatting the message and writing it to
/// llvm::errs(); fatalError() reports the value of the `Fatal` member.
// NOTE(review): the statement that sets `Fatal`, the declaration of that
// member and the closing braces of this class are not visible in this
// listing — confirm against the complete file before editing.
40 class FatalDiagnosticConsumer
: public DiagnosticConsumer
{
// Override of the DiagnosticConsumer hook. Only the Fatal level is acted on
// in the lines visible here.
42 void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel
,
43 const Diagnostic
&Info
) override
{
44 if (DiagLevel
== DiagnosticsEngine::Fatal
) {
// Format the diagnostic text into a SmallVector<char, 128> buffer and print
// it followed by a newline.
46 llvm::SmallVector
<char, 128> Message
;
47 Info
.FormatDiagnostic(Message
);
48 llvm::errs() << Message
<< "\n";
/// Returns the current value of `Fatal`.
52 bool fatalError() const { return Fatal
; }
/// Creates an Environment for \p Code / \p FileName and records every entry
/// of \p Ranges as a character range in that environment.
///
/// \param Code, FileName forwarded to the Environment constructor.
/// \param Ranges offset/length pairs; each is converted to a CharSourceRange
///        measured from the start of the environment's file.
/// \param FirstStartColumn, NextStartColumn, LastStartColumn forwarded
///        unchanged to the Environment constructor.
// NOTE(review): the statements that follow the final
// `if (Diags.fatalError())` (i.e. the return paths) are not visible in this
// listing, so the returned value is not documented here.
58 std::unique_ptr
<Environment
>
59 Environment::make(StringRef Code
, StringRef FileName
,
60 ArrayRef
<tooling::Range
> Ranges
, unsigned FirstStartColumn
,
61 unsigned NextStartColumn
, unsigned LastStartColumn
) {
62 auto Env
= std::make_unique
<Environment
>(Code
, FileName
, FirstStartColumn
,
63 NextStartColumn
, LastStartColumn
);
// Install a local consumer on the environment's diagnostics so that a fatal
// error can be detected below; ShouldOwnClient=false is passed, so ownership
// of the consumer is not handed over.
// NOTE(review): Diags is a function-local and nothing visible here detaches
// it again — confirm the diagnostics object never uses this pointer after
// make() has returned.
64 FatalDiagnosticConsumer Diags
;
65 Env
->SM
.getDiagnostics().setClient(&Diags
, /*ShouldOwnClient=*/false);
66 SourceLocation StartOfFile
= Env
->SM
.getLocForStartOfFile(Env
->ID
);
// Convert each (offset, length) range into a Start/End location pair relative
// to StartOfFile and store it in Env->CharRanges.
67 for (const tooling::Range
&Range
: Ranges
) {
68 SourceLocation Start
= StartOfFile
.getLocWithOffset(Range
.getOffset());
69 SourceLocation End
= Start
.getLocWithOffset(Range
.getLength());
70 Env
->CharRanges
.push_back(CharSourceRange::getCharRange(Start
, End
));
72 // Validate that we can get the buffer data without a fatal error.
73 Env
->SM
.getBufferData(Env
->ID
);
74 if (Diags
.fatalError())
/// Constructs an Environment: allocates a SourceManagerForFile from
/// \p FileName and \p Code, initialises SM from VirtualSM->get(), initialises
/// ID with that object's main file ID, and stores the three start-column
/// values. The constructor body is empty.
// NOTE(review): SM and ID are initialised through VirtualSM, so this depends
// on VirtualSM being declared before them in the class — confirm in the
// header, which is not part of this listing.
79 Environment::Environment(StringRef Code
, StringRef FileName
,
80 unsigned FirstStartColumn
, unsigned NextStartColumn
,
81 unsigned LastStartColumn
)
82 : VirtualSM(new SourceManagerForFile(FileName
, Code
)), SM(VirtualSM
->get()),
83 ID(VirtualSM
->get().getMainFileID()), FirstStartColumn(FirstStartColumn
),
84 NextStartColumn(NextStartColumn
), LastStartColumn(LastStartColumn
) {}
/// Stores \p Style and \p Env, builds AffectedRangeMgr from the environment's
/// source manager and character ranges, and sets Encoding to the result of
/// encoding::detectEncoding() on the environment's buffer data. The visible
/// body lines write the encoding ("UTF8" or "unknown") and the language name
/// to llvm::dbgs().
// NOTE(review): some initialiser and body lines of this constructor are
// missing from this listing (the debug statements below are not terminated
// here) — consult the complete file before changing it.
86 TokenAnalyzer::TokenAnalyzer(const Environment
&Env
, const FormatStyle
&Style
)
87 : Style(Style
), Env(Env
),
88 AffectedRangeMgr(Env
.getSourceManager(), Env
.getCharRanges()),
90 Encoding(encoding::detectEncoding(
91 Env
.getSourceManager().getBufferData(Env
.getFileID()))) {
// Debug output: anything other than Encoding_UTF8 is reported as "unknown".
93 llvm::dbgs() << "File encoding: "
94 << (Encoding
== encoding::Encoding_UTF8
? "UTF8" : "unknown")
96 LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style
.Language
)
/// Lexes the environment's buffer, sets up an UnwrappedLineParser over the
/// resulting tokens with `*this` as its callback, then for each entry of
/// UnwrappedLines except the last one annotates the lines, calls analyze()
/// and merges that call's replacements and penalty into the overall result.
///
/// \returns the pair {Result, Penalty} on the normal path. The visible error
///          path prints the error from Result.add() to llvm::errs() and
///          returns an empty Replacements with penalty 0.
// NOTE(review): several statements of this function are missing from this
// listing (the tail of the lexer construction, the call that runs the
// parser, loop bodies, the guard around the error path and the closing
// braces) — consult the complete file before changing it.
100 std::pair
<tooling::Replacements
, unsigned> TokenAnalyzer::process() {
101 tooling::Replacements Result
;
102 llvm::SpecificBumpPtrAllocator
<FormatToken
> Allocator
;
103 IdentifierTable
IdentTable(getFormattingLangOpts(Style
));
104 FormatTokenLexer
Lex(Env
.getSourceManager(), Env
.getFileID(),
105 Env
.getFirstStartColumn(), Style
, Encoding
, Allocator
,
// Copy the lexer's token array into a local SmallVector that is handed to
// the parser.
108 ArrayRef
<FormatToken
*> Toks(Lex
.lex());
109 SmallVector
<FormatToken
*, 10> Tokens(Toks
.begin(), Toks
.end());
110 UnwrappedLineParser
Parser(Style
, Lex
.getKeywords(),
111 Env
.getFirstStartColumn(), Tokens
, *this);
// The last entry of UnwrappedLines must be empty at this point; the loop
// below stops one entry short of the end and therefore never visits it.
113 assert(UnwrappedLines
.back().empty());
114 unsigned Penalty
= 0;
115 for (unsigned Run
= 0, RunE
= UnwrappedLines
.size(); Run
+ 1 != RunE
; ++Run
) {
116 const auto &Lines
= UnwrappedLines
[Run
];
117 LLVM_DEBUG(llvm::dbgs() << "Run " << Run
<< "...\n");
118 SmallVector
<AnnotatedLine
*, 16> AnnotatedLines
;
119 AnnotatedLines
.reserve(Lines
.size());
121 TokenAnnotator
Annotator(Style
, Lex
.getKeywords());
// Each UnwrappedLine is wrapped in a heap-allocated AnnotatedLine and
// annotated immediately.
// NOTE(review): these objects are created with raw `new`; the loop over
// AnnotatedLines further down has no visible body — presumably it releases
// them, confirm against the complete file.
122 for (const UnwrappedLine
&Line
: Lines
) {
123 AnnotatedLines
.push_back(new AnnotatedLine(Line
));
124 Annotator
.annotate(*AnnotatedLines
.back());
// Hand the annotated lines of this run to analyze(); it returns this run's
// replacements (first) and penalty (second).
127 std::pair
<tooling::Replacements
, unsigned> RunResult
=
128 analyze(Annotator
, AnnotatedLines
, Lex
);
// Debug dump of every replacement produced by this run.
131 llvm::dbgs() << "Replacements for run " << Run
<< ":\n";
132 for (const tooling::Replacement
&Fix
: RunResult
.first
)
133 llvm::dbgs() << Fix
.toString() << "\n";
135 for (AnnotatedLine
*Line
: AnnotatedLines
)
// Accumulate this run's penalty and add its replacements to Result one by
// one.
138 Penalty
+= RunResult
.second
;
139 for (const auto &R
: RunResult
.first
) {
140 auto Err
= Result
.add(R
);
141 // FIXME: better error handling here. For now, simply return an empty
142 // Replacements to indicate failure.
144 llvm::errs() << llvm::toString(std::move(Err
)) << "\n";
145 return {tooling::Replacements(), 0};
149 return {Result
, Penalty
};
/// Appends \p TheLine to the last entry of UnwrappedLines. Asserts that
/// UnwrappedLines is not empty.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
152 void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine
&TheLine
) {
153 assert(!UnwrappedLines
.empty());
154 UnwrappedLines
.back().push_back(TheLine
);
/// Appends a new, empty SmallVector<UnwrappedLine, 16> to UnwrappedLines, so
/// that later consumeUnwrappedLine() calls append to this fresh entry.
// NOTE(review): the closing brace of this function is not visible in this
// listing.
157 void TokenAnalyzer::finishRun() {
158 UnwrappedLines
.push_back(SmallVector
<UnwrappedLine
, 16>());
161 } // end namespace format
162 } // end namespace clang