1 //===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements an abstract TokenAnalyzer and associated helper
11 /// classes. TokenAnalyzer can be extended to generate replacements based on
12 /// an annotated and pre-processed token stream.
14 //===----------------------------------------------------------------------===//
16 #include "TokenAnalyzer.h"
17 #include "AffectedRangeManager.h"
19 #include "FormatToken.h"
20 #include "FormatTokenLexer.h"
21 #include "TokenAnnotator.h"
22 #include "UnwrappedLineParser.h"
23 #include "clang/Basic/Diagnostic.h"
24 #include "clang/Basic/DiagnosticOptions.h"
25 #include "clang/Basic/FileManager.h"
26 #include "clang/Basic/SourceManager.h"
27 #include "clang/Format/Format.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Debug.h"
31 #include <type_traits>
33 #define DEBUG_TYPE "format-formatter"
38 // FIXME: Instead of printing the diagnostic we should store it and have a
39 // better way to return errors through the format APIs.
40 class FatalDiagnosticConsumer
: public DiagnosticConsumer
{
42 void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel
,
43 const Diagnostic
&Info
) override
{
44 if (DiagLevel
== DiagnosticsEngine::Fatal
) {
46 llvm::SmallVector
<char, 128> Message
;
47 Info
.FormatDiagnostic(Message
);
48 llvm::errs() << Message
<< "\n";
52 bool fatalError() const { return Fatal
; }
58 std::unique_ptr
<Environment
>
59 Environment::make(StringRef Code
, StringRef FileName
,
60 ArrayRef
<tooling::Range
> Ranges
, unsigned FirstStartColumn
,
61 unsigned NextStartColumn
, unsigned LastStartColumn
) {
62 auto Env
= std::make_unique
<Environment
>(Code
, FileName
, FirstStartColumn
,
63 NextStartColumn
, LastStartColumn
);
64 FatalDiagnosticConsumer Diags
;
65 Env
->SM
.getDiagnostics().setClient(&Diags
, /*ShouldOwnClient=*/false);
66 SourceLocation StartOfFile
= Env
->SM
.getLocForStartOfFile(Env
->ID
);
67 for (const tooling::Range
&Range
: Ranges
) {
68 SourceLocation Start
= StartOfFile
.getLocWithOffset(Range
.getOffset());
69 SourceLocation End
= Start
.getLocWithOffset(Range
.getLength());
70 Env
->CharRanges
.push_back(CharSourceRange::getCharRange(Start
, End
));
72 // Validate that we can get the buffer data without a fatal error.
73 Env
->SM
.getBufferData(Env
->ID
);
74 if (Diags
.fatalError())
79 Environment::Environment(StringRef Code
, StringRef FileName
,
80 unsigned FirstStartColumn
, unsigned NextStartColumn
,
81 unsigned LastStartColumn
)
82 : VirtualSM(new SourceManagerForFile(FileName
, Code
)), SM(VirtualSM
->get()),
83 ID(VirtualSM
->get().getMainFileID()), FirstStartColumn(FirstStartColumn
),
84 NextStartColumn(NextStartColumn
), LastStartColumn(LastStartColumn
) {}
86 TokenAnalyzer::TokenAnalyzer(const Environment
&Env
, const FormatStyle
&Style
)
87 : Style(Style
), Env(Env
),
88 AffectedRangeMgr(Env
.getSourceManager(), Env
.getCharRanges()),
90 Encoding(encoding::detectEncoding(
91 Env
.getSourceManager().getBufferData(Env
.getFileID()))) {
93 llvm::dbgs() << "File encoding: "
94 << (Encoding
== encoding::Encoding_UTF8
? "UTF8" : "unknown")
96 LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style
.Language
)
100 std::pair
<tooling::Replacements
, unsigned>
101 TokenAnalyzer::process(bool SkipAnnotation
) {
102 tooling::Replacements Result
;
103 llvm::SpecificBumpPtrAllocator
<FormatToken
> Allocator
;
104 IdentifierTable
IdentTable(getFormattingLangOpts(Style
));
105 FormatTokenLexer
Lex(Env
.getSourceManager(), Env
.getFileID(),
106 Env
.getFirstStartColumn(), Style
, Encoding
, Allocator
,
109 ArrayRef
<FormatToken
*> Toks(Lex
.lex());
110 SmallVector
<FormatToken
*, 10> Tokens(Toks
.begin(), Toks
.end());
111 UnwrappedLineParser
Parser(Style
, Lex
.getKeywords(),
112 Env
.getFirstStartColumn(), Tokens
, *this);
114 assert(UnwrappedLines
.back().empty());
115 unsigned Penalty
= 0;
116 for (unsigned Run
= 0, RunE
= UnwrappedLines
.size(); Run
+ 1 != RunE
; ++Run
) {
117 const auto &Lines
= UnwrappedLines
[Run
];
118 LLVM_DEBUG(llvm::dbgs() << "Run " << Run
<< "...\n");
119 SmallVector
<AnnotatedLine
*, 16> AnnotatedLines
;
120 AnnotatedLines
.reserve(Lines
.size());
122 TokenAnnotator
Annotator(Style
, Lex
.getKeywords());
123 for (const UnwrappedLine
&Line
: Lines
) {
124 AnnotatedLines
.push_back(new AnnotatedLine(Line
));
126 Annotator
.annotate(*AnnotatedLines
.back());
129 std::pair
<tooling::Replacements
, unsigned> RunResult
=
130 analyze(Annotator
, AnnotatedLines
, Lex
);
133 llvm::dbgs() << "Replacements for run " << Run
<< ":\n";
134 for (const tooling::Replacement
&Fix
: RunResult
.first
)
135 llvm::dbgs() << Fix
.toString() << "\n";
137 for (AnnotatedLine
*Line
: AnnotatedLines
)
140 Penalty
+= RunResult
.second
;
141 for (const auto &R
: RunResult
.first
) {
142 auto Err
= Result
.add(R
);
143 // FIXME: better error handling here. For now, simply return an empty
144 // Replacements to indicate failure.
146 llvm::errs() << llvm::toString(std::move(Err
)) << "\n";
147 return {tooling::Replacements(), 0};
151 return {Result
, Penalty
};
154 void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine
&TheLine
) {
155 assert(!UnwrappedLines
.empty());
156 UnwrappedLines
.back().push_back(TheLine
);
159 void TokenAnalyzer::finishRun() {
160 UnwrappedLines
.push_back(SmallVector
<UnwrappedLine
, 16>());
163 } // end namespace format
164 } // end namespace clang