1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file implements the SourceMgr class.  This class is used as a simple
// substrate for diagnostics, #include handling, and other low level things for
// simple parsers.
13 //===----------------------------------------------------------------------===//
#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;

/// Column multiple that tab characters are padded to when source, caret and
/// fix-it lines are printed.
static const size_t TabStop = 8;
40 unsigned SourceMgr::AddIncludeFile(const std::string
&Filename
,
42 std::string
&IncludedFile
) {
43 IncludedFile
= Filename
;
44 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> NewBufOrErr
=
45 MemoryBuffer::getFile(IncludedFile
);
47 // If the file didn't exist directly, see if it's in an include path.
48 for (unsigned i
= 0, e
= IncludeDirectories
.size(); i
!= e
&& !NewBufOrErr
;
51 IncludeDirectories
[i
] + sys::path::get_separator().data() + Filename
;
52 NewBufOrErr
= MemoryBuffer::getFile(IncludedFile
);
58 return AddNewSourceBuffer(std::move(*NewBufOrErr
), IncludeLoc
);
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc
) const {
62 for (unsigned i
= 0, e
= Buffers
.size(); i
!= e
; ++i
)
63 if (Loc
.getPointer() >= Buffers
[i
].Buffer
->getBufferStart() &&
64 // Use <= here so that a pointer to the null at the end of the buffer
65 // is included as part of the buffer.
66 Loc
.getPointer() <= Buffers
[i
].Buffer
->getBufferEnd())
72 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr
) const {
74 // Ensure OffsetCache is allocated and populated with offsets of all the
76 std::vector
<T
> *Offsets
= nullptr;
77 if (OffsetCache
.isNull()) {
78 Offsets
= new std::vector
<T
>();
79 OffsetCache
= Offsets
;
80 size_t Sz
= Buffer
->getBufferSize();
81 assert(Sz
<= std::numeric_limits
<T
>::max());
82 StringRef S
= Buffer
->getBuffer();
83 for (size_t N
= 0; N
< Sz
; ++N
) {
85 Offsets
->push_back(static_cast<T
>(N
));
89 Offsets
= OffsetCache
.get
<std::vector
<T
> *>();
92 const char *BufStart
= Buffer
->getBufferStart();
93 assert(Ptr
>= BufStart
&& Ptr
<= Buffer
->getBufferEnd());
94 ptrdiff_t PtrDiff
= Ptr
- BufStart
;
95 assert(PtrDiff
>= 0 && static_cast<size_t>(PtrDiff
) <= std::numeric_limits
<T
>::max());
96 T PtrOffset
= static_cast<T
>(PtrDiff
);
98 // std::lower_bound returns the first EOL offset that's not-less-than
99 // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
100 // (including if PtrOffset refers to the EOL itself). If there's no such
101 // EOL, returns end().
102 auto EOL
= std::lower_bound(Offsets
->begin(), Offsets
->end(), PtrOffset
);
104 // Lines count from 1, so add 1 to the distance from the 0th line.
105 return (1 + (EOL
- Offsets
->begin()));
108 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer
&&Other
)
109 : Buffer(std::move(Other
.Buffer
)),
110 OffsetCache(Other
.OffsetCache
),
111 IncludeLoc(Other
.IncludeLoc
) {
112 Other
.OffsetCache
= nullptr;
115 SourceMgr::SrcBuffer::~SrcBuffer() {
116 if (!OffsetCache
.isNull()) {
117 if (OffsetCache
.is
<std::vector
<uint8_t>*>())
118 delete OffsetCache
.get
<std::vector
<uint8_t>*>();
119 else if (OffsetCache
.is
<std::vector
<uint16_t>*>())
120 delete OffsetCache
.get
<std::vector
<uint16_t>*>();
121 else if (OffsetCache
.is
<std::vector
<uint32_t>*>())
122 delete OffsetCache
.get
<std::vector
<uint32_t>*>();
124 delete OffsetCache
.get
<std::vector
<uint64_t>*>();
125 OffsetCache
= nullptr;
129 std::pair
<unsigned, unsigned>
130 SourceMgr::getLineAndColumn(SMLoc Loc
, unsigned BufferID
) const {
132 BufferID
= FindBufferContainingLoc(Loc
);
133 assert(BufferID
&& "Invalid Location!");
135 auto &SB
= getBufferInfo(BufferID
);
136 const char *Ptr
= Loc
.getPointer();
138 size_t Sz
= SB
.Buffer
->getBufferSize();
140 if (Sz
<= std::numeric_limits
<uint8_t>::max())
141 LineNo
= SB
.getLineNumber
<uint8_t>(Ptr
);
142 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
143 LineNo
= SB
.getLineNumber
<uint16_t>(Ptr
);
144 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
145 LineNo
= SB
.getLineNumber
<uint32_t>(Ptr
);
147 LineNo
= SB
.getLineNumber
<uint64_t>(Ptr
);
149 const char *BufStart
= SB
.Buffer
->getBufferStart();
150 size_t NewlineOffs
= StringRef(BufStart
, Ptr
-BufStart
).find_last_of("\n\r");
151 if (NewlineOffs
== StringRef::npos
) NewlineOffs
= ~(size_t)0;
152 return std::make_pair(LineNo
, Ptr
-BufStart
-NewlineOffs
);
155 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc
, raw_ostream
&OS
) const {
156 if (IncludeLoc
== SMLoc()) return; // Top of stack.
158 unsigned CurBuf
= FindBufferContainingLoc(IncludeLoc
);
159 assert(CurBuf
&& "Invalid or unspecified location!");
161 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
163 OS
<< "Included from "
164 << getBufferInfo(CurBuf
).Buffer
->getBufferIdentifier()
165 << ":" << FindLineNumber(IncludeLoc
, CurBuf
) << ":\n";
168 SMDiagnostic
SourceMgr::GetMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
170 ArrayRef
<SMRange
> Ranges
,
171 ArrayRef
<SMFixIt
> FixIts
) const {
172 // First thing to do: find the current buffer containing the specified
173 // location to pull out the source line.
174 SmallVector
<std::pair
<unsigned, unsigned>, 4> ColRanges
;
175 std::pair
<unsigned, unsigned> LineAndCol
;
176 StringRef BufferID
= "<unknown>";
180 unsigned CurBuf
= FindBufferContainingLoc(Loc
);
181 assert(CurBuf
&& "Invalid or unspecified location!");
183 const MemoryBuffer
*CurMB
= getMemoryBuffer(CurBuf
);
184 BufferID
= CurMB
->getBufferIdentifier();
186 // Scan backward to find the start of the line.
187 const char *LineStart
= Loc
.getPointer();
188 const char *BufStart
= CurMB
->getBufferStart();
189 while (LineStart
!= BufStart
&& LineStart
[-1] != '\n' &&
190 LineStart
[-1] != '\r')
193 // Get the end of the line.
194 const char *LineEnd
= Loc
.getPointer();
195 const char *BufEnd
= CurMB
->getBufferEnd();
196 while (LineEnd
!= BufEnd
&& LineEnd
[0] != '\n' && LineEnd
[0] != '\r')
198 LineStr
= std::string(LineStart
, LineEnd
);
200 // Convert any ranges to column ranges that only intersect the line of the
202 for (unsigned i
= 0, e
= Ranges
.size(); i
!= e
; ++i
) {
203 SMRange R
= Ranges
[i
];
204 if (!R
.isValid()) continue;
206 // If the line doesn't contain any part of the range, then ignore it.
207 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
210 // Ignore pieces of the range that go onto other lines.
211 if (R
.Start
.getPointer() < LineStart
)
212 R
.Start
= SMLoc::getFromPointer(LineStart
);
213 if (R
.End
.getPointer() > LineEnd
)
214 R
.End
= SMLoc::getFromPointer(LineEnd
);
216 // Translate from SMLoc ranges to column ranges.
217 // FIXME: Handle multibyte characters.
218 ColRanges
.push_back(std::make_pair(R
.Start
.getPointer()-LineStart
,
219 R
.End
.getPointer()-LineStart
));
222 LineAndCol
= getLineAndColumn(Loc
, CurBuf
);
225 return SMDiagnostic(*this, Loc
, BufferID
, LineAndCol
.first
,
226 LineAndCol
.second
-1, Kind
, Msg
.str(),
227 LineStr
, ColRanges
, FixIts
);
230 void SourceMgr::PrintMessage(raw_ostream
&OS
, const SMDiagnostic
&Diagnostic
,
231 bool ShowColors
) const {
232 // Report the message with the diagnostic handler if present.
234 DiagHandler(Diagnostic
, DiagContext
);
238 if (Diagnostic
.getLoc().isValid()) {
239 unsigned CurBuf
= FindBufferContainingLoc(Diagnostic
.getLoc());
240 assert(CurBuf
&& "Invalid or unspecified location!");
241 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
244 Diagnostic
.print(nullptr, OS
, ShowColors
);
247 void SourceMgr::PrintMessage(raw_ostream
&OS
, SMLoc Loc
,
248 SourceMgr::DiagKind Kind
,
249 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
250 ArrayRef
<SMFixIt
> FixIts
, bool ShowColors
) const {
251 PrintMessage(OS
, GetMessage(Loc
, Kind
, Msg
, Ranges
, FixIts
), ShowColors
);
254 void SourceMgr::PrintMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
255 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
256 ArrayRef
<SMFixIt
> FixIts
, bool ShowColors
) const {
257 PrintMessage(errs(), Loc
, Kind
, Msg
, Ranges
, FixIts
, ShowColors
);
260 //===----------------------------------------------------------------------===//
261 // SMDiagnostic Implementation
262 //===----------------------------------------------------------------------===//
264 SMDiagnostic::SMDiagnostic(const SourceMgr
&sm
, SMLoc L
, StringRef FN
,
265 int Line
, int Col
, SourceMgr::DiagKind Kind
,
266 StringRef Msg
, StringRef LineStr
,
267 ArrayRef
<std::pair
<unsigned,unsigned>> Ranges
,
268 ArrayRef
<SMFixIt
> Hints
)
269 : SM(&sm
), Loc(L
), Filename(FN
), LineNo(Line
), ColumnNo(Col
), Kind(Kind
),
270 Message(Msg
), LineContents(LineStr
), Ranges(Ranges
.vec()),
271 FixIts(Hints
.begin(), Hints
.end()) {
275 static void buildFixItLine(std::string
&CaretLine
, std::string
&FixItLine
,
276 ArrayRef
<SMFixIt
> FixIts
, ArrayRef
<char> SourceLine
){
280 const char *LineStart
= SourceLine
.begin();
281 const char *LineEnd
= SourceLine
.end();
283 size_t PrevHintEndCol
= 0;
285 for (ArrayRef
<SMFixIt
>::iterator I
= FixIts
.begin(), E
= FixIts
.end();
287 // If the fixit contains a newline or tab, ignore it.
288 if (I
->getText().find_first_of("\n\r\t") != StringRef::npos
)
291 SMRange R
= I
->getRange();
293 // If the line doesn't contain any part of the range, then ignore it.
294 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
297 // Translate from SMLoc to column.
298 // Ignore pieces of the range that go onto other lines.
299 // FIXME: Handle multibyte characters in the source line.
301 if (R
.Start
.getPointer() < LineStart
)
304 FirstCol
= R
.Start
.getPointer() - LineStart
;
306 // If we inserted a long previous hint, push this one forwards, and add
307 // an extra space to show that this is not part of the previous
308 // completion. This is sort of the best we can do when two hints appear
311 // Note that if this hint is located immediately after the previous
312 // hint, no space will be added, since the location is more important.
313 unsigned HintCol
= FirstCol
;
314 if (HintCol
< PrevHintEndCol
)
315 HintCol
= PrevHintEndCol
+ 1;
317 // FIXME: This assertion is intended to catch unintended use of multibyte
318 // characters in fixits. If we decide to do this, we'll have to track
319 // separate byte widths for the source and fixit lines.
320 assert((size_t)sys::locale::columnWidth(I
->getText()) ==
321 I
->getText().size());
323 // This relies on one byte per column in our fixit hints.
324 unsigned LastColumnModified
= HintCol
+ I
->getText().size();
325 if (LastColumnModified
> FixItLine
.size())
326 FixItLine
.resize(LastColumnModified
, ' ');
328 std::copy(I
->getText().begin(), I
->getText().end(),
329 FixItLine
.begin() + HintCol
);
331 PrevHintEndCol
= LastColumnModified
;
333 // For replacements, mark the removal range with '~'.
334 // FIXME: Handle multibyte characters in the source line.
336 if (R
.End
.getPointer() >= LineEnd
)
337 LastCol
= LineEnd
- LineStart
;
339 LastCol
= R
.End
.getPointer() - LineStart
;
341 std::fill(&CaretLine
[FirstCol
], &CaretLine
[LastCol
], '~');
345 static void printSourceLine(raw_ostream
&S
, StringRef LineContents
) {
346 // Print out the source line one character at a time, so we can expand tabs.
347 for (unsigned i
= 0, e
= LineContents
.size(), OutCol
= 0; i
!= e
; ++i
) {
348 size_t NextTab
= LineContents
.find('\t', i
);
349 // If there were no tabs left, print the rest, we are done.
350 if (NextTab
== StringRef::npos
) {
351 S
<< LineContents
.drop_front(i
);
355 // Otherwise, print from i to NextTab.
356 S
<< LineContents
.slice(i
, NextTab
);
357 OutCol
+= NextTab
- i
;
360 // If we have a tab, emit at least one space, then round up to 8 columns.
364 } while ((OutCol
% TabStop
) != 0);
/// Return true if \p c has its high bit (0x80) set, i.e. it is not a 7-bit
/// ASCII character.
static bool isNonASCII(char c) {
  return (c & 0x80) != 0;
}
373 void SMDiagnostic::print(const char *ProgName
, raw_ostream
&OS
,
374 bool ShowColors
, bool ShowKindLabel
) const {
376 WithColor
S(OS
, raw_ostream::SAVEDCOLOR
, true, false, !ShowColors
);
378 if (ProgName
&& ProgName
[0])
379 S
<< ProgName
<< ": ";
381 if (!Filename
.empty()) {
390 S
<< ':' << (ColumnNo
+ 1);
398 case SourceMgr::DK_Error
:
399 WithColor::error(OS
, "", !ShowColors
);
401 case SourceMgr::DK_Warning
:
402 WithColor::warning(OS
, "", !ShowColors
);
404 case SourceMgr::DK_Note
:
405 WithColor::note(OS
, "", !ShowColors
);
407 case SourceMgr::DK_Remark
:
408 WithColor::remark(OS
, "", !ShowColors
);
413 WithColor(OS
, raw_ostream::SAVEDCOLOR
, true, false, !ShowColors
)
416 if (LineNo
== -1 || ColumnNo
== -1)
419 // FIXME: If there are multibyte or multi-column characters in the source, all
420 // our ranges will be wrong. To do this properly, we'll need a byte-to-column
421 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
422 // expanding them later, and bail out rather than show incorrect ranges and
423 // misaligned fixits for any other odd characters.
424 if (find_if(LineContents
, isNonASCII
) != LineContents
.end()) {
425 printSourceLine(OS
, LineContents
);
428 size_t NumColumns
= LineContents
.size();
430 // Build the line with the caret and ranges.
431 std::string
CaretLine(NumColumns
+1, ' ');
433 // Expand any ranges.
434 for (unsigned r
= 0, e
= Ranges
.size(); r
!= e
; ++r
) {
435 std::pair
<unsigned, unsigned> R
= Ranges
[r
];
436 std::fill(&CaretLine
[R
.first
],
437 &CaretLine
[std::min((size_t)R
.second
, CaretLine
.size())],
442 // FIXME: Find the beginning of the line properly for multibyte characters.
443 std::string FixItInsertionLine
;
444 buildFixItLine(CaretLine
, FixItInsertionLine
, FixIts
,
445 makeArrayRef(Loc
.getPointer() - ColumnNo
,
446 LineContents
.size()));
448 // Finally, plop on the caret.
449 if (unsigned(ColumnNo
) <= NumColumns
)
450 CaretLine
[ColumnNo
] = '^';
452 CaretLine
[NumColumns
] = '^';
454 // ... and remove trailing whitespace so the output doesn't wrap for it. We
455 // know that the line isn't completely empty because it has the caret in it at
457 CaretLine
.erase(CaretLine
.find_last_not_of(' ')+1);
459 printSourceLine(OS
, LineContents
);
462 WithColor
S(OS
, raw_ostream::GREEN
, true, false, !ShowColors
);
464 // Print out the caret line, matching tabs in the source line.
465 for (unsigned i
= 0, e
= CaretLine
.size(), OutCol
= 0; i
!= e
; ++i
) {
466 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
472 // Okay, we have a tab. Insert the appropriate number of characters.
476 } while ((OutCol
% TabStop
) != 0);
481 // Print out the replacement line, matching tabs in the source line.
482 if (FixItInsertionLine
.empty())
485 for (size_t i
= 0, e
= FixItInsertionLine
.size(), OutCol
= 0; i
< e
; ++i
) {
486 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
487 OS
<< FixItInsertionLine
[i
];
492 // Okay, we have a tab. Insert the appropriate number of characters.
494 OS
<< FixItInsertionLine
[i
];
495 // FIXME: This is trying not to break up replacements, but then to re-sync
496 // with the tabs between replacements. This will fail, though, if two
497 // fix-it replacements are exactly adjacent, or if a fix-it contains a
498 // space. Really we should be precomputing column widths, which we'll
499 // need anyway for multibyte chars.
500 if (FixItInsertionLine
[i
] != ' ')
503 } while (((OutCol
% TabStop
) != 0) && i
!= e
);