//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SourceMgr class. This class is used as a simple
// substrate for diagnostics, #include handling, and other low level things for
// simple parsers.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
/// Tab stop interval, in columns, used when expanding tab characters while
/// printing source, caret and fix-it lines.
static const size_t TabStop = 8;
41 unsigned SourceMgr::AddIncludeFile(const std::string
&Filename
,
43 std::string
&IncludedFile
) {
44 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> NewBufOrErr
=
45 OpenIncludeFile(Filename
, IncludedFile
);
49 return AddNewSourceBuffer(std::move(*NewBufOrErr
), IncludeLoc
);
52 ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
53 SourceMgr::OpenIncludeFile(const std::string
&Filename
,
54 std::string
&IncludedFile
) {
55 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> NewBufOrErr
=
56 MemoryBuffer::getFile(Filename
);
58 SmallString
<64> Buffer(Filename
);
59 // If the file didn't exist directly, see if it's in an include path.
60 for (unsigned i
= 0, e
= IncludeDirectories
.size(); i
!= e
&& !NewBufOrErr
;
62 Buffer
= IncludeDirectories
[i
];
63 sys::path::append(Buffer
, Filename
);
64 NewBufOrErr
= MemoryBuffer::getFile(Buffer
);
68 IncludedFile
= static_cast<std::string
>(Buffer
);
73 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc
) const {
74 for (unsigned i
= 0, e
= Buffers
.size(); i
!= e
; ++i
)
75 if (Loc
.getPointer() >= Buffers
[i
].Buffer
->getBufferStart() &&
76 // Use <= here so that a pointer to the null at the end of the buffer
77 // is included as part of the buffer.
78 Loc
.getPointer() <= Buffers
[i
].Buffer
->getBufferEnd())
84 static std::vector
<T
> &GetOrCreateOffsetCache(void *&OffsetCache
,
85 MemoryBuffer
*Buffer
) {
87 return *static_cast<std::vector
<T
> *>(OffsetCache
);
89 // Lazily fill in the offset cache.
90 auto *Offsets
= new std::vector
<T
>();
91 size_t Sz
= Buffer
->getBufferSize();
92 assert(Sz
<= std::numeric_limits
<T
>::max());
93 StringRef S
= Buffer
->getBuffer();
94 for (size_t N
= 0; N
< Sz
; ++N
) {
96 Offsets
->push_back(static_cast<T
>(N
));
99 OffsetCache
= Offsets
;
103 template <typename T
>
104 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr
) const {
105 std::vector
<T
> &Offsets
=
106 GetOrCreateOffsetCache
<T
>(OffsetCache
, Buffer
.get());
108 const char *BufStart
= Buffer
->getBufferStart();
109 assert(Ptr
>= BufStart
&& Ptr
<= Buffer
->getBufferEnd());
110 ptrdiff_t PtrDiff
= Ptr
- BufStart
;
111 assert(PtrDiff
>= 0 &&
112 static_cast<size_t>(PtrDiff
) <= std::numeric_limits
<T
>::max());
113 T PtrOffset
= static_cast<T
>(PtrDiff
);
115 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
117 return llvm::lower_bound(Offsets
, PtrOffset
) - Offsets
.begin() + 1;
120 /// Look up a given \p Ptr in the buffer, determining which line it came
122 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr
) const {
123 size_t Sz
= Buffer
->getBufferSize();
124 if (Sz
<= std::numeric_limits
<uint8_t>::max())
125 return getLineNumberSpecialized
<uint8_t>(Ptr
);
126 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
127 return getLineNumberSpecialized
<uint16_t>(Ptr
);
128 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
129 return getLineNumberSpecialized
<uint32_t>(Ptr
);
131 return getLineNumberSpecialized
<uint64_t>(Ptr
);
134 template <typename T
>
135 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
136 unsigned LineNo
) const {
137 std::vector
<T
> &Offsets
=
138 GetOrCreateOffsetCache
<T
>(OffsetCache
, Buffer
.get());
140 // We start counting line and column numbers from 1.
144 const char *BufStart
= Buffer
->getBufferStart();
146 // The offset cache contains the location of the \n for the specified line,
147 // we want the start of the line. As such, we look for the previous entry.
150 if (LineNo
> Offsets
.size())
152 return BufStart
+ Offsets
[LineNo
- 1] + 1;
155 /// Return a pointer to the first character of the specified line number or
156 /// null if the line number is invalid.
158 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo
) const {
159 size_t Sz
= Buffer
->getBufferSize();
160 if (Sz
<= std::numeric_limits
<uint8_t>::max())
161 return getPointerForLineNumberSpecialized
<uint8_t>(LineNo
);
162 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
163 return getPointerForLineNumberSpecialized
<uint16_t>(LineNo
);
164 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
165 return getPointerForLineNumberSpecialized
<uint32_t>(LineNo
);
167 return getPointerForLineNumberSpecialized
<uint64_t>(LineNo
);
170 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer
&&Other
)
171 : Buffer(std::move(Other
.Buffer
)), OffsetCache(Other
.OffsetCache
),
172 IncludeLoc(Other
.IncludeLoc
) {
173 Other
.OffsetCache
= nullptr;
176 SourceMgr::SrcBuffer::~SrcBuffer() {
178 size_t Sz
= Buffer
->getBufferSize();
179 if (Sz
<= std::numeric_limits
<uint8_t>::max())
180 delete static_cast<std::vector
<uint8_t> *>(OffsetCache
);
181 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
182 delete static_cast<std::vector
<uint16_t> *>(OffsetCache
);
183 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
184 delete static_cast<std::vector
<uint32_t> *>(OffsetCache
);
186 delete static_cast<std::vector
<uint64_t> *>(OffsetCache
);
187 OffsetCache
= nullptr;
191 std::pair
<unsigned, unsigned>
192 SourceMgr::getLineAndColumn(SMLoc Loc
, unsigned BufferID
) const {
194 BufferID
= FindBufferContainingLoc(Loc
);
195 assert(BufferID
&& "Invalid location!");
197 auto &SB
= getBufferInfo(BufferID
);
198 const char *Ptr
= Loc
.getPointer();
200 unsigned LineNo
= SB
.getLineNumber(Ptr
);
201 const char *BufStart
= SB
.Buffer
->getBufferStart();
202 size_t NewlineOffs
= StringRef(BufStart
, Ptr
- BufStart
).find_last_of("\n\r");
203 if (NewlineOffs
== StringRef::npos
)
204 NewlineOffs
= ~(size_t)0;
205 return std::make_pair(LineNo
, Ptr
- BufStart
- NewlineOffs
);
208 // FIXME: Note that the formatting of source locations is spread between
209 // multiple functions, some in SourceMgr and some in SMDiagnostic. A better
210 // solution would be a general-purpose source location formatter
211 // in one of those two classes, or possibly in SMLoc.
213 /// Get a string with the source location formatted in the standard
214 /// style, but without the line offset. If \p IncludePath is true, the path
215 /// is included. If false, only the file name and extension are included.
216 std::string
SourceMgr::getFormattedLocationNoOffset(SMLoc Loc
,
217 bool IncludePath
) const {
218 auto BufferID
= FindBufferContainingLoc(Loc
);
219 assert(BufferID
&& "Invalid location!");
220 auto FileSpec
= getBufferInfo(BufferID
).Buffer
->getBufferIdentifier();
223 return FileSpec
.str() + ":" + std::to_string(FindLineNumber(Loc
, BufferID
));
225 auto I
= FileSpec
.find_last_of("/\\");
226 I
= (I
== FileSpec
.size()) ? 0 : (I
+ 1);
227 return FileSpec
.substr(I
).str() + ":" +
228 std::to_string(FindLineNumber(Loc
, BufferID
));
232 /// Given a line and column number in a mapped buffer, turn it into an SMLoc.
233 /// This will return a null SMLoc if the line/column location is invalid.
234 SMLoc
SourceMgr::FindLocForLineAndColumn(unsigned BufferID
, unsigned LineNo
,
236 auto &SB
= getBufferInfo(BufferID
);
237 const char *Ptr
= SB
.getPointerForLineNumber(LineNo
);
241 // We start counting line and column numbers from 1.
245 // If we have a column number, validate it.
247 // Make sure the location is within the current line.
248 if (Ptr
+ ColNo
> SB
.Buffer
->getBufferEnd())
251 // Make sure there is no newline in the way.
252 if (StringRef(Ptr
, ColNo
).find_first_of("\n\r") != StringRef::npos
)
258 return SMLoc::getFromPointer(Ptr
);
261 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc
, raw_ostream
&OS
) const {
262 if (IncludeLoc
== SMLoc())
263 return; // Top of stack.
265 unsigned CurBuf
= FindBufferContainingLoc(IncludeLoc
);
266 assert(CurBuf
&& "Invalid or unspecified location!");
268 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
270 OS
<< "Included from " << getBufferInfo(CurBuf
).Buffer
->getBufferIdentifier()
271 << ":" << FindLineNumber(IncludeLoc
, CurBuf
) << ":\n";
274 SMDiagnostic
SourceMgr::GetMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
275 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
276 ArrayRef
<SMFixIt
> FixIts
) const {
277 // First thing to do: find the current buffer containing the specified
278 // location to pull out the source line.
279 SmallVector
<std::pair
<unsigned, unsigned>, 4> ColRanges
;
280 std::pair
<unsigned, unsigned> LineAndCol
;
281 StringRef BufferID
= "<unknown>";
285 unsigned CurBuf
= FindBufferContainingLoc(Loc
);
286 assert(CurBuf
&& "Invalid or unspecified location!");
288 const MemoryBuffer
*CurMB
= getMemoryBuffer(CurBuf
);
289 BufferID
= CurMB
->getBufferIdentifier();
291 // Scan backward to find the start of the line.
292 const char *LineStart
= Loc
.getPointer();
293 const char *BufStart
= CurMB
->getBufferStart();
294 while (LineStart
!= BufStart
&& LineStart
[-1] != '\n' &&
295 LineStart
[-1] != '\r')
298 // Get the end of the line.
299 const char *LineEnd
= Loc
.getPointer();
300 const char *BufEnd
= CurMB
->getBufferEnd();
301 while (LineEnd
!= BufEnd
&& LineEnd
[0] != '\n' && LineEnd
[0] != '\r')
303 LineStr
= StringRef(LineStart
, LineEnd
- LineStart
);
305 // Convert any ranges to column ranges that only intersect the line of the
307 for (SMRange R
: Ranges
) {
311 // If the line doesn't contain any part of the range, then ignore it.
312 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
315 // Ignore pieces of the range that go onto other lines.
316 if (R
.Start
.getPointer() < LineStart
)
317 R
.Start
= SMLoc::getFromPointer(LineStart
);
318 if (R
.End
.getPointer() > LineEnd
)
319 R
.End
= SMLoc::getFromPointer(LineEnd
);
321 // Translate from SMLoc ranges to column ranges.
322 // FIXME: Handle multibyte characters.
323 ColRanges
.push_back(std::make_pair(R
.Start
.getPointer() - LineStart
,
324 R
.End
.getPointer() - LineStart
));
327 LineAndCol
= getLineAndColumn(Loc
, CurBuf
);
330 return SMDiagnostic(*this, Loc
, BufferID
, LineAndCol
.first
,
331 LineAndCol
.second
- 1, Kind
, Msg
.str(), LineStr
,
335 void SourceMgr::PrintMessage(raw_ostream
&OS
, const SMDiagnostic
&Diagnostic
,
336 bool ShowColors
) const {
337 // Report the message with the diagnostic handler if present.
339 DiagHandler(Diagnostic
, DiagContext
);
343 if (Diagnostic
.getLoc().isValid()) {
344 unsigned CurBuf
= FindBufferContainingLoc(Diagnostic
.getLoc());
345 assert(CurBuf
&& "Invalid or unspecified location!");
346 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
349 Diagnostic
.print(nullptr, OS
, ShowColors
);
352 void SourceMgr::PrintMessage(raw_ostream
&OS
, SMLoc Loc
,
353 SourceMgr::DiagKind Kind
, const Twine
&Msg
,
354 ArrayRef
<SMRange
> Ranges
, ArrayRef
<SMFixIt
> FixIts
,
355 bool ShowColors
) const {
356 PrintMessage(OS
, GetMessage(Loc
, Kind
, Msg
, Ranges
, FixIts
), ShowColors
);
359 void SourceMgr::PrintMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
360 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
361 ArrayRef
<SMFixIt
> FixIts
, bool ShowColors
) const {
362 PrintMessage(errs(), Loc
, Kind
, Msg
, Ranges
, FixIts
, ShowColors
);
365 //===----------------------------------------------------------------------===//
366 // SMFixIt Implementation
367 //===----------------------------------------------------------------------===//
369 SMFixIt::SMFixIt(SMRange R
, const Twine
&Replacement
)
370 : Range(R
), Text(Replacement
.str()) {
374 //===----------------------------------------------------------------------===//
375 // SMDiagnostic Implementation
376 //===----------------------------------------------------------------------===//
378 SMDiagnostic::SMDiagnostic(const SourceMgr
&sm
, SMLoc L
, StringRef FN
, int Line
,
379 int Col
, SourceMgr::DiagKind Kind
, StringRef Msg
,
381 ArrayRef
<std::pair
<unsigned, unsigned>> Ranges
,
382 ArrayRef
<SMFixIt
> Hints
)
383 : SM(&sm
), Loc(L
), Filename(std::string(FN
)), LineNo(Line
), ColumnNo(Col
),
384 Kind(Kind
), Message(Msg
), LineContents(LineStr
), Ranges(Ranges
.vec()),
385 FixIts(Hints
.begin(), Hints
.end()) {
389 static void buildFixItLine(std::string
&CaretLine
, std::string
&FixItLine
,
390 ArrayRef
<SMFixIt
> FixIts
,
391 ArrayRef
<char> SourceLine
) {
395 const char *LineStart
= SourceLine
.begin();
396 const char *LineEnd
= SourceLine
.end();
398 size_t PrevHintEndCol
= 0;
400 for (const llvm::SMFixIt
&Fixit
: FixIts
) {
401 // If the fixit contains a newline or tab, ignore it.
402 if (Fixit
.getText().find_first_of("\n\r\t") != StringRef::npos
)
405 SMRange R
= Fixit
.getRange();
407 // If the line doesn't contain any part of the range, then ignore it.
408 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
411 // Translate from SMLoc to column.
412 // Ignore pieces of the range that go onto other lines.
413 // FIXME: Handle multibyte characters in the source line.
415 if (R
.Start
.getPointer() < LineStart
)
418 FirstCol
= R
.Start
.getPointer() - LineStart
;
420 // If we inserted a long previous hint, push this one forwards, and add
421 // an extra space to show that this is not part of the previous
422 // completion. This is sort of the best we can do when two hints appear
425 // Note that if this hint is located immediately after the previous
426 // hint, no space will be added, since the location is more important.
427 unsigned HintCol
= FirstCol
;
428 if (HintCol
< PrevHintEndCol
)
429 HintCol
= PrevHintEndCol
+ 1;
431 // FIXME: This assertion is intended to catch unintended use of multibyte
432 // characters in fixits. If we decide to do this, we'll have to track
433 // separate byte widths for the source and fixit lines.
434 assert((size_t)sys::locale::columnWidth(Fixit
.getText()) ==
435 Fixit
.getText().size());
437 // This relies on one byte per column in our fixit hints.
438 unsigned LastColumnModified
= HintCol
+ Fixit
.getText().size();
439 if (LastColumnModified
> FixItLine
.size())
440 FixItLine
.resize(LastColumnModified
, ' ');
442 llvm::copy(Fixit
.getText(), FixItLine
.begin() + HintCol
);
444 PrevHintEndCol
= LastColumnModified
;
446 // For replacements, mark the removal range with '~'.
447 // FIXME: Handle multibyte characters in the source line.
449 if (R
.End
.getPointer() >= LineEnd
)
450 LastCol
= LineEnd
- LineStart
;
452 LastCol
= R
.End
.getPointer() - LineStart
;
454 std::fill(&CaretLine
[FirstCol
], &CaretLine
[LastCol
], '~');
458 static void printSourceLine(raw_ostream
&S
, StringRef LineContents
) {
459 // Print out the source line one character at a time, so we can expand tabs.
460 for (unsigned i
= 0, e
= LineContents
.size(), OutCol
= 0; i
!= e
; ++i
) {
461 size_t NextTab
= LineContents
.find('\t', i
);
462 // If there were no tabs left, print the rest, we are done.
463 if (NextTab
== StringRef::npos
) {
464 S
<< LineContents
.drop_front(i
);
468 // Otherwise, print from i to NextTab.
469 S
<< LineContents
.slice(i
, NextTab
);
470 OutCol
+= NextTab
- i
;
473 // If we have a tab, emit at least one space, then round up to 8 columns.
477 } while ((OutCol
% TabStop
) != 0);
/// Return true if the high bit of \p c is set, i.e. the byte is outside the
/// 7-bit ASCII range.
static bool isNonASCII(char c) { return (c & 0x80) != 0; }
484 void SMDiagnostic::print(const char *ProgName
, raw_ostream
&OS
, bool ShowColors
,
485 bool ShowKindLabel
) const {
486 ColorMode Mode
= ShowColors
? ColorMode::Auto
: ColorMode::Disable
;
489 WithColor
S(OS
, raw_ostream::SAVEDCOLOR
, true, false, Mode
);
491 if (ProgName
&& ProgName
[0])
492 S
<< ProgName
<< ": ";
494 if (!Filename
.empty()) {
503 S
<< ':' << (ColumnNo
+ 1);
511 case SourceMgr::DK_Error
:
512 WithColor::error(OS
, "", !ShowColors
);
514 case SourceMgr::DK_Warning
:
515 WithColor::warning(OS
, "", !ShowColors
);
517 case SourceMgr::DK_Note
:
518 WithColor::note(OS
, "", !ShowColors
);
520 case SourceMgr::DK_Remark
:
521 WithColor::remark(OS
, "", !ShowColors
);
526 WithColor(OS
, raw_ostream::SAVEDCOLOR
, true, false, Mode
) << Message
<< '\n';
528 if (LineNo
== -1 || ColumnNo
== -1)
531 // FIXME: If there are multibyte or multi-column characters in the source, all
532 // our ranges will be wrong. To do this properly, we'll need a byte-to-column
533 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
534 // expanding them later, and bail out rather than show incorrect ranges and
535 // misaligned fixits for any other odd characters.
536 if (any_of(LineContents
, isNonASCII
)) {
537 printSourceLine(OS
, LineContents
);
540 size_t NumColumns
= LineContents
.size();
542 // Build the line with the caret and ranges.
543 std::string
CaretLine(NumColumns
+ 1, ' ');
545 // Expand any ranges.
546 for (const std::pair
<unsigned, unsigned> &R
: Ranges
)
547 std::fill(&CaretLine
[R
.first
],
548 &CaretLine
[std::min((size_t)R
.second
, CaretLine
.size())], '~');
551 // FIXME: Find the beginning of the line properly for multibyte characters.
552 std::string FixItInsertionLine
;
553 buildFixItLine(CaretLine
, FixItInsertionLine
, FixIts
,
554 ArrayRef(Loc
.getPointer() - ColumnNo
, LineContents
.size()));
556 // Finally, plop on the caret.
557 if (unsigned(ColumnNo
) <= NumColumns
)
558 CaretLine
[ColumnNo
] = '^';
560 CaretLine
[NumColumns
] = '^';
562 // ... and remove trailing whitespace so the output doesn't wrap for it. We
563 // know that the line isn't completely empty because it has the caret in it at
565 CaretLine
.erase(CaretLine
.find_last_not_of(' ') + 1);
567 printSourceLine(OS
, LineContents
);
570 ColorMode Mode
= ShowColors
? ColorMode::Auto
: ColorMode::Disable
;
571 WithColor
S(OS
, raw_ostream::GREEN
, true, false, Mode
);
573 // Print out the caret line, matching tabs in the source line.
574 for (unsigned i
= 0, e
= CaretLine
.size(), OutCol
= 0; i
!= e
; ++i
) {
575 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
581 // Okay, we have a tab. Insert the appropriate number of characters.
585 } while ((OutCol
% TabStop
) != 0);
590 // Print out the replacement line, matching tabs in the source line.
591 if (FixItInsertionLine
.empty())
594 for (size_t i
= 0, e
= FixItInsertionLine
.size(), OutCol
= 0; i
< e
; ++i
) {
595 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
596 OS
<< FixItInsertionLine
[i
];
601 // Okay, we have a tab. Insert the appropriate number of characters.
603 OS
<< FixItInsertionLine
[i
];
604 // FIXME: This is trying not to break up replacements, but then to re-sync
605 // with the tabs between replacements. This will fail, though, if two
606 // fix-it replacements are exactly adjacent, or if a fix-it contains a
607 // space. Really we should be precomputing column widths, which we'll
608 // need anyway for multibyte chars.
609 if (FixItInsertionLine
[i
] != ' ')
612 } while (((OutCol
% TabStop
) != 0) && i
!= e
);