1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file implements the SourceMgr class. This class is used as a simple
// substrate for diagnostics, #include handling, and other low level things for
// simple parsers.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Support/SourceMgr.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include "llvm/Support/Locale.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SMLoc.h"
26 #include "llvm/Support/WithColor.h"
27 #include "llvm/Support/raw_ostream.h"
/// Column interval between tab stops; tab expansion in this file pads output
/// until the column is a multiple of this value.
static constexpr size_t TabStop{8};
40 unsigned SourceMgr::AddIncludeFile(const std::string
&Filename
,
42 std::string
&IncludedFile
) {
43 IncludedFile
= Filename
;
44 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> NewBufOrErr
=
45 MemoryBuffer::getFile(IncludedFile
);
47 // If the file didn't exist directly, see if it's in an include path.
48 for (unsigned i
= 0, e
= IncludeDirectories
.size(); i
!= e
&& !NewBufOrErr
;
51 IncludeDirectories
[i
] + sys::path::get_separator().data() + Filename
;
52 NewBufOrErr
= MemoryBuffer::getFile(IncludedFile
);
58 return AddNewSourceBuffer(std::move(*NewBufOrErr
), IncludeLoc
);
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc
) const {
62 for (unsigned i
= 0, e
= Buffers
.size(); i
!= e
; ++i
)
63 if (Loc
.getPointer() >= Buffers
[i
].Buffer
->getBufferStart() &&
64 // Use <= here so that a pointer to the null at the end of the buffer
65 // is included as part of the buffer.
66 Loc
.getPointer() <= Buffers
[i
].Buffer
->getBufferEnd())
72 static std::vector
<T
> &GetOrCreateOffsetCache(void *&OffsetCache
,
73 MemoryBuffer
*Buffer
) {
75 return *static_cast<std::vector
<T
> *>(OffsetCache
);
77 // Lazily fill in the offset cache.
78 auto *Offsets
= new std::vector
<T
>();
79 size_t Sz
= Buffer
->getBufferSize();
80 assert(Sz
<= std::numeric_limits
<T
>::max());
81 StringRef S
= Buffer
->getBuffer();
82 for (size_t N
= 0; N
< Sz
; ++N
) {
84 Offsets
->push_back(static_cast<T
>(N
));
87 OffsetCache
= Offsets
;
92 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr
) const {
93 std::vector
<T
> &Offsets
=
94 GetOrCreateOffsetCache
<T
>(OffsetCache
, Buffer
.get());
96 const char *BufStart
= Buffer
->getBufferStart();
97 assert(Ptr
>= BufStart
&& Ptr
<= Buffer
->getBufferEnd());
98 ptrdiff_t PtrDiff
= Ptr
- BufStart
;
99 assert(PtrDiff
>= 0 &&
100 static_cast<size_t>(PtrDiff
) <= std::numeric_limits
<T
>::max());
101 T PtrOffset
= static_cast<T
>(PtrDiff
);
103 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
105 return llvm::lower_bound(Offsets
, PtrOffset
) - Offsets
.begin() + 1;
108 /// Look up a given \p Ptr in in the buffer, determining which line it came
110 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr
) const {
111 size_t Sz
= Buffer
->getBufferSize();
112 if (Sz
<= std::numeric_limits
<uint8_t>::max())
113 return getLineNumberSpecialized
<uint8_t>(Ptr
);
114 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
115 return getLineNumberSpecialized
<uint16_t>(Ptr
);
116 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
117 return getLineNumberSpecialized
<uint32_t>(Ptr
);
119 return getLineNumberSpecialized
<uint64_t>(Ptr
);
122 template <typename T
>
123 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
124 unsigned LineNo
) const {
125 std::vector
<T
> &Offsets
=
126 GetOrCreateOffsetCache
<T
>(OffsetCache
, Buffer
.get());
128 // We start counting line and column numbers from 1.
132 const char *BufStart
= Buffer
->getBufferStart();
134 // The offset cache contains the location of the \n for the specified line,
135 // we want the start of the line. As such, we look for the previous entry.
138 if (LineNo
> Offsets
.size())
140 return BufStart
+ Offsets
[LineNo
- 1] + 1;
143 /// Return a pointer to the first character of the specified line number or
144 /// null if the line number is invalid.
146 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo
) const {
147 size_t Sz
= Buffer
->getBufferSize();
148 if (Sz
<= std::numeric_limits
<uint8_t>::max())
149 return getPointerForLineNumberSpecialized
<uint8_t>(LineNo
);
150 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
151 return getPointerForLineNumberSpecialized
<uint16_t>(LineNo
);
152 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
153 return getPointerForLineNumberSpecialized
<uint32_t>(LineNo
);
155 return getPointerForLineNumberSpecialized
<uint64_t>(LineNo
);
158 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer
&&Other
)
159 : Buffer(std::move(Other
.Buffer
)), OffsetCache(Other
.OffsetCache
),
160 IncludeLoc(Other
.IncludeLoc
) {
161 Other
.OffsetCache
= nullptr;
164 SourceMgr::SrcBuffer::~SrcBuffer() {
166 size_t Sz
= Buffer
->getBufferSize();
167 if (Sz
<= std::numeric_limits
<uint8_t>::max())
168 delete static_cast<std::vector
<uint8_t> *>(OffsetCache
);
169 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
170 delete static_cast<std::vector
<uint16_t> *>(OffsetCache
);
171 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
172 delete static_cast<std::vector
<uint32_t> *>(OffsetCache
);
174 delete static_cast<std::vector
<uint64_t> *>(OffsetCache
);
175 OffsetCache
= nullptr;
179 std::pair
<unsigned, unsigned>
180 SourceMgr::getLineAndColumn(SMLoc Loc
, unsigned BufferID
) const {
182 BufferID
= FindBufferContainingLoc(Loc
);
183 assert(BufferID
&& "Invalid location!");
185 auto &SB
= getBufferInfo(BufferID
);
186 const char *Ptr
= Loc
.getPointer();
188 unsigned LineNo
= SB
.getLineNumber(Ptr
);
189 const char *BufStart
= SB
.Buffer
->getBufferStart();
190 size_t NewlineOffs
= StringRef(BufStart
, Ptr
- BufStart
).find_last_of("\n\r");
191 if (NewlineOffs
== StringRef::npos
)
192 NewlineOffs
= ~(size_t)0;
193 return std::make_pair(LineNo
, Ptr
- BufStart
- NewlineOffs
);
196 // FIXME: Note that the formatting of source locations is spread between
197 // multiple functions, some in SourceMgr and some in SMDiagnostic. A better
198 // solution would be a general-purpose source location formatter
199 // in one of those two classes, or possibly in SMLoc.
201 /// Get a string with the source location formatted in the standard
202 /// style, but without the line offset. If \p IncludePath is true, the path
203 /// is included. If false, only the file name and extension are included.
204 std::string
SourceMgr::getFormattedLocationNoOffset(SMLoc Loc
,
205 bool IncludePath
) const {
206 auto BufferID
= FindBufferContainingLoc(Loc
);
207 assert(BufferID
&& "Invalid location!");
208 auto FileSpec
= getBufferInfo(BufferID
).Buffer
->getBufferIdentifier();
211 return FileSpec
.str() + ":" + std::to_string(FindLineNumber(Loc
, BufferID
));
213 auto I
= FileSpec
.find_last_of("/\\");
214 I
= (I
== FileSpec
.size()) ? 0 : (I
+ 1);
215 return FileSpec
.substr(I
).str() + ":" +
216 std::to_string(FindLineNumber(Loc
, BufferID
));
220 /// Given a line and column number in a mapped buffer, turn it into an SMLoc.
221 /// This will return a null SMLoc if the line/column location is invalid.
222 SMLoc
SourceMgr::FindLocForLineAndColumn(unsigned BufferID
, unsigned LineNo
,
224 auto &SB
= getBufferInfo(BufferID
);
225 const char *Ptr
= SB
.getPointerForLineNumber(LineNo
);
229 // We start counting line and column numbers from 1.
233 // If we have a column number, validate it.
235 // Make sure the location is within the current line.
236 if (Ptr
+ ColNo
> SB
.Buffer
->getBufferEnd())
239 // Make sure there is no newline in the way.
240 if (StringRef(Ptr
, ColNo
).find_first_of("\n\r") != StringRef::npos
)
246 return SMLoc::getFromPointer(Ptr
);
249 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc
, raw_ostream
&OS
) const {
250 if (IncludeLoc
== SMLoc())
251 return; // Top of stack.
253 unsigned CurBuf
= FindBufferContainingLoc(IncludeLoc
);
254 assert(CurBuf
&& "Invalid or unspecified location!");
256 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
258 OS
<< "Included from " << getBufferInfo(CurBuf
).Buffer
->getBufferIdentifier()
259 << ":" << FindLineNumber(IncludeLoc
, CurBuf
) << ":\n";
262 SMDiagnostic
SourceMgr::GetMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
263 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
264 ArrayRef
<SMFixIt
> FixIts
) const {
265 // First thing to do: find the current buffer containing the specified
266 // location to pull out the source line.
267 SmallVector
<std::pair
<unsigned, unsigned>, 4> ColRanges
;
268 std::pair
<unsigned, unsigned> LineAndCol
;
269 StringRef BufferID
= "<unknown>";
273 unsigned CurBuf
= FindBufferContainingLoc(Loc
);
274 assert(CurBuf
&& "Invalid or unspecified location!");
276 const MemoryBuffer
*CurMB
= getMemoryBuffer(CurBuf
);
277 BufferID
= CurMB
->getBufferIdentifier();
279 // Scan backward to find the start of the line.
280 const char *LineStart
= Loc
.getPointer();
281 const char *BufStart
= CurMB
->getBufferStart();
282 while (LineStart
!= BufStart
&& LineStart
[-1] != '\n' &&
283 LineStart
[-1] != '\r')
286 // Get the end of the line.
287 const char *LineEnd
= Loc
.getPointer();
288 const char *BufEnd
= CurMB
->getBufferEnd();
289 while (LineEnd
!= BufEnd
&& LineEnd
[0] != '\n' && LineEnd
[0] != '\r')
291 LineStr
= StringRef(LineStart
, LineEnd
- LineStart
);
293 // Convert any ranges to column ranges that only intersect the line of the
295 for (unsigned i
= 0, e
= Ranges
.size(); i
!= e
; ++i
) {
296 SMRange R
= Ranges
[i
];
300 // If the line doesn't contain any part of the range, then ignore it.
301 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
304 // Ignore pieces of the range that go onto other lines.
305 if (R
.Start
.getPointer() < LineStart
)
306 R
.Start
= SMLoc::getFromPointer(LineStart
);
307 if (R
.End
.getPointer() > LineEnd
)
308 R
.End
= SMLoc::getFromPointer(LineEnd
);
310 // Translate from SMLoc ranges to column ranges.
311 // FIXME: Handle multibyte characters.
312 ColRanges
.push_back(std::make_pair(R
.Start
.getPointer() - LineStart
,
313 R
.End
.getPointer() - LineStart
));
316 LineAndCol
= getLineAndColumn(Loc
, CurBuf
);
319 return SMDiagnostic(*this, Loc
, BufferID
, LineAndCol
.first
,
320 LineAndCol
.second
- 1, Kind
, Msg
.str(), LineStr
,
324 void SourceMgr::PrintMessage(raw_ostream
&OS
, const SMDiagnostic
&Diagnostic
,
325 bool ShowColors
) const {
326 // Report the message with the diagnostic handler if present.
328 DiagHandler(Diagnostic
, DiagContext
);
332 if (Diagnostic
.getLoc().isValid()) {
333 unsigned CurBuf
= FindBufferContainingLoc(Diagnostic
.getLoc());
334 assert(CurBuf
&& "Invalid or unspecified location!");
335 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
338 Diagnostic
.print(nullptr, OS
, ShowColors
);
341 void SourceMgr::PrintMessage(raw_ostream
&OS
, SMLoc Loc
,
342 SourceMgr::DiagKind Kind
, const Twine
&Msg
,
343 ArrayRef
<SMRange
> Ranges
, ArrayRef
<SMFixIt
> FixIts
,
344 bool ShowColors
) const {
345 PrintMessage(OS
, GetMessage(Loc
, Kind
, Msg
, Ranges
, FixIts
), ShowColors
);
348 void SourceMgr::PrintMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
349 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
350 ArrayRef
<SMFixIt
> FixIts
, bool ShowColors
) const {
351 PrintMessage(errs(), Loc
, Kind
, Msg
, Ranges
, FixIts
, ShowColors
);
354 //===----------------------------------------------------------------------===//
355 // SMFixIt Implementation
356 //===----------------------------------------------------------------------===//
358 SMFixIt::SMFixIt(SMRange R
, const Twine
&Replacement
)
359 : Range(R
), Text(Replacement
.str()) {
363 //===----------------------------------------------------------------------===//
364 // SMDiagnostic Implementation
365 //===----------------------------------------------------------------------===//
367 SMDiagnostic::SMDiagnostic(const SourceMgr
&sm
, SMLoc L
, StringRef FN
, int Line
,
368 int Col
, SourceMgr::DiagKind Kind
, StringRef Msg
,
370 ArrayRef
<std::pair
<unsigned, unsigned>> Ranges
,
371 ArrayRef
<SMFixIt
> Hints
)
372 : SM(&sm
), Loc(L
), Filename(std::string(FN
)), LineNo(Line
), ColumnNo(Col
),
373 Kind(Kind
), Message(Msg
), LineContents(LineStr
), Ranges(Ranges
.vec()),
374 FixIts(Hints
.begin(), Hints
.end()) {
378 static void buildFixItLine(std::string
&CaretLine
, std::string
&FixItLine
,
379 ArrayRef
<SMFixIt
> FixIts
,
380 ArrayRef
<char> SourceLine
) {
384 const char *LineStart
= SourceLine
.begin();
385 const char *LineEnd
= SourceLine
.end();
387 size_t PrevHintEndCol
= 0;
389 for (const llvm::SMFixIt
&Fixit
: FixIts
) {
390 // If the fixit contains a newline or tab, ignore it.
391 if (Fixit
.getText().find_first_of("\n\r\t") != StringRef::npos
)
394 SMRange R
= Fixit
.getRange();
396 // If the line doesn't contain any part of the range, then ignore it.
397 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
400 // Translate from SMLoc to column.
401 // Ignore pieces of the range that go onto other lines.
402 // FIXME: Handle multibyte characters in the source line.
404 if (R
.Start
.getPointer() < LineStart
)
407 FirstCol
= R
.Start
.getPointer() - LineStart
;
409 // If we inserted a long previous hint, push this one forwards, and add
410 // an extra space to show that this is not part of the previous
411 // completion. This is sort of the best we can do when two hints appear
414 // Note that if this hint is located immediately after the previous
415 // hint, no space will be added, since the location is more important.
416 unsigned HintCol
= FirstCol
;
417 if (HintCol
< PrevHintEndCol
)
418 HintCol
= PrevHintEndCol
+ 1;
420 // FIXME: This assertion is intended to catch unintended use of multibyte
421 // characters in fixits. If we decide to do this, we'll have to track
422 // separate byte widths for the source and fixit lines.
423 assert((size_t)sys::locale::columnWidth(Fixit
.getText()) ==
424 Fixit
.getText().size());
426 // This relies on one byte per column in our fixit hints.
427 unsigned LastColumnModified
= HintCol
+ Fixit
.getText().size();
428 if (LastColumnModified
> FixItLine
.size())
429 FixItLine
.resize(LastColumnModified
, ' ');
431 llvm::copy(Fixit
.getText(), FixItLine
.begin() + HintCol
);
433 PrevHintEndCol
= LastColumnModified
;
435 // For replacements, mark the removal range with '~'.
436 // FIXME: Handle multibyte characters in the source line.
438 if (R
.End
.getPointer() >= LineEnd
)
439 LastCol
= LineEnd
- LineStart
;
441 LastCol
= R
.End
.getPointer() - LineStart
;
443 std::fill(&CaretLine
[FirstCol
], &CaretLine
[LastCol
], '~');
447 static void printSourceLine(raw_ostream
&S
, StringRef LineContents
) {
448 // Print out the source line one character at a time, so we can expand tabs.
449 for (unsigned i
= 0, e
= LineContents
.size(), OutCol
= 0; i
!= e
; ++i
) {
450 size_t NextTab
= LineContents
.find('\t', i
);
451 // If there were no tabs left, print the rest, we are done.
452 if (NextTab
== StringRef::npos
) {
453 S
<< LineContents
.drop_front(i
);
457 // Otherwise, print from i to NextTab.
458 S
<< LineContents
.slice(i
, NextTab
);
459 OutCol
+= NextTab
- i
;
462 // If we have a tab, emit at least one space, then round up to 8 columns.
466 } while ((OutCol
% TabStop
) != 0);
/// Return true if the high bit of \p c is set, i.e. the byte is outside the
/// 7-bit ASCII range.
static bool isNonASCII(char c) {
  const unsigned char Byte = static_cast<unsigned char>(c);
  return (Byte & 0x80u) != 0;
}
473 void SMDiagnostic::print(const char *ProgName
, raw_ostream
&OS
, bool ShowColors
,
474 bool ShowKindLabel
) const {
475 ColorMode Mode
= ShowColors
? ColorMode::Auto
: ColorMode::Disable
;
478 WithColor
S(OS
, raw_ostream::SAVEDCOLOR
, true, false, Mode
);
480 if (ProgName
&& ProgName
[0])
481 S
<< ProgName
<< ": ";
483 if (!Filename
.empty()) {
492 S
<< ':' << (ColumnNo
+ 1);
500 case SourceMgr::DK_Error
:
501 WithColor::error(OS
, "", !ShowColors
);
503 case SourceMgr::DK_Warning
:
504 WithColor::warning(OS
, "", !ShowColors
);
506 case SourceMgr::DK_Note
:
507 WithColor::note(OS
, "", !ShowColors
);
509 case SourceMgr::DK_Remark
:
510 WithColor::remark(OS
, "", !ShowColors
);
515 WithColor(OS
, raw_ostream::SAVEDCOLOR
, true, false, Mode
) << Message
<< '\n';
517 if (LineNo
== -1 || ColumnNo
== -1)
520 // FIXME: If there are multibyte or multi-column characters in the source, all
521 // our ranges will be wrong. To do this properly, we'll need a byte-to-column
522 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
523 // expanding them later, and bail out rather than show incorrect ranges and
524 // misaligned fixits for any other odd characters.
525 if (any_of(LineContents
, isNonASCII
)) {
526 printSourceLine(OS
, LineContents
);
529 size_t NumColumns
= LineContents
.size();
531 // Build the line with the caret and ranges.
532 std::string
CaretLine(NumColumns
+ 1, ' ');
534 // Expand any ranges.
535 for (const std::pair
<unsigned, unsigned> &R
: Ranges
)
536 std::fill(&CaretLine
[R
.first
],
537 &CaretLine
[std::min((size_t)R
.second
, CaretLine
.size())], '~');
540 // FIXME: Find the beginning of the line properly for multibyte characters.
541 std::string FixItInsertionLine
;
543 CaretLine
, FixItInsertionLine
, FixIts
,
544 makeArrayRef(Loc
.getPointer() - ColumnNo
, LineContents
.size()));
546 // Finally, plop on the caret.
547 if (unsigned(ColumnNo
) <= NumColumns
)
548 CaretLine
[ColumnNo
] = '^';
550 CaretLine
[NumColumns
] = '^';
552 // ... and remove trailing whitespace so the output doesn't wrap for it. We
553 // know that the line isn't completely empty because it has the caret in it at
555 CaretLine
.erase(CaretLine
.find_last_not_of(' ') + 1);
557 printSourceLine(OS
, LineContents
);
560 ColorMode Mode
= ShowColors
? ColorMode::Auto
: ColorMode::Disable
;
561 WithColor
S(OS
, raw_ostream::GREEN
, true, false, Mode
);
563 // Print out the caret line, matching tabs in the source line.
564 for (unsigned i
= 0, e
= CaretLine
.size(), OutCol
= 0; i
!= e
; ++i
) {
565 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
571 // Okay, we have a tab. Insert the appropriate number of characters.
575 } while ((OutCol
% TabStop
) != 0);
580 // Print out the replacement line, matching tabs in the source line.
581 if (FixItInsertionLine
.empty())
584 for (size_t i
= 0, e
= FixItInsertionLine
.size(), OutCol
= 0; i
< e
; ++i
) {
585 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
586 OS
<< FixItInsertionLine
[i
];
591 // Okay, we have a tab. Insert the appropriate number of characters.
593 OS
<< FixItInsertionLine
[i
];
594 // FIXME: This is trying not to break up replacements, but then to re-sync
595 // with the tabs between replacements. This will fail, though, if two
596 // fix-it replacements are exactly adjacent, or if a fix-it contains a
597 // space. Really we should be precomputing column widths, which we'll
598 // need anyway for multibyte chars.
599 if (FixItInsertionLine
[i
] != ' ')
602 } while (((OutCol
% TabStop
) != 0) && i
!= e
);