//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SourceMgr class.  This class is used as a simple
// substrate for diagnostics, #include handling, and other low level things for
// simple parsers.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace llvm;
// Number of columns between tab stops; used when expanding tabs so that the
// source line, the caret line and the fix-it line stay aligned.
static const size_t TabStop = 8;
40 unsigned SourceMgr::AddIncludeFile(const std::string
&Filename
,
42 std::string
&IncludedFile
) {
43 IncludedFile
= Filename
;
44 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> NewBufOrErr
=
45 MemoryBuffer::getFile(IncludedFile
);
47 // If the file didn't exist directly, see if it's in an include path.
48 for (unsigned i
= 0, e
= IncludeDirectories
.size(); i
!= e
&& !NewBufOrErr
;
51 IncludeDirectories
[i
] + sys::path::get_separator().data() + Filename
;
52 NewBufOrErr
= MemoryBuffer::getFile(IncludedFile
);
58 return AddNewSourceBuffer(std::move(*NewBufOrErr
), IncludeLoc
);
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc
) const {
62 for (unsigned i
= 0, e
= Buffers
.size(); i
!= e
; ++i
)
63 if (Loc
.getPointer() >= Buffers
[i
].Buffer
->getBufferStart() &&
64 // Use <= here so that a pointer to the null at the end of the buffer
65 // is included as part of the buffer.
66 Loc
.getPointer() <= Buffers
[i
].Buffer
->getBufferEnd())
72 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr
) const {
74 // Ensure OffsetCache is allocated and populated with offsets of all the
76 std::vector
<T
> *Offsets
= nullptr;
77 if (OffsetCache
.isNull()) {
78 Offsets
= new std::vector
<T
>();
79 OffsetCache
= Offsets
;
80 size_t Sz
= Buffer
->getBufferSize();
81 assert(Sz
<= std::numeric_limits
<T
>::max());
82 StringRef S
= Buffer
->getBuffer();
83 for (size_t N
= 0; N
< Sz
; ++N
) {
85 Offsets
->push_back(static_cast<T
>(N
));
89 Offsets
= OffsetCache
.get
<std::vector
<T
> *>();
92 const char *BufStart
= Buffer
->getBufferStart();
93 assert(Ptr
>= BufStart
&& Ptr
<= Buffer
->getBufferEnd());
94 ptrdiff_t PtrDiff
= Ptr
- BufStart
;
95 assert(PtrDiff
>= 0 && static_cast<size_t>(PtrDiff
) <= std::numeric_limits
<T
>::max());
96 T PtrOffset
= static_cast<T
>(PtrDiff
);
98 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
100 return llvm::lower_bound(*Offsets
, PtrOffset
) - Offsets
->begin() + 1;
103 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer
&&Other
)
104 : Buffer(std::move(Other
.Buffer
)),
105 OffsetCache(Other
.OffsetCache
),
106 IncludeLoc(Other
.IncludeLoc
) {
107 Other
.OffsetCache
= nullptr;
110 SourceMgr::SrcBuffer::~SrcBuffer() {
111 if (!OffsetCache
.isNull()) {
112 if (OffsetCache
.is
<std::vector
<uint8_t>*>())
113 delete OffsetCache
.get
<std::vector
<uint8_t>*>();
114 else if (OffsetCache
.is
<std::vector
<uint16_t>*>())
115 delete OffsetCache
.get
<std::vector
<uint16_t>*>();
116 else if (OffsetCache
.is
<std::vector
<uint32_t>*>())
117 delete OffsetCache
.get
<std::vector
<uint32_t>*>();
119 delete OffsetCache
.get
<std::vector
<uint64_t>*>();
120 OffsetCache
= nullptr;
124 std::pair
<unsigned, unsigned>
125 SourceMgr::getLineAndColumn(SMLoc Loc
, unsigned BufferID
) const {
127 BufferID
= FindBufferContainingLoc(Loc
);
128 assert(BufferID
&& "Invalid Location!");
130 auto &SB
= getBufferInfo(BufferID
);
131 const char *Ptr
= Loc
.getPointer();
133 size_t Sz
= SB
.Buffer
->getBufferSize();
135 if (Sz
<= std::numeric_limits
<uint8_t>::max())
136 LineNo
= SB
.getLineNumber
<uint8_t>(Ptr
);
137 else if (Sz
<= std::numeric_limits
<uint16_t>::max())
138 LineNo
= SB
.getLineNumber
<uint16_t>(Ptr
);
139 else if (Sz
<= std::numeric_limits
<uint32_t>::max())
140 LineNo
= SB
.getLineNumber
<uint32_t>(Ptr
);
142 LineNo
= SB
.getLineNumber
<uint64_t>(Ptr
);
144 const char *BufStart
= SB
.Buffer
->getBufferStart();
145 size_t NewlineOffs
= StringRef(BufStart
, Ptr
-BufStart
).find_last_of("\n\r");
146 if (NewlineOffs
== StringRef::npos
) NewlineOffs
= ~(size_t)0;
147 return std::make_pair(LineNo
, Ptr
-BufStart
-NewlineOffs
);
150 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc
, raw_ostream
&OS
) const {
151 if (IncludeLoc
== SMLoc()) return; // Top of stack.
153 unsigned CurBuf
= FindBufferContainingLoc(IncludeLoc
);
154 assert(CurBuf
&& "Invalid or unspecified location!");
156 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
158 OS
<< "Included from "
159 << getBufferInfo(CurBuf
).Buffer
->getBufferIdentifier()
160 << ":" << FindLineNumber(IncludeLoc
, CurBuf
) << ":\n";
163 SMDiagnostic
SourceMgr::GetMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
165 ArrayRef
<SMRange
> Ranges
,
166 ArrayRef
<SMFixIt
> FixIts
) const {
167 // First thing to do: find the current buffer containing the specified
168 // location to pull out the source line.
169 SmallVector
<std::pair
<unsigned, unsigned>, 4> ColRanges
;
170 std::pair
<unsigned, unsigned> LineAndCol
;
171 StringRef BufferID
= "<unknown>";
175 unsigned CurBuf
= FindBufferContainingLoc(Loc
);
176 assert(CurBuf
&& "Invalid or unspecified location!");
178 const MemoryBuffer
*CurMB
= getMemoryBuffer(CurBuf
);
179 BufferID
= CurMB
->getBufferIdentifier();
181 // Scan backward to find the start of the line.
182 const char *LineStart
= Loc
.getPointer();
183 const char *BufStart
= CurMB
->getBufferStart();
184 while (LineStart
!= BufStart
&& LineStart
[-1] != '\n' &&
185 LineStart
[-1] != '\r')
188 // Get the end of the line.
189 const char *LineEnd
= Loc
.getPointer();
190 const char *BufEnd
= CurMB
->getBufferEnd();
191 while (LineEnd
!= BufEnd
&& LineEnd
[0] != '\n' && LineEnd
[0] != '\r')
193 LineStr
= std::string(LineStart
, LineEnd
);
195 // Convert any ranges to column ranges that only intersect the line of the
197 for (unsigned i
= 0, e
= Ranges
.size(); i
!= e
; ++i
) {
198 SMRange R
= Ranges
[i
];
199 if (!R
.isValid()) continue;
201 // If the line doesn't contain any part of the range, then ignore it.
202 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
205 // Ignore pieces of the range that go onto other lines.
206 if (R
.Start
.getPointer() < LineStart
)
207 R
.Start
= SMLoc::getFromPointer(LineStart
);
208 if (R
.End
.getPointer() > LineEnd
)
209 R
.End
= SMLoc::getFromPointer(LineEnd
);
211 // Translate from SMLoc ranges to column ranges.
212 // FIXME: Handle multibyte characters.
213 ColRanges
.push_back(std::make_pair(R
.Start
.getPointer()-LineStart
,
214 R
.End
.getPointer()-LineStart
));
217 LineAndCol
= getLineAndColumn(Loc
, CurBuf
);
220 return SMDiagnostic(*this, Loc
, BufferID
, LineAndCol
.first
,
221 LineAndCol
.second
-1, Kind
, Msg
.str(),
222 LineStr
, ColRanges
, FixIts
);
225 void SourceMgr::PrintMessage(raw_ostream
&OS
, const SMDiagnostic
&Diagnostic
,
226 bool ShowColors
) const {
227 // Report the message with the diagnostic handler if present.
229 DiagHandler(Diagnostic
, DiagContext
);
233 if (Diagnostic
.getLoc().isValid()) {
234 unsigned CurBuf
= FindBufferContainingLoc(Diagnostic
.getLoc());
235 assert(CurBuf
&& "Invalid or unspecified location!");
236 PrintIncludeStack(getBufferInfo(CurBuf
).IncludeLoc
, OS
);
239 Diagnostic
.print(nullptr, OS
, ShowColors
);
242 void SourceMgr::PrintMessage(raw_ostream
&OS
, SMLoc Loc
,
243 SourceMgr::DiagKind Kind
,
244 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
245 ArrayRef
<SMFixIt
> FixIts
, bool ShowColors
) const {
246 PrintMessage(OS
, GetMessage(Loc
, Kind
, Msg
, Ranges
, FixIts
), ShowColors
);
249 void SourceMgr::PrintMessage(SMLoc Loc
, SourceMgr::DiagKind Kind
,
250 const Twine
&Msg
, ArrayRef
<SMRange
> Ranges
,
251 ArrayRef
<SMFixIt
> FixIts
, bool ShowColors
) const {
252 PrintMessage(errs(), Loc
, Kind
, Msg
, Ranges
, FixIts
, ShowColors
);
//===----------------------------------------------------------------------===//
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
259 SMDiagnostic::SMDiagnostic(const SourceMgr
&sm
, SMLoc L
, StringRef FN
,
260 int Line
, int Col
, SourceMgr::DiagKind Kind
,
261 StringRef Msg
, StringRef LineStr
,
262 ArrayRef
<std::pair
<unsigned,unsigned>> Ranges
,
263 ArrayRef
<SMFixIt
> Hints
)
264 : SM(&sm
), Loc(L
), Filename(FN
), LineNo(Line
), ColumnNo(Col
), Kind(Kind
),
265 Message(Msg
), LineContents(LineStr
), Ranges(Ranges
.vec()),
266 FixIts(Hints
.begin(), Hints
.end()) {
270 static void buildFixItLine(std::string
&CaretLine
, std::string
&FixItLine
,
271 ArrayRef
<SMFixIt
> FixIts
, ArrayRef
<char> SourceLine
){
275 const char *LineStart
= SourceLine
.begin();
276 const char *LineEnd
= SourceLine
.end();
278 size_t PrevHintEndCol
= 0;
280 for (ArrayRef
<SMFixIt
>::iterator I
= FixIts
.begin(), E
= FixIts
.end();
282 // If the fixit contains a newline or tab, ignore it.
283 if (I
->getText().find_first_of("\n\r\t") != StringRef::npos
)
286 SMRange R
= I
->getRange();
288 // If the line doesn't contain any part of the range, then ignore it.
289 if (R
.Start
.getPointer() > LineEnd
|| R
.End
.getPointer() < LineStart
)
292 // Translate from SMLoc to column.
293 // Ignore pieces of the range that go onto other lines.
294 // FIXME: Handle multibyte characters in the source line.
296 if (R
.Start
.getPointer() < LineStart
)
299 FirstCol
= R
.Start
.getPointer() - LineStart
;
301 // If we inserted a long previous hint, push this one forwards, and add
302 // an extra space to show that this is not part of the previous
303 // completion. This is sort of the best we can do when two hints appear
306 // Note that if this hint is located immediately after the previous
307 // hint, no space will be added, since the location is more important.
308 unsigned HintCol
= FirstCol
;
309 if (HintCol
< PrevHintEndCol
)
310 HintCol
= PrevHintEndCol
+ 1;
312 // FIXME: This assertion is intended to catch unintended use of multibyte
313 // characters in fixits. If we decide to do this, we'll have to track
314 // separate byte widths for the source and fixit lines.
315 assert((size_t)sys::locale::columnWidth(I
->getText()) ==
316 I
->getText().size());
318 // This relies on one byte per column in our fixit hints.
319 unsigned LastColumnModified
= HintCol
+ I
->getText().size();
320 if (LastColumnModified
> FixItLine
.size())
321 FixItLine
.resize(LastColumnModified
, ' ');
323 std::copy(I
->getText().begin(), I
->getText().end(),
324 FixItLine
.begin() + HintCol
);
326 PrevHintEndCol
= LastColumnModified
;
328 // For replacements, mark the removal range with '~'.
329 // FIXME: Handle multibyte characters in the source line.
331 if (R
.End
.getPointer() >= LineEnd
)
332 LastCol
= LineEnd
- LineStart
;
334 LastCol
= R
.End
.getPointer() - LineStart
;
336 std::fill(&CaretLine
[FirstCol
], &CaretLine
[LastCol
], '~');
340 static void printSourceLine(raw_ostream
&S
, StringRef LineContents
) {
341 // Print out the source line one character at a time, so we can expand tabs.
342 for (unsigned i
= 0, e
= LineContents
.size(), OutCol
= 0; i
!= e
; ++i
) {
343 size_t NextTab
= LineContents
.find('\t', i
);
344 // If there were no tabs left, print the rest, we are done.
345 if (NextTab
== StringRef::npos
) {
346 S
<< LineContents
.drop_front(i
);
350 // Otherwise, print from i to NextTab.
351 S
<< LineContents
.slice(i
, NextTab
);
352 OutCol
+= NextTab
- i
;
355 // If we have a tab, emit at least one space, then round up to 8 columns.
359 } while ((OutCol
% TabStop
) != 0);
// Return true if the high bit of c is set, i.e. c is not a 7-bit ASCII byte.
static bool isNonASCII(char c) {
  return c & 0x80;
}
368 void SMDiagnostic::print(const char *ProgName
, raw_ostream
&OS
,
369 bool ShowColors
, bool ShowKindLabel
) const {
371 WithColor
S(OS
, raw_ostream::SAVEDCOLOR
, true, false, !ShowColors
);
373 if (ProgName
&& ProgName
[0])
374 S
<< ProgName
<< ": ";
376 if (!Filename
.empty()) {
385 S
<< ':' << (ColumnNo
+ 1);
393 case SourceMgr::DK_Error
:
394 WithColor::error(OS
, "", !ShowColors
);
396 case SourceMgr::DK_Warning
:
397 WithColor::warning(OS
, "", !ShowColors
);
399 case SourceMgr::DK_Note
:
400 WithColor::note(OS
, "", !ShowColors
);
402 case SourceMgr::DK_Remark
:
403 WithColor::remark(OS
, "", !ShowColors
);
408 WithColor(OS
, raw_ostream::SAVEDCOLOR
, true, false, !ShowColors
)
411 if (LineNo
== -1 || ColumnNo
== -1)
414 // FIXME: If there are multibyte or multi-column characters in the source, all
415 // our ranges will be wrong. To do this properly, we'll need a byte-to-column
416 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
417 // expanding them later, and bail out rather than show incorrect ranges and
418 // misaligned fixits for any other odd characters.
419 if (find_if(LineContents
, isNonASCII
) != LineContents
.end()) {
420 printSourceLine(OS
, LineContents
);
423 size_t NumColumns
= LineContents
.size();
425 // Build the line with the caret and ranges.
426 std::string
CaretLine(NumColumns
+1, ' ');
428 // Expand any ranges.
429 for (unsigned r
= 0, e
= Ranges
.size(); r
!= e
; ++r
) {
430 std::pair
<unsigned, unsigned> R
= Ranges
[r
];
431 std::fill(&CaretLine
[R
.first
],
432 &CaretLine
[std::min((size_t)R
.second
, CaretLine
.size())],
437 // FIXME: Find the beginning of the line properly for multibyte characters.
438 std::string FixItInsertionLine
;
439 buildFixItLine(CaretLine
, FixItInsertionLine
, FixIts
,
440 makeArrayRef(Loc
.getPointer() - ColumnNo
,
441 LineContents
.size()));
443 // Finally, plop on the caret.
444 if (unsigned(ColumnNo
) <= NumColumns
)
445 CaretLine
[ColumnNo
] = '^';
447 CaretLine
[NumColumns
] = '^';
449 // ... and remove trailing whitespace so the output doesn't wrap for it. We
450 // know that the line isn't completely empty because it has the caret in it at
452 CaretLine
.erase(CaretLine
.find_last_not_of(' ')+1);
454 printSourceLine(OS
, LineContents
);
457 WithColor
S(OS
, raw_ostream::GREEN
, true, false, !ShowColors
);
459 // Print out the caret line, matching tabs in the source line.
460 for (unsigned i
= 0, e
= CaretLine
.size(), OutCol
= 0; i
!= e
; ++i
) {
461 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
467 // Okay, we have a tab. Insert the appropriate number of characters.
471 } while ((OutCol
% TabStop
) != 0);
476 // Print out the replacement line, matching tabs in the source line.
477 if (FixItInsertionLine
.empty())
480 for (size_t i
= 0, e
= FixItInsertionLine
.size(), OutCol
= 0; i
< e
; ++i
) {
481 if (i
>= LineContents
.size() || LineContents
[i
] != '\t') {
482 OS
<< FixItInsertionLine
[i
];
487 // Okay, we have a tab. Insert the appropriate number of characters.
489 OS
<< FixItInsertionLine
[i
];
490 // FIXME: This is trying not to break up replacements, but then to re-sync
491 // with the tabs between replacements. This will fail, though, if two
492 // fix-it replacements are exactly adjacent, or if a fix-it contains a
493 // space. Really we should be precomputing column widths, which we'll
494 // need anyway for multibyte chars.
495 if (FixItInsertionLine
[i
] != ' ')
498 } while (((OutCol
% TabStop
) != 0) && i
!= e
);