[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Support / SourceMgr.cpp
blob89b7dc939dfcb793a9b919f7c6ac6012885b15be
1 //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SourceMgr class. This class is used as a simple
10 // substrate for diagnostics, #include handling, and other low level things for
11 // simple parsers.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Support/SourceMgr.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Support/ErrorOr.h"
22 #include "llvm/Support/Locale.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/SMLoc.h"
26 #include "llvm/Support/WithColor.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <limits>
32 #include <memory>
33 #include <string>
34 #include <utility>
36 using namespace llvm;
/// Column interval that tab characters are expanded to when source lines and
/// their marker lines are printed.
static constexpr size_t TabStop = 8;
40 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
41 SMLoc IncludeLoc,
42 std::string &IncludedFile) {
43 IncludedFile = Filename;
44 ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
45 MemoryBuffer::getFile(IncludedFile);
47 // If the file didn't exist directly, see if it's in an include path.
48 for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
49 ++i) {
50 IncludedFile =
51 IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
52 NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
55 if (!NewBufOrErr)
56 return 0;
58 return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
61 unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
62 for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
63 if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
64 // Use <= here so that a pointer to the null at the end of the buffer
65 // is included as part of the buffer.
66 Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
67 return i + 1;
68 return 0;
71 template <typename T>
72 static std::vector<T> &GetOrCreateOffsetCache(void *&OffsetCache,
73 MemoryBuffer *Buffer) {
74 if (OffsetCache)
75 return *static_cast<std::vector<T> *>(OffsetCache);
77 // Lazily fill in the offset cache.
78 auto *Offsets = new std::vector<T>();
79 size_t Sz = Buffer->getBufferSize();
80 assert(Sz <= std::numeric_limits<T>::max());
81 StringRef S = Buffer->getBuffer();
82 for (size_t N = 0; N < Sz; ++N) {
83 if (S[N] == '\n')
84 Offsets->push_back(static_cast<T>(N));
87 OffsetCache = Offsets;
88 return *Offsets;
91 template <typename T>
92 unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *Ptr) const {
93 std::vector<T> &Offsets =
94 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
96 const char *BufStart = Buffer->getBufferStart();
97 assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
98 ptrdiff_t PtrDiff = Ptr - BufStart;
99 assert(PtrDiff >= 0 &&
100 static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
101 T PtrOffset = static_cast<T>(PtrDiff);
103 // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
104 // the line number.
105 return llvm::lower_bound(Offsets, PtrOffset) - Offsets.begin() + 1;
108 /// Look up a given \p Ptr in in the buffer, determining which line it came
109 /// from.
110 unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
111 size_t Sz = Buffer->getBufferSize();
112 if (Sz <= std::numeric_limits<uint8_t>::max())
113 return getLineNumberSpecialized<uint8_t>(Ptr);
114 else if (Sz <= std::numeric_limits<uint16_t>::max())
115 return getLineNumberSpecialized<uint16_t>(Ptr);
116 else if (Sz <= std::numeric_limits<uint32_t>::max())
117 return getLineNumberSpecialized<uint32_t>(Ptr);
118 else
119 return getLineNumberSpecialized<uint64_t>(Ptr);
122 template <typename T>
123 const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
124 unsigned LineNo) const {
125 std::vector<T> &Offsets =
126 GetOrCreateOffsetCache<T>(OffsetCache, Buffer.get());
128 // We start counting line and column numbers from 1.
129 if (LineNo != 0)
130 --LineNo;
132 const char *BufStart = Buffer->getBufferStart();
134 // The offset cache contains the location of the \n for the specified line,
135 // we want the start of the line. As such, we look for the previous entry.
136 if (LineNo == 0)
137 return BufStart;
138 if (LineNo > Offsets.size())
139 return nullptr;
140 return BufStart + Offsets[LineNo - 1] + 1;
143 /// Return a pointer to the first character of the specified line number or
144 /// null if the line number is invalid.
145 const char *
146 SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned LineNo) const {
147 size_t Sz = Buffer->getBufferSize();
148 if (Sz <= std::numeric_limits<uint8_t>::max())
149 return getPointerForLineNumberSpecialized<uint8_t>(LineNo);
150 else if (Sz <= std::numeric_limits<uint16_t>::max())
151 return getPointerForLineNumberSpecialized<uint16_t>(LineNo);
152 else if (Sz <= std::numeric_limits<uint32_t>::max())
153 return getPointerForLineNumberSpecialized<uint32_t>(LineNo);
154 else
155 return getPointerForLineNumberSpecialized<uint64_t>(LineNo);
158 SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
159 : Buffer(std::move(Other.Buffer)), OffsetCache(Other.OffsetCache),
160 IncludeLoc(Other.IncludeLoc) {
161 Other.OffsetCache = nullptr;
164 SourceMgr::SrcBuffer::~SrcBuffer() {
165 if (OffsetCache) {
166 size_t Sz = Buffer->getBufferSize();
167 if (Sz <= std::numeric_limits<uint8_t>::max())
168 delete static_cast<std::vector<uint8_t> *>(OffsetCache);
169 else if (Sz <= std::numeric_limits<uint16_t>::max())
170 delete static_cast<std::vector<uint16_t> *>(OffsetCache);
171 else if (Sz <= std::numeric_limits<uint32_t>::max())
172 delete static_cast<std::vector<uint32_t> *>(OffsetCache);
173 else
174 delete static_cast<std::vector<uint64_t> *>(OffsetCache);
175 OffsetCache = nullptr;
179 std::pair<unsigned, unsigned>
180 SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
181 if (!BufferID)
182 BufferID = FindBufferContainingLoc(Loc);
183 assert(BufferID && "Invalid location!");
185 auto &SB = getBufferInfo(BufferID);
186 const char *Ptr = Loc.getPointer();
188 unsigned LineNo = SB.getLineNumber(Ptr);
189 const char *BufStart = SB.Buffer->getBufferStart();
190 size_t NewlineOffs = StringRef(BufStart, Ptr - BufStart).find_last_of("\n\r");
191 if (NewlineOffs == StringRef::npos)
192 NewlineOffs = ~(size_t)0;
193 return std::make_pair(LineNo, Ptr - BufStart - NewlineOffs);
196 // FIXME: Note that the formatting of source locations is spread between
197 // multiple functions, some in SourceMgr and some in SMDiagnostic. A better
198 // solution would be a general-purpose source location formatter
199 // in one of those two classes, or possibly in SMLoc.
201 /// Get a string with the source location formatted in the standard
202 /// style, but without the line offset. If \p IncludePath is true, the path
203 /// is included. If false, only the file name and extension are included.
204 std::string SourceMgr::getFormattedLocationNoOffset(SMLoc Loc,
205 bool IncludePath) const {
206 auto BufferID = FindBufferContainingLoc(Loc);
207 assert(BufferID && "Invalid location!");
208 auto FileSpec = getBufferInfo(BufferID).Buffer->getBufferIdentifier();
210 if (IncludePath) {
211 return FileSpec.str() + ":" + std::to_string(FindLineNumber(Loc, BufferID));
212 } else {
213 auto I = FileSpec.find_last_of("/\\");
214 I = (I == FileSpec.size()) ? 0 : (I + 1);
215 return FileSpec.substr(I).str() + ":" +
216 std::to_string(FindLineNumber(Loc, BufferID));
220 /// Given a line and column number in a mapped buffer, turn it into an SMLoc.
221 /// This will return a null SMLoc if the line/column location is invalid.
222 SMLoc SourceMgr::FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo,
223 unsigned ColNo) {
224 auto &SB = getBufferInfo(BufferID);
225 const char *Ptr = SB.getPointerForLineNumber(LineNo);
226 if (!Ptr)
227 return SMLoc();
229 // We start counting line and column numbers from 1.
230 if (ColNo != 0)
231 --ColNo;
233 // If we have a column number, validate it.
234 if (ColNo) {
235 // Make sure the location is within the current line.
236 if (Ptr + ColNo > SB.Buffer->getBufferEnd())
237 return SMLoc();
239 // Make sure there is no newline in the way.
240 if (StringRef(Ptr, ColNo).find_first_of("\n\r") != StringRef::npos)
241 return SMLoc();
243 Ptr += ColNo;
246 return SMLoc::getFromPointer(Ptr);
249 void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
250 if (IncludeLoc == SMLoc())
251 return; // Top of stack.
253 unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
254 assert(CurBuf && "Invalid or unspecified location!");
256 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
258 OS << "Included from " << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
259 << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
262 SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
263 const Twine &Msg, ArrayRef<SMRange> Ranges,
264 ArrayRef<SMFixIt> FixIts) const {
265 // First thing to do: find the current buffer containing the specified
266 // location to pull out the source line.
267 SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
268 std::pair<unsigned, unsigned> LineAndCol;
269 StringRef BufferID = "<unknown>";
270 StringRef LineStr;
272 if (Loc.isValid()) {
273 unsigned CurBuf = FindBufferContainingLoc(Loc);
274 assert(CurBuf && "Invalid or unspecified location!");
276 const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
277 BufferID = CurMB->getBufferIdentifier();
279 // Scan backward to find the start of the line.
280 const char *LineStart = Loc.getPointer();
281 const char *BufStart = CurMB->getBufferStart();
282 while (LineStart != BufStart && LineStart[-1] != '\n' &&
283 LineStart[-1] != '\r')
284 --LineStart;
286 // Get the end of the line.
287 const char *LineEnd = Loc.getPointer();
288 const char *BufEnd = CurMB->getBufferEnd();
289 while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
290 ++LineEnd;
291 LineStr = StringRef(LineStart, LineEnd - LineStart);
293 // Convert any ranges to column ranges that only intersect the line of the
294 // location.
295 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
296 SMRange R = Ranges[i];
297 if (!R.isValid())
298 continue;
300 // If the line doesn't contain any part of the range, then ignore it.
301 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
302 continue;
304 // Ignore pieces of the range that go onto other lines.
305 if (R.Start.getPointer() < LineStart)
306 R.Start = SMLoc::getFromPointer(LineStart);
307 if (R.End.getPointer() > LineEnd)
308 R.End = SMLoc::getFromPointer(LineEnd);
310 // Translate from SMLoc ranges to column ranges.
311 // FIXME: Handle multibyte characters.
312 ColRanges.push_back(std::make_pair(R.Start.getPointer() - LineStart,
313 R.End.getPointer() - LineStart));
316 LineAndCol = getLineAndColumn(Loc, CurBuf);
319 return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
320 LineAndCol.second - 1, Kind, Msg.str(), LineStr,
321 ColRanges, FixIts);
324 void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
325 bool ShowColors) const {
326 // Report the message with the diagnostic handler if present.
327 if (DiagHandler) {
328 DiagHandler(Diagnostic, DiagContext);
329 return;
332 if (Diagnostic.getLoc().isValid()) {
333 unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
334 assert(CurBuf && "Invalid or unspecified location!");
335 PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
338 Diagnostic.print(nullptr, OS, ShowColors);
341 void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
342 SourceMgr::DiagKind Kind, const Twine &Msg,
343 ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts,
344 bool ShowColors) const {
345 PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
348 void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
349 const Twine &Msg, ArrayRef<SMRange> Ranges,
350 ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
351 PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
354 //===----------------------------------------------------------------------===//
355 // SMFixIt Implementation
356 //===----------------------------------------------------------------------===//
358 SMFixIt::SMFixIt(SMRange R, const Twine &Replacement)
359 : Range(R), Text(Replacement.str()) {
360 assert(R.isValid());
363 //===----------------------------------------------------------------------===//
364 // SMDiagnostic Implementation
365 //===----------------------------------------------------------------------===//
367 SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN, int Line,
368 int Col, SourceMgr::DiagKind Kind, StringRef Msg,
369 StringRef LineStr,
370 ArrayRef<std::pair<unsigned, unsigned>> Ranges,
371 ArrayRef<SMFixIt> Hints)
372 : SM(&sm), Loc(L), Filename(std::string(FN)), LineNo(Line), ColumnNo(Col),
373 Kind(Kind), Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
374 FixIts(Hints.begin(), Hints.end()) {
375 llvm::sort(FixIts);
378 static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
379 ArrayRef<SMFixIt> FixIts,
380 ArrayRef<char> SourceLine) {
381 if (FixIts.empty())
382 return;
384 const char *LineStart = SourceLine.begin();
385 const char *LineEnd = SourceLine.end();
387 size_t PrevHintEndCol = 0;
389 for (const llvm::SMFixIt &Fixit : FixIts) {
390 // If the fixit contains a newline or tab, ignore it.
391 if (Fixit.getText().find_first_of("\n\r\t") != StringRef::npos)
392 continue;
394 SMRange R = Fixit.getRange();
396 // If the line doesn't contain any part of the range, then ignore it.
397 if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
398 continue;
400 // Translate from SMLoc to column.
401 // Ignore pieces of the range that go onto other lines.
402 // FIXME: Handle multibyte characters in the source line.
403 unsigned FirstCol;
404 if (R.Start.getPointer() < LineStart)
405 FirstCol = 0;
406 else
407 FirstCol = R.Start.getPointer() - LineStart;
409 // If we inserted a long previous hint, push this one forwards, and add
410 // an extra space to show that this is not part of the previous
411 // completion. This is sort of the best we can do when two hints appear
412 // to overlap.
414 // Note that if this hint is located immediately after the previous
415 // hint, no space will be added, since the location is more important.
416 unsigned HintCol = FirstCol;
417 if (HintCol < PrevHintEndCol)
418 HintCol = PrevHintEndCol + 1;
420 // FIXME: This assertion is intended to catch unintended use of multibyte
421 // characters in fixits. If we decide to do this, we'll have to track
422 // separate byte widths for the source and fixit lines.
423 assert((size_t)sys::locale::columnWidth(Fixit.getText()) ==
424 Fixit.getText().size());
426 // This relies on one byte per column in our fixit hints.
427 unsigned LastColumnModified = HintCol + Fixit.getText().size();
428 if (LastColumnModified > FixItLine.size())
429 FixItLine.resize(LastColumnModified, ' ');
431 llvm::copy(Fixit.getText(), FixItLine.begin() + HintCol);
433 PrevHintEndCol = LastColumnModified;
435 // For replacements, mark the removal range with '~'.
436 // FIXME: Handle multibyte characters in the source line.
437 unsigned LastCol;
438 if (R.End.getPointer() >= LineEnd)
439 LastCol = LineEnd - LineStart;
440 else
441 LastCol = R.End.getPointer() - LineStart;
443 std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
447 static void printSourceLine(raw_ostream &S, StringRef LineContents) {
448 // Print out the source line one character at a time, so we can expand tabs.
449 for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
450 size_t NextTab = LineContents.find('\t', i);
451 // If there were no tabs left, print the rest, we are done.
452 if (NextTab == StringRef::npos) {
453 S << LineContents.drop_front(i);
454 break;
457 // Otherwise, print from i to NextTab.
458 S << LineContents.slice(i, NextTab);
459 OutCol += NextTab - i;
460 i = NextTab;
462 // If we have a tab, emit at least one space, then round up to 8 columns.
463 do {
464 S << ' ';
465 ++OutCol;
466 } while ((OutCol % TabStop) != 0);
468 S << '\n';
/// Return true if the high bit of \p c is set, i.e. the byte is outside the
/// 7-bit ASCII range.
static bool isNonASCII(char c) {
  return static_cast<unsigned char>(c) >= 0x80;
}
473 void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors,
474 bool ShowKindLabel) const {
475 ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable;
478 WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, Mode);
480 if (ProgName && ProgName[0])
481 S << ProgName << ": ";
483 if (!Filename.empty()) {
484 if (Filename == "-")
485 S << "<stdin>";
486 else
487 S << Filename;
489 if (LineNo != -1) {
490 S << ':' << LineNo;
491 if (ColumnNo != -1)
492 S << ':' << (ColumnNo + 1);
494 S << ": ";
498 if (ShowKindLabel) {
499 switch (Kind) {
500 case SourceMgr::DK_Error:
501 WithColor::error(OS, "", !ShowColors);
502 break;
503 case SourceMgr::DK_Warning:
504 WithColor::warning(OS, "", !ShowColors);
505 break;
506 case SourceMgr::DK_Note:
507 WithColor::note(OS, "", !ShowColors);
508 break;
509 case SourceMgr::DK_Remark:
510 WithColor::remark(OS, "", !ShowColors);
511 break;
515 WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, Mode) << Message << '\n';
517 if (LineNo == -1 || ColumnNo == -1)
518 return;
520 // FIXME: If there are multibyte or multi-column characters in the source, all
521 // our ranges will be wrong. To do this properly, we'll need a byte-to-column
522 // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
523 // expanding them later, and bail out rather than show incorrect ranges and
524 // misaligned fixits for any other odd characters.
525 if (any_of(LineContents, isNonASCII)) {
526 printSourceLine(OS, LineContents);
527 return;
529 size_t NumColumns = LineContents.size();
531 // Build the line with the caret and ranges.
532 std::string CaretLine(NumColumns + 1, ' ');
534 // Expand any ranges.
535 for (const std::pair<unsigned, unsigned> &R : Ranges)
536 std::fill(&CaretLine[R.first],
537 &CaretLine[std::min((size_t)R.second, CaretLine.size())], '~');
539 // Add any fix-its.
540 // FIXME: Find the beginning of the line properly for multibyte characters.
541 std::string FixItInsertionLine;
542 buildFixItLine(
543 CaretLine, FixItInsertionLine, FixIts,
544 makeArrayRef(Loc.getPointer() - ColumnNo, LineContents.size()));
546 // Finally, plop on the caret.
547 if (unsigned(ColumnNo) <= NumColumns)
548 CaretLine[ColumnNo] = '^';
549 else
550 CaretLine[NumColumns] = '^';
552 // ... and remove trailing whitespace so the output doesn't wrap for it. We
553 // know that the line isn't completely empty because it has the caret in it at
554 // least.
555 CaretLine.erase(CaretLine.find_last_not_of(' ') + 1);
557 printSourceLine(OS, LineContents);
560 ColorMode Mode = ShowColors ? ColorMode::Auto : ColorMode::Disable;
561 WithColor S(OS, raw_ostream::GREEN, true, false, Mode);
563 // Print out the caret line, matching tabs in the source line.
564 for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
565 if (i >= LineContents.size() || LineContents[i] != '\t') {
566 S << CaretLine[i];
567 ++OutCol;
568 continue;
571 // Okay, we have a tab. Insert the appropriate number of characters.
572 do {
573 S << CaretLine[i];
574 ++OutCol;
575 } while ((OutCol % TabStop) != 0);
577 S << '\n';
580 // Print out the replacement line, matching tabs in the source line.
581 if (FixItInsertionLine.empty())
582 return;
584 for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
585 if (i >= LineContents.size() || LineContents[i] != '\t') {
586 OS << FixItInsertionLine[i];
587 ++OutCol;
588 continue;
591 // Okay, we have a tab. Insert the appropriate number of characters.
592 do {
593 OS << FixItInsertionLine[i];
594 // FIXME: This is trying not to break up replacements, but then to re-sync
595 // with the tabs between replacements. This will fail, though, if two
596 // fix-it replacements are exactly adjacent, or if a fix-it contains a
597 // space. Really we should be precomputing column widths, which we'll
598 // need anyway for multibyte chars.
599 if (FixItInsertionLine[i] != ' ')
600 ++i;
601 ++OutCol;
602 } while (((OutCol % TabStop) != 0) && i != e);
604 OS << '\n';