//===-- lib/Parser/source.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include "flang/Parser/characters.h"
13 #include "llvm/Support/Errno.h"
14 #include "llvm/Support/FileSystem.h"
15 #include "llvm/Support/Path.h"
16 #include "llvm/Support/raw_ostream.h"
23 namespace Fortran::parser
{
25 SourceFile::~SourceFile() { Close(); }
27 void SourceFile::RecordLineStarts() {
28 if (std::size_t chars
{bytes()}; chars
> 0) {
29 origins_
.emplace(1, SourcePositionOrigin
{path_
, 1});
30 const char *source
{content().data()};
31 CHECK(source
[chars
- 1] == '\n' && "missing ultimate newline");
33 do { // "at" is always at the beginning of a source line
34 lineStart_
.push_back(at
);
35 at
= reinterpret_cast<const char *>(
36 std::memchr(source
+ at
, '\n', chars
- at
)) -
40 lineStart_
.shrink_to_fit();
44 // Check for a Unicode byte order mark (BOM).
45 // Module files all have one; so can source files.
46 void SourceFile::IdentifyPayload() {
47 llvm::StringRef content
{buf_
->getBufferStart(), buf_
->getBufferSize()};
48 constexpr llvm::StringLiteral UTF8_BOM
{"\xef\xbb\xbf"};
49 if (content
.starts_with(UTF8_BOM
)) {
50 bom_end_
= UTF8_BOM
.size();
51 encoding_
= Encoding::UTF_8
;
55 std::string
DirectoryName(std::string path
) {
56 llvm::SmallString
<128> pathBuf
{path
};
57 llvm::sys::path::remove_filename(pathBuf
);
58 return pathBuf
.str().str();
61 std::optional
<std::string
> LocateSourceFile(
62 std::string name
, const std::list
<std::string
> &searchPath
) {
63 if (name
== "-" || llvm::sys::path::is_absolute(name
)) {
66 for (const std::string
&dir
: searchPath
) {
67 llvm::SmallString
<128> path
{dir
};
68 llvm::sys::path::append(path
, name
);
70 auto er
= llvm::sys::fs::is_directory(path
, isDir
);
72 return path
.str().str();
78 std::vector
<std::string
> LocateSourceFileAll(
79 std::string name
, const std::vector
<std::string
> &searchPath
) {
80 if (name
== "-" || llvm::sys::path::is_absolute(name
)) {
83 std::vector
<std::string
> result
;
84 for (const std::string
&dir
: searchPath
) {
85 llvm::SmallString
<128> path
{dir
};
86 llvm::sys::path::append(path
, name
);
88 auto er
= llvm::sys::fs::is_directory(path
, isDir
);
90 result
.emplace_back(path
.str().str());
96 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef
<char> buf
) {
98 char *buffer
{buf
.data()};
100 std::size_t bytes
= buf
.size();
102 void *vp
{static_cast<void *>(p
)};
103 void *crvp
{std::memchr(vp
, '\r', bytes
)};
104 char *crcp
{static_cast<char *>(crvp
)};
106 std::memmove(buffer
+ wrote
, p
, bytes
);
110 std::size_t chunk
= crcp
- p
;
111 auto advance
{chunk
+ 1};
112 if (chunk
+ 1 >= bytes
|| crcp
[1] == '\n') {
113 // CR followed by LF or EOF: omit
114 } else if ((chunk
== 0 && p
== buf
.data()) || crcp
[-1] == '\n') {
115 // CR preceded by LF or BOF: omit
117 // CR in line: retain
120 std::memmove(buffer
+ wrote
, p
, chunk
);
128 bool SourceFile::Open(std::string path
, llvm::raw_ostream
&error
) {
131 std::string errorPath
{"'"s
+ path_
+ "'"};
132 auto bufOr
{llvm::WritableMemoryBuffer::getFile(path
)};
134 auto err
= bufOr
.getError();
135 error
<< "Could not open " << errorPath
<< ": " << err
.message();
138 buf_
= std::move(bufOr
.get());
143 bool SourceFile::ReadStandardInput(llvm::raw_ostream
&error
) {
145 path_
= "standard input";
146 auto buf_or
= llvm::MemoryBuffer::getSTDIN();
148 auto err
= buf_or
.getError();
149 error
<< err
.message();
152 auto inbuf
= std::move(buf_or
.get());
154 llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf
->getBufferSize());
155 llvm::copy(inbuf
->getBuffer(), buf_
->getBufferStart());
160 void SourceFile::ReadFile() {
161 buf_end_
= RemoveCarriageReturns(buf_
->getBuffer());
162 if (content().size() == 0 || content().back() != '\n') {
163 // Don't bother to copy if we have spare memory
164 if (content().size() >= buf_
->getBufferSize()) {
165 auto tmp_buf
{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
166 content().size() + 1)};
167 llvm::copy(content(), tmp_buf
->getBufferStart());
168 buf_
= std::move(tmp_buf
);
171 buf_
->getBuffer()[buf_end_
- 1] = '\n';
177 void SourceFile::Close() {
180 distinctPaths_
.clear();
184 SourcePosition
SourceFile::GetSourcePosition(std::size_t at
) const {
186 auto it
{llvm::upper_bound(lineStart_
, at
)};
187 auto trueLineNumber
{std::distance(lineStart_
.begin(), it
- 1) + 1};
188 auto ub
{origins_
.upper_bound(trueLineNumber
)};
189 auto column
{static_cast<int>(at
- lineStart_
[trueLineNumber
- 1] + 1)};
190 if (ub
== origins_
.begin()) {
191 return {*this, path_
, static_cast<int>(trueLineNumber
), column
,
192 static_cast<int>(trueLineNumber
)};
195 const SourcePositionOrigin
&origin
{ub
->second
};
197 trueLineNumber
- ub
->first
+ static_cast<std::size_t>(origin
.line
)};
198 return {*this, origin
.path
, static_cast<int>(lineNumber
), column
,
199 static_cast<int>(trueLineNumber
)};
203 const std::string
&SourceFile::SavePath(std::string
&&path
) {
204 return *distinctPaths_
.emplace(std::move(path
)).first
;
207 void SourceFile::LineDirective(
208 int trueLineNumber
, const std::string
&path
, int lineNumber
) {
209 origins_
.emplace(trueLineNumber
, SourcePositionOrigin
{path
, lineNumber
});
212 llvm::raw_ostream
&SourceFile::Dump(llvm::raw_ostream
&o
) const {
213 o
<< "SourceFile '" << path_
<< "'\n";
214 for (const auto &[at
, spo
] : origins_
) {
215 o
<< " origin_[" << at
<< "] -> '" << spo
.path
<< "' " << spo
.line
<< '\n';
219 } // namespace Fortran::parser