1 //===-- lib/Parser/source.cpp ---------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "flang/Parser/source.h"
10 #include "flang/Common/idioms.h"
11 #include "flang/Parser/char-buffer.h"
12 #include "llvm/Support/Errno.h"
13 #include "llvm/Support/FileSystem.h"
14 #include "llvm/Support/Path.h"
15 #include "llvm/Support/raw_ostream.h"
20 namespace Fortran::parser
{
22 SourceFile::~SourceFile() { Close(); }
// Helper with internal linkage that walks `source` newline by newline and
// returns a vector of std::size_t offsets. A non-empty `source` has to end in
// '\n'; the CHECK below enforces that.
// NOTE(review): the embedded listing numbers jump 27 -> 31 and stop at 33, so
// the declaration of `at`, the head of the do-loop, whatever stores into
// `result`, and the return statement are not visible here. Presumably each
// loop pass records `at` as a line start -- confirm against the full file.
24 static std::vector
<std::size_t> FindLineStarts(llvm::StringRef source
) {
25 std::vector
<std::size_t> result
;
26 if (source
.size() > 0) {
27 CHECK(source
.back() == '\n' && "missing ultimate newline");
// Move `at` to the offset just past the next '\n' found at or after `at`.
31 at
= source
.find('\n', at
) + 1;
// Repeat while `at` still lies inside `source`.
32 } while (at
< source
.size());
// Request that the vector drop any unused capacity.
33 result
.shrink_to_fit();
// Recomputes lineStart_ by running FindLineStarts() over a StringRef built
// from content().data() and bytes().
38 void SourceFile::RecordLineStarts() {
39 lineStart_
= FindLineStarts({content().data(), bytes()});
42 // Check for a Unicode byte order mark (BOM).
43 // Module files all have one; so can source files.
// Views the whole of buf_ (getBufferStart() for getBufferSize() bytes) and
// tests whether it begins with the three-byte UTF-8 BOM EF BB BF. On a match,
// bom_end_ is set to the BOM length and encoding_ to Encoding::UTF_8; the
// visible code assigns nothing when there is no match.
// NOTE(review): StringRef::startswith is the older spelling; newer LLVM
// releases name it starts_with -- confirm which one the build's LLVM offers.
44 void SourceFile::IdentifyPayload() {
45 llvm::StringRef content
{buf_
->getBufferStart(), buf_
->getBufferSize()};
46 constexpr llvm::StringLiteral UTF8_BOM
{"\xef\xbb\xbf"};
47 if (content
.startswith(UTF8_BOM
)) {
48 bom_end_
= UTF8_BOM
.size();
49 encoding_
= Encoding::UTF_8
;
// Returns `path` with its last component stripped by
// llvm::sys::path::remove_filename. The work is done on a SmallString<128>
// copy; `path` is taken by value, so the caller's string is untouched.
53 std::string
DirectoryName(std::string path
) {
54 llvm::SmallString
<128> pathBuf
{path
};
55 llvm::sys::path::remove_filename(pathBuf
);
// Convert the SmallString back to an owning std::string for the caller.
56 return pathBuf
.str().str();
// Resolves `name` against the directories in `searchPath`, yielding an
// optional path string.
// The first test singles out the name "-" and absolute paths. The loop then
// joins each directory with `name` and asks llvm::sys::fs::is_directory about
// the joined path.
// NOTE(review): embedded listing lines 62-63, 67, 69 and 71-75 are absent, so
// the result for the "-"/absolute case, the declaration of `isDir`, the
// condition guarding the in-loop return, and the fall-through return are not
// visible. Presumably "-" and absolute names are returned unchanged and an
// empty optional signals "not found" -- confirm against the full file.
59 std::optional
<std::string
> LocateSourceFile(
60 std::string name
, const std::list
<std::string
> &searchPath
) {
61 if (name
== "-" || llvm::sys::path::is_absolute(name
)) {
64 for (const std::string
&dir
: searchPath
) {
// Candidate path: current search directory joined with `name`.
65 llvm::SmallString
<128> path
{dir
};
66 llvm::sys::path::append(path
, name
);
// `er` holds the status of the query; `isDir` receives its answer.
68 auto er
= llvm::sys::fs::is_directory(path
, isDir
);
70 return path
.str().str();
// Scans `buf` for carriage returns ('\r') and compacts the data toward the
// front of the same buffer with memmove. Per the two "omit" branches below, a
// CR is dropped when it is followed by LF or by the end of the data, or when
// it is preceded by LF or sits at the very start of the buffer.
// NOTE(review): embedded listing lines 77, 79, 81, 85, 87-89, 96-99 and
// 101-107 are absent. The declarations of `wrote` and `p`, the enclosing
// loop, the handling of a null `crcp`, the branch for a CR in the middle of a
// line, the counter updates and the return statement are therefore not
// visible. Presumably the function returns the number of bytes kept --
// confirm against the full file.
76 std::size_t RemoveCarriageReturns(llvm::MutableArrayRef
<char> buf
) {
78 char *buffer
{buf
.data()};
80 std::size_t bytes
= buf
.size();
// Look for the next CR within the `bytes` bytes starting at `p`.
82 void *vp
{static_cast<void *>(p
)};
83 void *crvp
{std::memchr(vp
, '\r', bytes
)};
84 char *crcp
{static_cast<char *>(crvp
)};
// Copies all `bytes` remaining bytes down to offset `wrote`.
// NOTE(review): presumably the no-more-CR case; its guard is not visible.
86 std::memmove(buffer
+ wrote
, p
, bytes
);
// `chunk` counts the bytes in front of the CR; `advance` also covers the CR.
90 std::size_t chunk
= crcp
- p
;
91 auto advance
{chunk
+ 1};
92 if (chunk
+ 1 >= bytes
|| crcp
[1] == '\n') {
93 // CR followed by LF or EOF: omit
94 } else if ((chunk
== 0 && p
== buf
.data()) || crcp
[-1] == '\n') {
95 // CR preceded by LF or BOF: omit
// Copy the `chunk` bytes at `p` down to offset `wrote`.
100 std::memmove(buffer
+ wrote
, p
, chunk
);
// Loads the file at `path` through llvm::WritableMemoryBuffer::getFile and
// stores the resulting buffer in buf_. The visible failure text written to
// `error` is "Could not open '<path_>': <message>".
// NOTE(review): embedded listing lines 109-110, 113, 116-117 and 119-122 are
// absent, so the statements before errorPath, the test that selects the
// failure branch, and both return statements are not visible. errorPath is
// built from the member path_, not the parameter `path`; presumably path_ is
// assigned from `path` on a missing line -- confirm against the full file.
108 bool SourceFile::Open(std::string path
, llvm::raw_ostream
&error
) {
// Quoted file name for use in the diagnostic.
111 std::string errorPath
{"'"s
+ path_
+ "'"};
112 auto bufOr
{llvm::WritableMemoryBuffer::getFile(path
)};
// Failure reporting: fetch the error code and print its message.
114 auto err
= bufOr
.getError();
115 error
<< "Could not open " << errorPath
<< ": " << err
.message();
// Take ownership of the loaded buffer.
118 buf_
= std::move(bufOr
.get());
// Reads standard input via llvm::MemoryBuffer::getSTDIN, labels the source
// "standard input" in path_, and copies the bytes into the buffer behind
// buf_. On failure the error code's message is written to `error`.
// NOTE(review): embedded listing lines 124, 127, 130-131, 133 and 136-139 are
// absent, so the failure test, the return statements and any follow-up calls
// are not visible -- confirm against the full file.
123 bool SourceFile::ReadStandardInput(llvm::raw_ostream
&error
) {
125 path_
= "standard input";
126 auto buf_or
= llvm::MemoryBuffer::getSTDIN();
// Failure reporting: fetch the error code and print its message.
128 auto err
= buf_or
.getError();
129 error
<< err
.message();
132 auto inbuf
= std::move(buf_or
.get());
// NOTE(review): embedded line 133 is absent; the call below is presumably the
// right-hand side of an assignment to buf_, because buf_ is written through
// on the following statement -- confirm.
134 llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf
->getBufferSize());
// Copy the stdin bytes into the buffer behind buf_.
135 llvm::copy(inbuf
->getBuffer(), buf_
->getBufferStart());
// Post-processes the freshly loaded buffer: runs RemoveCarriageReturns over
// it, stores the result in buf_end_, and then deals with content that is
// empty or does not end in '\n' by writing a '\n' at index buf_end_ - 1.
// NOTE(review): embedded listing lines 149-150 and 152-156 are absent. The
// store at buf_end_ - 1 only appends a newline if buf_end_ was incremented on
// a missing line; that increment and anything done after the newline fix-up
// are not visible -- confirm against the full file.
140 void SourceFile::ReadFile() {
141 buf_end_
= RemoveCarriageReturns(buf_
->getBuffer());
142 if (content().size() == 0 || content().back() != '\n') {
143 // Don't bother to copy if we have spare memory
// No spare byte in the current buffer: allocate one that is a byte larger,
// copy the content across, and adopt it as buf_.
144 if (content().size() >= buf_
->getBufferSize()) {
145 auto tmp_buf
{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
146 content().size() + 1)};
147 llvm::copy(content(), tmp_buf
->getBufferStart());
148 buf_
= std::move(tmp_buf
);
// Store the newline at index buf_end_ - 1.
151 buf_
->getBuffer()[buf_end_
- 1] = '\n';
// Invoked by the destructor, SourceFile::~SourceFile().
// NOTE(review): the body (embedded listing lines 158-161) is absent from this
// listing; presumably it releases the path and buffer -- confirm against the
// full file.
157 void SourceFile::Close() {
// Translates byte offset `at` into a SourcePosition holding this file, a
// 1-based line number and a 1-based column.
// llvm::upper_bound locates the first entry of lineStart_ greater than `at`;
// the entry just before it is the start of the line containing `at`.
// NOTE(review): this relies on lineStart_ being sorted ascending and having
// an entry <= `at`, otherwise `it - 1` is invalid. Embedded listing lines
// 163-164 are absent, so any precondition check is not visible -- confirm.
162 SourcePosition
SourceFile::FindOffsetLineAndColumn(std::size_t at
) const {
165 auto it
= llvm::upper_bound(lineStart_
, at
);
// `low` is the zero-based index of the line that contains `at`.
166 auto low
= std::distance(lineStart_
.begin(), it
- 1);
// Line is low + 1; column is the distance from the line start, plus one.
167 return {*this, static_cast<int>(low
+ 1),
168 static_cast<int>(at
- lineStart_
[low
] + 1)};
170 } // namespace Fortran::parser