// Run DCE after a LoopFlatten test to reduce spurious output [nfc]
// [llvm-project.git] / libcxx / src / filesystem / path_parser.h
// blobc6e63e8256adbbf7b0e1ad48975180cac668bcb9
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 #ifndef PATH_PARSER_H
10 #define PATH_PARSER_H
12 #include <__config>
13 #include <__utility/unreachable.h>
14 #include <cstddef>
15 #include <filesystem>
16 #include <utility>
18 #include "format_string.h"
20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
/// \brief Report whether \p C is a directory separator.
///
/// '/' is always accepted. '\\' is accepted as well, but only when
/// _LIBCPP_WIN32API is defined.
inline bool isSeparator(path::value_type C) {
  if (C == '/')
    return true;
#if defined(_LIBCPP_WIN32API)
  if (C == '\\')
    return true;
#endif
  return false;
}
/// \brief Report whether \p C is an ASCII letter ('a'-'z' or 'A'-'Z'), i.e. a
/// character that may name a drive in a "X:" prefix.
inline bool isDriveLetter(path::value_type C) {
  return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z');
}
36 namespace parser {
38 using string_view_t = path::__string_view;
39 using string_view_pair = pair<string_view_t, string_view_t>;
40 using PosPtr = path::value_type const*;
42 struct PathParser {
43 enum ParserState : unsigned char {
44 // Zero is a special sentinel value used by default constructed iterators.
45 PS_BeforeBegin = path::iterator::_BeforeBegin,
46 PS_InRootName = path::iterator::_InRootName,
47 PS_InRootDir = path::iterator::_InRootDir,
48 PS_InFilenames = path::iterator::_InFilenames,
49 PS_InTrailingSep = path::iterator::_InTrailingSep,
50 PS_AtEnd = path::iterator::_AtEnd
53 const string_view_t Path;
54 string_view_t RawEntry;
55 ParserState State;
57 private:
58 PathParser(string_view_t P, ParserState State) noexcept : Path(P),
59 State(State) {}
61 public:
62 PathParser(string_view_t P, string_view_t E, unsigned char S)
63 : Path(P), RawEntry(E), State(static_cast<ParserState>(S)) {
64 // S cannot be '0' or PS_BeforeBegin.
67 static PathParser CreateBegin(string_view_t P) noexcept {
68 PathParser PP(P, PS_BeforeBegin);
69 PP.increment();
70 return PP;
73 static PathParser CreateEnd(string_view_t P) noexcept {
74 PathParser PP(P, PS_AtEnd);
75 return PP;
78 PosPtr peek() const noexcept {
79 auto TkEnd = getNextTokenStartPos();
80 auto End = getAfterBack();
81 return TkEnd == End ? nullptr : TkEnd;
84 void increment() noexcept {
85 const PosPtr End = getAfterBack();
86 const PosPtr Start = getNextTokenStartPos();
87 if (Start == End)
88 return makeState(PS_AtEnd);
90 switch (State) {
91 case PS_BeforeBegin: {
92 PosPtr TkEnd = consumeRootName(Start, End);
93 if (TkEnd)
94 return makeState(PS_InRootName, Start, TkEnd);
96 _LIBCPP_FALLTHROUGH();
97 case PS_InRootName: {
98 PosPtr TkEnd = consumeAllSeparators(Start, End);
99 if (TkEnd)
100 return makeState(PS_InRootDir, Start, TkEnd);
101 else
102 return makeState(PS_InFilenames, Start, consumeName(Start, End));
104 case PS_InRootDir:
105 return makeState(PS_InFilenames, Start, consumeName(Start, End));
107 case PS_InFilenames: {
108 PosPtr SepEnd = consumeAllSeparators(Start, End);
109 if (SepEnd != End) {
110 PosPtr TkEnd = consumeName(SepEnd, End);
111 if (TkEnd)
112 return makeState(PS_InFilenames, SepEnd, TkEnd);
114 return makeState(PS_InTrailingSep, Start, SepEnd);
117 case PS_InTrailingSep:
118 return makeState(PS_AtEnd);
120 case PS_AtEnd:
121 __libcpp_unreachable();
125 void decrement() noexcept {
126 const PosPtr REnd = getBeforeFront();
127 const PosPtr RStart = getCurrentTokenStartPos() - 1;
128 if (RStart == REnd) // we're decrementing the begin
129 return makeState(PS_BeforeBegin);
131 switch (State) {
132 case PS_AtEnd: {
133 // Try to consume a trailing separator or root directory first.
134 if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
135 if (SepEnd == REnd)
136 return makeState(PS_InRootDir, Path.data(), RStart + 1);
137 PosPtr TkStart = consumeRootName(SepEnd, REnd);
138 if (TkStart == REnd)
139 return makeState(PS_InRootDir, RStart, RStart + 1);
140 return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
141 } else {
142 PosPtr TkStart = consumeRootName(RStart, REnd);
143 if (TkStart == REnd)
144 return makeState(PS_InRootName, TkStart + 1, RStart + 1);
145 TkStart = consumeName(RStart, REnd);
146 return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
149 case PS_InTrailingSep:
150 return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1,
151 RStart + 1);
152 case PS_InFilenames: {
153 PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
154 if (SepEnd == REnd)
155 return makeState(PS_InRootDir, Path.data(), RStart + 1);
156 PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
157 if (TkStart == REnd) {
158 if (SepEnd)
159 return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
160 return makeState(PS_InRootName, TkStart + 1, RStart + 1);
162 TkStart = consumeName(SepEnd, REnd);
163 return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
165 case PS_InRootDir:
166 return makeState(PS_InRootName, Path.data(), RStart + 1);
167 case PS_InRootName:
168 case PS_BeforeBegin:
169 __libcpp_unreachable();
173 /// \brief Return a view with the "preferred representation" of the current
174 /// element. For example trailing separators are represented as a '.'
175 string_view_t operator*() const noexcept {
176 switch (State) {
177 case PS_BeforeBegin:
178 case PS_AtEnd:
179 return PATHSTR("");
180 case PS_InRootDir:
181 if (RawEntry[0] == '\\')
182 return PATHSTR("\\");
183 else
184 return PATHSTR("/");
185 case PS_InTrailingSep:
186 return PATHSTR("");
187 case PS_InRootName:
188 case PS_InFilenames:
189 return RawEntry;
191 __libcpp_unreachable();
194 explicit operator bool() const noexcept {
195 return State != PS_BeforeBegin && State != PS_AtEnd;
198 PathParser& operator++() noexcept {
199 increment();
200 return *this;
203 PathParser& operator--() noexcept {
204 decrement();
205 return *this;
208 bool atEnd() const noexcept {
209 return State == PS_AtEnd;
212 bool inRootDir() const noexcept {
213 return State == PS_InRootDir;
216 bool inRootName() const noexcept {
217 return State == PS_InRootName;
220 bool inRootPath() const noexcept {
221 return inRootName() || inRootDir();
224 private:
225 void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
226 State = NewState;
227 RawEntry = string_view_t(Start, End - Start);
229 void makeState(ParserState NewState) noexcept {
230 State = NewState;
231 RawEntry = {};
234 PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
236 PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
238 /// \brief Return a pointer to the first character after the currently
239 /// lexed element.
240 PosPtr getNextTokenStartPos() const noexcept {
241 switch (State) {
242 case PS_BeforeBegin:
243 return Path.data();
244 case PS_InRootName:
245 case PS_InRootDir:
246 case PS_InFilenames:
247 return &RawEntry.back() + 1;
248 case PS_InTrailingSep:
249 case PS_AtEnd:
250 return getAfterBack();
252 __libcpp_unreachable();
255 /// \brief Return a pointer to the first character in the currently lexed
256 /// element.
257 PosPtr getCurrentTokenStartPos() const noexcept {
258 switch (State) {
259 case PS_BeforeBegin:
260 case PS_InRootName:
261 return &Path.front();
262 case PS_InRootDir:
263 case PS_InFilenames:
264 case PS_InTrailingSep:
265 return &RawEntry.front();
266 case PS_AtEnd:
267 return &Path.back() + 1;
269 __libcpp_unreachable();
272 // Consume all consecutive separators.
273 PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
274 if (P == nullptr || P == End || !isSeparator(*P))
275 return nullptr;
276 const int Inc = P < End ? 1 : -1;
277 P += Inc;
278 while (P != End && isSeparator(*P))
279 P += Inc;
280 return P;
283 // Consume exactly N separators, or return nullptr.
284 PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
285 PosPtr Ret = consumeAllSeparators(P, End);
286 if (Ret == nullptr)
287 return nullptr;
288 if (P < End) {
289 if (Ret == P + N)
290 return Ret;
291 } else {
292 if (Ret == P - N)
293 return Ret;
295 return nullptr;
298 PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
299 PosPtr Start = P;
300 if (P == nullptr || P == End || isSeparator(*P))
301 return nullptr;
302 const int Inc = P < End ? 1 : -1;
303 P += Inc;
304 while (P != End && !isSeparator(*P))
305 P += Inc;
306 if (P == End && Inc < 0) {
307 // Iterating backwards and consumed all the rest of the input.
308 // Check if the start of the string would have been considered
309 // a root name.
310 PosPtr RootEnd = consumeRootName(End + 1, Start);
311 if (RootEnd)
312 return RootEnd - 1;
314 return P;
317 PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
318 if (P == End)
319 return nullptr;
320 if (P < End) {
321 if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
322 return nullptr;
323 return P + 2;
324 } else {
325 if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
326 return nullptr;
327 return P - 2;
331 PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
332 if (P == End)
333 return nullptr;
334 if (P < End)
335 return consumeName(consumeNSeparators(P, End, 2), End);
336 else
337 return consumeNSeparators(consumeName(P, End), End, 2);
340 PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
341 #if defined(_LIBCPP_WIN32API)
342 if (PosPtr Ret = consumeDriveLetter(P, End))
343 return Ret;
344 if (PosPtr Ret = consumeNetworkRoot(P, End))
345 return Ret;
346 #endif
347 return nullptr;
351 inline string_view_pair separate_filename(string_view_t const& s) {
352 if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
353 return string_view_pair{s, PATHSTR("")};
354 auto pos = s.find_last_of('.');
355 if (pos == string_view_t::npos || pos == 0)
356 return string_view_pair{s, string_view_t{}};
357 return string_view_pair{s.substr(0, pos), s.substr(pos)};
360 inline string_view_t createView(PosPtr S, PosPtr E) noexcept {
361 return {S, static_cast<size_t>(E - S) + 1};
364 } // namespace parser
366 _LIBCPP_END_NAMESPACE_FILESYSTEM
368 #endif // PATH_PARSER_H