1 //===----------------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
13 #include <__utility/unreachable.h>
18 #include "format_string.h"
20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
22 inline bool isSeparator(path::value_type C
) {
25 #if defined(_LIBCPP_WIN32API)
/// \brief Return true when \p C is an ASCII letter, i.e. lies in 'a'..'z' or
/// in 'A'..'Z'. Every other character yields false.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  return IsLower || IsUpper;
}
38 using string_view_t
= path::__string_view
;
39 using string_view_pair
= pair
<string_view_t
, string_view_t
>;
40 using PosPtr
= path::value_type
const*;
43 enum ParserState
: unsigned char {
44 // Zero is a special sentinel value used by default constructed iterators.
45 PS_BeforeBegin
= path::iterator::_BeforeBegin
,
46 PS_InRootName
= path::iterator::_InRootName
,
47 PS_InRootDir
= path::iterator::_InRootDir
,
48 PS_InFilenames
= path::iterator::_InFilenames
,
49 PS_InTrailingSep
= path::iterator::_InTrailingSep
,
50 PS_AtEnd
= path::iterator::_AtEnd
53 const string_view_t Path
;
54 string_view_t RawEntry
;
58 PathParser(string_view_t P
, ParserState State
) noexcept
: Path(P
),
62 PathParser(string_view_t P
, string_view_t E
, unsigned char S
)
63 : Path(P
), RawEntry(E
), State(static_cast<ParserState
>(S
)) {
64 // S cannot be '0' or PS_BeforeBegin.
67 static PathParser
CreateBegin(string_view_t P
) noexcept
{
68 PathParser
PP(P
, PS_BeforeBegin
);
73 static PathParser
CreateEnd(string_view_t P
) noexcept
{
74 PathParser
PP(P
, PS_AtEnd
);
78 PosPtr
peek() const noexcept
{
79 auto TkEnd
= getNextTokenStartPos();
80 auto End
= getAfterBack();
81 return TkEnd
== End
? nullptr : TkEnd
;
84 void increment() noexcept
{
85 const PosPtr End
= getAfterBack();
86 const PosPtr Start
= getNextTokenStartPos();
88 return makeState(PS_AtEnd
);
91 case PS_BeforeBegin
: {
92 PosPtr TkEnd
= consumeRootName(Start
, End
);
94 return makeState(PS_InRootName
, Start
, TkEnd
);
96 _LIBCPP_FALLTHROUGH();
98 PosPtr TkEnd
= consumeAllSeparators(Start
, End
);
100 return makeState(PS_InRootDir
, Start
, TkEnd
);
102 return makeState(PS_InFilenames
, Start
, consumeName(Start
, End
));
105 return makeState(PS_InFilenames
, Start
, consumeName(Start
, End
));
107 case PS_InFilenames
: {
108 PosPtr SepEnd
= consumeAllSeparators(Start
, End
);
110 PosPtr TkEnd
= consumeName(SepEnd
, End
);
112 return makeState(PS_InFilenames
, SepEnd
, TkEnd
);
114 return makeState(PS_InTrailingSep
, Start
, SepEnd
);
117 case PS_InTrailingSep
:
118 return makeState(PS_AtEnd
);
121 __libcpp_unreachable();
125 void decrement() noexcept
{
126 const PosPtr REnd
= getBeforeFront();
127 const PosPtr RStart
= getCurrentTokenStartPos() - 1;
128 if (RStart
== REnd
) // we're decrementing the begin
129 return makeState(PS_BeforeBegin
);
133 // Try to consume a trailing separator or root directory first.
134 if (PosPtr SepEnd
= consumeAllSeparators(RStart
, REnd
)) {
136 return makeState(PS_InRootDir
, Path
.data(), RStart
+ 1);
137 PosPtr TkStart
= consumeRootName(SepEnd
, REnd
);
139 return makeState(PS_InRootDir
, RStart
, RStart
+ 1);
140 return makeState(PS_InTrailingSep
, SepEnd
+ 1, RStart
+ 1);
142 PosPtr TkStart
= consumeRootName(RStart
, REnd
);
144 return makeState(PS_InRootName
, TkStart
+ 1, RStart
+ 1);
145 TkStart
= consumeName(RStart
, REnd
);
146 return makeState(PS_InFilenames
, TkStart
+ 1, RStart
+ 1);
149 case PS_InTrailingSep
:
150 return makeState(PS_InFilenames
, consumeName(RStart
, REnd
) + 1,
152 case PS_InFilenames
: {
153 PosPtr SepEnd
= consumeAllSeparators(RStart
, REnd
);
155 return makeState(PS_InRootDir
, Path
.data(), RStart
+ 1);
156 PosPtr TkStart
= consumeRootName(SepEnd
? SepEnd
: RStart
, REnd
);
157 if (TkStart
== REnd
) {
159 return makeState(PS_InRootDir
, SepEnd
+ 1, RStart
+ 1);
160 return makeState(PS_InRootName
, TkStart
+ 1, RStart
+ 1);
162 TkStart
= consumeName(SepEnd
, REnd
);
163 return makeState(PS_InFilenames
, TkStart
+ 1, SepEnd
+ 1);
166 return makeState(PS_InRootName
, Path
.data(), RStart
+ 1);
169 __libcpp_unreachable();
173 /// \brief Return a view with the "preferred representation" of the current
174 /// element. For example trailing separators are represented as a '.'
175 string_view_t
operator*() const noexcept
{
181 if (RawEntry
[0] == '\\')
182 return PATHSTR("\\");
185 case PS_InTrailingSep
:
191 __libcpp_unreachable();
194 explicit operator bool() const noexcept
{
195 return State
!= PS_BeforeBegin
&& State
!= PS_AtEnd
;
198 PathParser
& operator++() noexcept
{
203 PathParser
& operator--() noexcept
{
208 bool atEnd() const noexcept
{
209 return State
== PS_AtEnd
;
212 bool inRootDir() const noexcept
{
213 return State
== PS_InRootDir
;
216 bool inRootName() const noexcept
{
217 return State
== PS_InRootName
;
220 bool inRootPath() const noexcept
{
221 return inRootName() || inRootDir();
225 void makeState(ParserState NewState
, PosPtr Start
, PosPtr End
) noexcept
{
227 RawEntry
= string_view_t(Start
, End
- Start
);
229 void makeState(ParserState NewState
) noexcept
{
234 PosPtr
getAfterBack() const noexcept
{ return Path
.data() + Path
.size(); }
236 PosPtr
getBeforeFront() const noexcept
{ return Path
.data() - 1; }
/// \brief Return a pointer to the first character after the currently lexed
/// element.
240 PosPtr
getNextTokenStartPos() const noexcept
{
247 return &RawEntry
.back() + 1;
248 case PS_InTrailingSep
:
250 return getAfterBack();
252 __libcpp_unreachable();
/// \brief Return a pointer to the first character in the currently lexed
/// element.
257 PosPtr
getCurrentTokenStartPos() const noexcept
{
261 return &Path
.front();
264 case PS_InTrailingSep
:
265 return &RawEntry
.front();
267 return &Path
.back() + 1;
269 __libcpp_unreachable();
272 // Consume all consecutive separators.
273 PosPtr
consumeAllSeparators(PosPtr P
, PosPtr End
) const noexcept
{
274 if (P
== nullptr || P
== End
|| !isSeparator(*P
))
276 const int Inc
= P
< End
? 1 : -1;
278 while (P
!= End
&& isSeparator(*P
))
283 // Consume exactly N separators, or return nullptr.
284 PosPtr
consumeNSeparators(PosPtr P
, PosPtr End
, int N
) const noexcept
{
285 PosPtr Ret
= consumeAllSeparators(P
, End
);
298 PosPtr
consumeName(PosPtr P
, PosPtr End
) const noexcept
{
300 if (P
== nullptr || P
== End
|| isSeparator(*P
))
302 const int Inc
= P
< End
? 1 : -1;
304 while (P
!= End
&& !isSeparator(*P
))
306 if (P
== End
&& Inc
< 0) {
// Iterating backwards and consumed all the rest of the input.
// Check if the start of the string would have been considered
// a root name.
310 PosPtr RootEnd
= consumeRootName(End
+ 1, Start
);
317 PosPtr
consumeDriveLetter(PosPtr P
, PosPtr End
) const noexcept
{
321 if (P
+ 1 == End
|| !isDriveLetter(P
[0]) || P
[1] != ':')
325 if (P
- 1 == End
|| !isDriveLetter(P
[-1]) || P
[0] != ':')
331 PosPtr
consumeNetworkRoot(PosPtr P
, PosPtr End
) const noexcept
{
335 return consumeName(consumeNSeparators(P
, End
, 2), End
);
337 return consumeNSeparators(consumeName(P
, End
), End
, 2);
340 PosPtr
consumeRootName(PosPtr P
, PosPtr End
) const noexcept
{
341 #if defined(_LIBCPP_WIN32API)
342 if (PosPtr Ret
= consumeDriveLetter(P
, End
))
344 if (PosPtr Ret
= consumeNetworkRoot(P
, End
))
351 inline string_view_pair
separate_filename(string_view_t
const& s
) {
352 if (s
== PATHSTR(".") || s
== PATHSTR("..") || s
.empty())
353 return string_view_pair
{s
, PATHSTR("")};
354 auto pos
= s
.find_last_of('.');
355 if (pos
== string_view_t::npos
|| pos
== 0)
356 return string_view_pair
{s
, string_view_t
{}};
357 return string_view_pair
{s
.substr(0, pos
), s
.substr(pos
)};
360 inline string_view_t
createView(PosPtr S
, PosPtr E
) noexcept
{
361 return {S
, static_cast<size_t>(E
- S
) + 1};
364 } // namespace parser
366 _LIBCPP_END_NAMESPACE_FILESYSTEM
368 #endif // PATH_PARSER_H