1 //===----------------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
13 #include <__utility/unreachable.h>
18 #include "format_string.h"
20 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
22 inline bool isSeparator(path::value_type C
) {
25 #if defined(_LIBCPP_WIN32API)
32 inline bool isDriveLetter(path::value_type C
) { return (C
>= 'a' && C
<= 'z') || (C
>= 'A' && C
<= 'Z'); }
36 using string_view_t
= path::__string_view
;
37 using string_view_pair
= pair
<string_view_t
, string_view_t
>;
38 using PosPtr
= path::value_type
const*;
/// States the path parser can be in, stored in an unsigned char.
/// Each enumerator takes its value from the like-named constant declared in
/// path::iterator, so a parser state and an iterator state can be converted
/// into one another with a plain cast.
41 enum ParserState
: unsigned char {
42 // Zero is a special sentinel value used by default constructed iterators.
43 PS_BeforeBegin
= path::iterator::_BeforeBegin
,
44 PS_InRootName
= path::iterator::_InRootName
,
45 PS_InRootDir
= path::iterator::_InRootDir
,
46 PS_InFilenames
= path::iterator::_InFilenames
,
47 PS_InTrailingSep
= path::iterator::_InTrailingSep
,
48 PS_AtEnd
= path::iterator::_AtEnd
51 const string_view_t Path
;
52 string_view_t RawEntry
;
56 PathParser(string_view_t P
, ParserState State
) noexcept
: Path(P
), State_(State
) {}
59 PathParser(string_view_t P
, string_view_t E
, unsigned char S
)
60 : Path(P
), RawEntry(E
), State_(static_cast<ParserState
>(S
)) {
61 // S cannot be '0' or PS_BeforeBegin.
64 static PathParser
CreateBegin(string_view_t P
) noexcept
{
65 PathParser
PP(P
, PS_BeforeBegin
);
70 static PathParser
CreateEnd(string_view_t P
) noexcept
{
71 PathParser
PP(P
, PS_AtEnd
);
/// Look ahead without changing the parser: yields a pointer to the position
/// where the next token would start, or nullptr when that position is the
/// end of the path.
75 PosPtr
peek() const noexcept
{
// Start of whatever follows the element the parser currently holds.
76 auto TkEnd
= getNextTokenStartPos();
// One past the last character of Path.
77 auto End
= getAfterBack();
// If both coincide there is nothing left to look at.
78 return TkEnd
== End
? nullptr : TkEnd
;
81 void increment() noexcept
{
82 const PosPtr End
= getAfterBack();
83 const PosPtr Start
= getNextTokenStartPos();
85 return makeState(PS_AtEnd
);
88 case PS_BeforeBegin
: {
89 PosPtr TkEnd
= consumeRootName(Start
, End
);
91 return makeState(PS_InRootName
, Start
, TkEnd
);
93 _LIBCPP_FALLTHROUGH();
95 PosPtr TkEnd
= consumeAllSeparators(Start
, End
);
97 return makeState(PS_InRootDir
, Start
, TkEnd
);
99 return makeState(PS_InFilenames
, Start
, consumeName(Start
, End
));
102 return makeState(PS_InFilenames
, Start
, consumeName(Start
, End
));
104 case PS_InFilenames
: {
105 PosPtr SepEnd
= consumeAllSeparators(Start
, End
);
107 PosPtr TkEnd
= consumeName(SepEnd
, End
);
109 return makeState(PS_InFilenames
, SepEnd
, TkEnd
);
111 return makeState(PS_InTrailingSep
, Start
, SepEnd
);
114 case PS_InTrailingSep
:
115 return makeState(PS_AtEnd
);
118 __libcpp_unreachable();
122 void decrement() noexcept
{
123 const PosPtr REnd
= getBeforeFront();
124 const PosPtr RStart
= getCurrentTokenStartPos() - 1;
125 if (RStart
== REnd
) // we're decrementing the begin
126 return makeState(PS_BeforeBegin
);
130 // Try to consume a trailing separator or root directory first.
131 if (PosPtr SepEnd
= consumeAllSeparators(RStart
, REnd
)) {
133 return makeState(PS_InRootDir
, Path
.data(), RStart
+ 1);
134 PosPtr TkStart
= consumeRootName(SepEnd
, REnd
);
136 return makeState(PS_InRootDir
, RStart
, RStart
+ 1);
137 return makeState(PS_InTrailingSep
, SepEnd
+ 1, RStart
+ 1);
139 PosPtr TkStart
= consumeRootName(RStart
, REnd
);
141 return makeState(PS_InRootName
, TkStart
+ 1, RStart
+ 1);
142 TkStart
= consumeName(RStart
, REnd
);
143 return makeState(PS_InFilenames
, TkStart
+ 1, RStart
+ 1);
146 case PS_InTrailingSep
:
147 return makeState(PS_InFilenames
, consumeName(RStart
, REnd
) + 1, RStart
+ 1);
148 case PS_InFilenames
: {
149 PosPtr SepEnd
= consumeAllSeparators(RStart
, REnd
);
151 return makeState(PS_InRootDir
, Path
.data(), RStart
+ 1);
152 PosPtr TkStart
= consumeRootName(SepEnd
? SepEnd
: RStart
, REnd
);
153 if (TkStart
== REnd
) {
155 return makeState(PS_InRootDir
, SepEnd
+ 1, RStart
+ 1);
156 return makeState(PS_InRootName
, TkStart
+ 1, RStart
+ 1);
158 TkStart
= consumeName(SepEnd
, REnd
);
159 return makeState(PS_InFilenames
, TkStart
+ 1, SepEnd
+ 1);
162 return makeState(PS_InRootName
, Path
.data(), RStart
+ 1);
165 __libcpp_unreachable();
169 /// \brief Return a view with the "preferred representation" of the current
170 /// element. For example trailing separators are represented as a '.'
171 string_view_t
operator*() const noexcept
{
177 if (RawEntry
[0] == '\\')
178 return PATHSTR("\\");
181 case PS_InTrailingSep
:
187 __libcpp_unreachable();
190 explicit operator bool() const noexcept
{ return State_
!= PS_BeforeBegin
&& State_
!= PS_AtEnd
; }
192 PathParser
& operator++() noexcept
{
197 PathParser
& operator--() noexcept
{
202 bool atEnd() const noexcept
{ return State_
== PS_AtEnd
; }
204 bool inRootDir() const noexcept
{ return State_
== PS_InRootDir
; }
206 bool inRootName() const noexcept
{ return State_
== PS_InRootName
; }
208 bool inRootPath() const noexcept
{ return inRootName() || inRootDir(); }
211 void makeState(ParserState NewState
, PosPtr Start
, PosPtr End
) noexcept
{
213 RawEntry
= string_view_t(Start
, End
- Start
);
215 void makeState(ParserState NewState
) noexcept
{
220 PosPtr
getAfterBack() const noexcept
{ return Path
.data() + Path
.size(); }
222 PosPtr
getBeforeFront() const noexcept
{ return Path
.data() - 1; }
224 /// \brief Return a pointer to the first character after the currently lexed element.
226 PosPtr
getNextTokenStartPos() const noexcept
{
233 return &RawEntry
.back() + 1;
234 case PS_InTrailingSep
:
236 return getAfterBack();
238 __libcpp_unreachable();
241 /// \brief Return a pointer to the first character in the currently lexed element.
243 PosPtr
getCurrentTokenStartPos() const noexcept
{
247 return &Path
.front();
250 case PS_InTrailingSep
:
251 return &RawEntry
.front();
253 return &Path
.back() + 1;
255 __libcpp_unreachable();
258 // Consume all consecutive separators.
259 PosPtr
consumeAllSeparators(PosPtr P
, PosPtr End
) const noexcept
{
260 if (P
== nullptr || P
== End
|| !isSeparator(*P
))
262 const int Inc
= P
< End
? 1 : -1;
264 while (P
!= End
&& isSeparator(*P
))
269 // Consume exactly N separators, or return nullptr.
270 PosPtr
consumeNSeparators(PosPtr P
, PosPtr End
, int N
) const noexcept
{
271 PosPtr Ret
= consumeAllSeparators(P
, End
);
284 PosPtr
consumeName(PosPtr P
, PosPtr End
) const noexcept
{
286 if (P
== nullptr || P
== End
|| isSeparator(*P
))
288 const int Inc
= P
< End
? 1 : -1;
290 while (P
!= End
&& !isSeparator(*P
))
292 if (P
== End
&& Inc
< 0) {
293 // Iterating backwards and consumed all the rest of the input.
294 // Check if the start of the string would have been considered a root name.
296 PosPtr RootEnd
= consumeRootName(End
+ 1, Start
);
303 PosPtr
consumeDriveLetter(PosPtr P
, PosPtr End
) const noexcept
{
307 if (P
+ 1 == End
|| !isDriveLetter(P
[0]) || P
[1] != ':')
311 if (P
- 1 == End
|| !isDriveLetter(P
[-1]) || P
[0] != ':')
317 PosPtr
consumeNetworkRoot(PosPtr P
, PosPtr End
) const noexcept
{
321 return consumeName(consumeNSeparators(P
, End
, 2), End
);
323 return consumeNSeparators(consumeName(P
, End
), End
, 2);
326 PosPtr
consumeRootName(PosPtr P
, PosPtr End
) const noexcept
{
327 #if defined(_LIBCPP_WIN32API)
328 if (PosPtr Ret
= consumeDriveLetter(P
, End
))
330 if (PosPtr Ret
= consumeNetworkRoot(P
, End
))
/// Split \p s at its last '.' and return the two pieces as a pair of views.
///
/// The first view is the part before the last '.'; the second view starts at
/// that '.' and runs to the end. When no split applies, the first view is the
/// whole of \p s and the second is empty.
337 inline string_view_pair
separate_filename(string_view_t
const& s
) {
// ".", ".." and the empty string are never split.
338 if (s
== PATHSTR(".") || s
== PATHSTR("..") || s
.empty())
339 return string_view_pair
{s
, PATHSTR("")};
340 auto pos
= s
.find_last_of('.');
// No '.' at all, or the only candidate is the very first character:
// the whole of s goes into the first view.
341 if (pos
== string_view_t::npos
|| pos
== 0)
342 return string_view_pair
{s
, string_view_t
{}};
// The '.' itself belongs to the second view.
343 return string_view_pair
{s
.substr(0, pos
), s
.substr(pos
)};
346 inline string_view_t
createView(PosPtr S
, PosPtr E
) noexcept
{ return {S
, static_cast<size_t>(E
- S
) + 1}; }
348 } // namespace parser
350 _LIBCPP_END_NAMESPACE_FILESYSTEM
352 #endif // PATH_PARSER_H