1 //===---- URI.h - File URIs with schemes -------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "support/Logger.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Path.h"
// Instantiates the registry type clang::clangd::URISchemeRegistry through
// LLVM's registry macro. Other code in this file walks the registered scheme
// plugins via URISchemeRegistry::entries().
17 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry
)
23 bool isWindowsPath(llvm::StringRef Path
) {
24 return Path
.size() > 1 && llvm::isAlpha(Path
[0]) && Path
[1] == ':';
27 bool isNetworkPath(llvm::StringRef Path
) {
28 return Path
.size() > 2 && Path
[0] == Path
[1] &&
29 llvm::sys::path::is_separator(Path
[0]);
32 /// This manages file paths in the file system. All paths in the scheme
33 /// are absolute (with leading '/').
34 /// Note that this scheme is hardcoded into the library and not registered in
// NOTE(review): the sentence above is cut off in this excerpt; presumably it
// ends with "the registry" -- confirm against the full file.
36 class FileSystemScheme
: public URIScheme
{
/// Turns the components of a "file" URI into a native filesystem path.
///  - \p Body must start with '/'; otherwise an error is returned.
///  - A non-empty \p Authority is written into the result as
///    "//<Authority>" (UNC form).
///  - With an empty authority, a body of the form "/X:..." has its leading
///    '/' removed.
/// The assembled path is converted with llvm::sys::path::native() and
/// returned as a std::string. The third (hint path) parameter is unnamed and
/// unused.
38 llvm::Expected
<std::string
>
39 getAbsolutePath(llvm::StringRef Authority
, llvm::StringRef Body
,
40 llvm::StringRef
/*HintPath*/) const override
{
41 if (!Body
.starts_with("/"))
// NOTE(review): the rest of this error message and its arguments are not
// visible in this excerpt.
42 return error("File scheme: expect body to be an absolute path starting "
45 llvm::SmallString
<128> Path
;
46 if (!Authority
.empty()) {
47 // Windows UNC paths e.g. file://server/share => \\server\share
48 ("//" + Authority
).toVector(Path
);
49 } else if (isWindowsPath(Body
.substr(1))) {
50 // Windows paths e.g. file:///X:/path => X:\path
51 Body
.consume_front("/");
// NOTE(review): the statement that copies Body into Path is not visible in
// this excerpt (presumably an append before native()) -- confirm.
54 llvm::sys::path::native(Path
);
55 return std::string(Path
);
/// Builds a "file" URI from \p AbsolutePath.
///  - If the path's root name is a network path, the root name minus its
///    first two characters becomes the authority and the root name is
///    stripped from the front of the path.
///  - If the root name is a Windows drive, a dedicated branch runs; its body
///    is not visible here.
/// The remaining path, converted to forward slashes, is appended to Body and
/// the URI is constructed with scheme "file".
// NOTE(review): the return-type line, the declaration of Body, and the body
// of the Windows-drive branch are missing from this excerpt; presumably the
// branch seeds Body with "/" as the comment on it suggests -- confirm.
59 uriFromAbsolutePath(llvm::StringRef AbsolutePath
) const override
{
61 llvm::StringRef Authority
;
62 llvm::StringRef Root
= llvm::sys::path::root_name(AbsolutePath
);
63 if (isNetworkPath(Root
)) {
64 // Windows UNC paths e.g. \\server\share => file://server/share
65 Authority
= Root
.drop_front(2);
66 AbsolutePath
.consume_front(Root
);
67 } else if (isWindowsPath(Root
)) {
68 // Windows paths e.g. X:\path => file:///X:/path
71 Body
+= llvm::sys::path::convert_to_slash(AbsolutePath
);
72 return URI("file", Authority
, Body
);
/// Looks up a URI scheme implementation by name.
/// Returns a freshly created FileSystemScheme on the fast path, otherwise
/// the first registry entry whose getName() equals \p Scheme (instantiated),
/// otherwise the error "Can't find scheme: {0}".
// NOTE(review): the condition guarding the FileSystemScheme return
// (presumably a comparison of Scheme against "file") and the statement that
// skips non-matching registry entries are not visible in this excerpt.
76 llvm::Expected
<std::unique_ptr
<URIScheme
>>
77 findSchemeByName(llvm::StringRef Scheme
) {
79 return std::make_unique
<FileSystemScheme
>();
81 for (const auto &URIScheme
: URISchemeRegistry::entries()) {
82 if (URIScheme
.getName() != Scheme
)
84 return URIScheme
.instantiate();
86 return error("Can't find scheme: {0}", Scheme
);
/// Decides whether the byte \p C has to be percent-escaped when encoding.
/// The visible code tests for ASCII letters and digits first and has a
/// dedicated case for '/'.
// NOTE(review): the return statements and the remaining case labels of the
// switch are not visible in this excerpt, so the exact set of unescaped
// characters cannot be confirmed from here.
89 bool shouldEscape(unsigned char C
) {
90 // Unreserved characters.
91 if ((C
>= 'a' && C
<= 'z') || (C
>= 'A' && C
<= 'Z') ||
92 (C
>= '0' && C
<= '9'))
99 case '/': // '/' is only reserved when parsing.
100 // ':' is only reserved for relative URI paths, which clangd doesn't produce.
107 /// Encodes a string according to percent-encoding.
108 /// - Unreserved characters are not escaped.
109 /// - Reserved characters are always escaped, with exceptions like '/'.
110 /// - All other characters are escaped.
/// Output is appended to \p Out. Each byte of \p Content is examined as an
/// unsigned char; for bytes that shouldEscape() accepts, the high and low
/// nibbles are emitted as two hex digits.
// NOTE(review): the statement emitting the '%' prefix and the branch for
// bytes that need no escaping are not visible in this excerpt.
111 void percentEncode(llvm::StringRef Content
, std::string
&Out
) {
112 for (unsigned char C
: Content
)
113 if (shouldEscape(C
)) {
115 Out
.push_back(llvm::hexdigit(C
/ 16));
116 Out
.push_back(llvm::hexdigit(C
% 16));
122 /// Decodes a string according to percent-encoding.
/// Scans \p Content character by character. A '%' is treated as an escape
/// only when at least two more characters follow it and both are hex digits;
/// the pair is then combined into one byte with llvm::hexFromNibbles. Other
/// characters are appended to the result as they are.
// NOTE(review): the declaration of Result, the code that advances I past a
// consumed escape, and the final return are not visible in this excerpt.
123 std::string
percentDecode(llvm::StringRef Content
) {
125 for (auto I
= Content
.begin(), E
= Content
.end(); I
!= E
; ++I
) {
// The `I + 2 < Content.end()` test guarantees that both I + 1 and I + 2 may
// be dereferenced below.
130 if (*I
== '%' && I
+ 2 < Content
.end() && llvm::isHexDigit(*(I
+ 1)) &&
131 llvm::isHexDigit(*(I
+ 2))) {
132 Result
.push_back(llvm::hexFromNibbles(*(I
+ 1), *(I
+ 2)));
135 Result
.push_back(*I
);
/// Checks the spelling of a URI scheme name: the first character must be
/// alphabetic, and every following character must be alphanumeric or one of
/// '+', '.', '-'.
// NOTE(review): Scheme[0] is read below, which requires a non-empty string;
// the guard for an empty scheme and the early-return statement are not
// visible in this excerpt -- confirm they exist in the full file.
140 bool isValidScheme(llvm::StringRef Scheme
) {
143 if (!llvm::isAlpha(Scheme
[0]))
145 return llvm::all_of(llvm::drop_begin(Scheme
), [](char C
) {
146 return llvm::isAlnum(C
) || C
== '+' || C
== '.' || C
== '-';
/// Constructs a URI by initialising the Scheme, Authority and Body members
/// from the parameters of the same names.
/// Preconditions, enforced by assert only:
///  - \p Scheme is not empty;
///  - when \p Authority is not empty, \p Body starts with '/'.
152 URI::URI(llvm::StringRef Scheme
, llvm::StringRef Authority
,
153 llvm::StringRef Body
)
154 : Scheme(Scheme
), Authority(Authority
), Body(Body
) {
155 assert(!Scheme
.empty());
156 assert((Authority
.empty() || Body
.starts_with("/")) &&
157 "URI body must start with '/' when authority is present.");
/// Serialises this URI to text: the percent-encoded scheme, a ':', then,
/// when applicable, the percent-encoded authority, and finally the
/// percent-encoded body.
// NOTE(review): the declaration of Result, the early exit taken when both
// authority and body are empty, the statement that emits the "//" separator
// and the final return are not visible in this excerpt.
160 std::string
URI::toString() const {
162 percentEncode(Scheme
, Result
);
163 Result
.push_back(':');
164 if (Authority
.empty() && Body
.empty())
166 // If authority is empty, we only print body if it starts with "/"; otherwise,
167 // the URI is invalid.
168 if (!Authority
.empty() || llvm::StringRef(Body
).starts_with("/")) {
170 percentEncode(Authority
, Result
);
172 percentEncode(Body
, Result
);
/// Parses the textual URI \p OrigUri into scheme, authority and body.
///  - Everything before the first ':' is the scheme; it is percent-decoded
///    and must satisfy isValidScheme(). A missing ':' or an invalid scheme
///    yields an error.
///  - If the remainder starts with "//", an authority follows; it is
///    percent-decoded.
///  - Whatever is left is percent-decoded into the body.
// NOTE(review): the declaration of the result object U, the statement that
// recomputes Pos to mark the end of the authority, and the final return are
// not visible in this excerpt.
176 llvm::Expected
<URI
> URI::parse(llvm::StringRef OrigUri
) {
// Work on a copy so the error messages can still quote the original input.
178 llvm::StringRef Uri
= OrigUri
;
180 auto Pos
= Uri
.find(':');
181 if (Pos
== llvm::StringRef::npos
)
182 return error("Scheme must be provided in URI: {0}", OrigUri
);
183 auto SchemeStr
= Uri
.substr(0, Pos
);
184 U
.Scheme
= percentDecode(SchemeStr
);
185 if (!isValidScheme(U
.Scheme
))
186 return error("Invalid scheme: {0} (decoded: {1})", SchemeStr
, U
.Scheme
);
// Drop the scheme and the ':' that terminates it.
187 Uri
= Uri
.substr(Pos
+ 1);
188 if (Uri
.consume_front("//")) {
190 U
.Authority
= percentDecode(Uri
.substr(0, Pos
));
191 Uri
= Uri
.substr(Pos
);
193 U
.Body
= percentDecode(Uri
);
/// Convenience overload: parses \p FileURI with URI::parse and resolves the
/// result with the URI-taking overload of resolve(), forwarding
/// \p HintPath. Errors from either step are returned via takeError().
// NOTE(review): the guards that select the error returns and the final
// success return are not visible in this excerpt.
197 llvm::Expected
<std::string
> URI::resolve(llvm::StringRef FileURI
,
198 llvm::StringRef HintPath
) {
199 auto Uri
= URI::parse(FileURI
);
201 return Uri
.takeError();
202 auto Path
= URI::resolve(*Uri
, HintPath
);
204 return Path
.takeError();
/// Creates a URI for \p AbsolutePath using the scheme named \p Scheme.
/// Returns an error if the path is not absolute, and propagates the lookup
/// error from findSchemeByName; otherwise delegates to the scheme's
/// uriFromAbsolutePath().
// NOTE(review): the guard in front of `return S.takeError()` is not visible
// in this excerpt.
208 llvm::Expected
<URI
> URI::create(llvm::StringRef AbsolutePath
,
209 llvm::StringRef Scheme
) {
210 if (!llvm::sys::path::is_absolute(AbsolutePath
))
211 return error("Not a valid absolute path: {0}", AbsolutePath
);
212 auto S
= findSchemeByName(Scheme
);
214 return S
.takeError();
215 return S
->get()->uriFromAbsolutePath(AbsolutePath
);
/// Creates a URI for \p AbsolutePath without naming a scheme: each
/// registered scheme is instantiated and asked to convert the path; a
/// scheme that fails has its error consumed and is skipped. If no
/// registered scheme yields a URI, the path is converted with
/// URI::createFile.
// NOTE(review): the head of the call that receives the "Not a valid
// absolute path" message and the guard around consumeError() are not
// visible in this excerpt.
218 URI
URI::create(llvm::StringRef AbsolutePath
) {
219 if (!llvm::sys::path::is_absolute(AbsolutePath
))
221 ("Not a valid absolute path: " + AbsolutePath
).str().c_str());
222 for (auto &Entry
: URISchemeRegistry::entries()) {
223 auto URI
= Entry
.instantiate()->uriFromAbsolutePath(AbsolutePath
);
224 // For some paths, conversion to different URI schemes is impossible. These
225 // should be just skipped.
228 llvm::consumeError(URI
.takeError());
231 return std::move(*URI
);
233 // Fallback to file: scheme which should work for any paths.
234 return URI::createFile(AbsolutePath
);
/// Creates a "file" URI for \p AbsolutePath by calling
/// FileSystemScheme::uriFromAbsolutePath directly. A conversion failure is
/// reported through llvm_unreachable with the error text.
// NOTE(review): the guard in front of llvm_unreachable is not visible in
// this excerpt.
237 URI
URI::createFile(llvm::StringRef AbsolutePath
) {
238 auto U
= FileSystemScheme().uriFromAbsolutePath(AbsolutePath
);
240 llvm_unreachable(llvm::toString(U
.takeError()).c_str());
241 return std::move(*U
);
/// Resolves a parsed URI to an absolute path: looks up the scheme named by
/// Uri.Scheme and calls its getAbsolutePath() with the URI's authority and
/// body plus \p HintPath. A failed scheme lookup is returned as an error.
// NOTE(review): the guard in front of `return S.takeError()` is not visible
// in this excerpt.
244 llvm::Expected
<std::string
> URI::resolve(const URI
&Uri
,
245 llvm::StringRef HintPath
) {
246 auto S
= findSchemeByName(Uri
.Scheme
);
248 return S
.takeError();
249 return S
->get()->getAbsolutePath(Uri
.Authority
, Uri
.Body
, HintPath
);
/// Round-trips \p AbsPath through the registered URI schemes: each scheme
/// is asked to turn the path into a URI and, when that works, the same
/// scheme immediately maps the URI's authority and body back to a path
/// (using \p HintPath). Schemes that cannot convert the path have their
/// error consumed and are skipped. When no registered scheme handles the
/// path, it is returned unchanged as a std::string.
/// A non-absolute \p AbsPath is reported through llvm_unreachable.
// NOTE(review): the guard around consumeError() is not visible in this
// excerpt.
252 llvm::Expected
<std::string
> URI::resolvePath(llvm::StringRef AbsPath
,
253 llvm::StringRef HintPath
) {
254 if (!llvm::sys::path::is_absolute(AbsPath
))
255 llvm_unreachable(("Not a valid absolute path: " + AbsPath
).str().c_str());
256 for (auto &Entry
: URISchemeRegistry::entries()) {
257 auto S
= Entry
.instantiate();
258 auto U
= S
->uriFromAbsolutePath(AbsPath
);
259 // For some paths, conversion to different URI schemes is impossible. These
260 // should be just skipped.
263 llvm::consumeError(U
.takeError());
266 return S
->getAbsolutePath(U
->Authority
, U
->Body
, HintPath
);
268 // Fallback to file: scheme which doesn't do any canonicalization.
269 return std::string(AbsPath
);
/// Asks the scheme named by Uri.Scheme for the include spelling of \p Uri
/// by calling its getIncludeSpelling(). A failed scheme lookup is returned
/// as an error.
// NOTE(review): the guard in front of `return S.takeError()` is not visible
// in this excerpt.
272 llvm::Expected
<std::string
> URI::includeSpelling(const URI
&Uri
) {
273 auto S
= findSchemeByName(Uri
.Scheme
);
275 return S
.takeError();
276 return S
->get()->getIncludeSpelling(Uri
);
279 } // namespace clangd