1 //===---- URI.h - File URIs with schemes -------------------------*- C++-*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "support/Logger.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Path.h"
// Instantiates the registry type clang::clangd::URISchemeRegistry whose
// entries() are iterated further down when looking up schemes.
// NOTE(review): presumably this is the extension point through which extra
// URI schemes get registered - confirm against the header declaring it.
17 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry
)
23 bool isWindowsPath(llvm::StringRef Path
) {
24 return Path
.size() > 1 && llvm::isAlpha(Path
[0]) && Path
[1] == ':';
27 bool isNetworkPath(llvm::StringRef Path
) {
28 return Path
.size() > 2 && Path
[0] == Path
[1] &&
29 llvm::sys::path::is_separator(Path
[0]);
32 /// This manages file paths in the file system. All paths in the scheme
33 /// are absolute (with leading '/').
34 /// Note that this scheme is hardcoded into the library and not registered in
/// the scheme registry (findSchemeByName constructs it directly).
36 class FileSystemScheme
: public URIScheme
{
/// Converts the authority and body of a "file" URI into a filesystem path in
/// the platform's native form. Returns an error when \p Body does not start
/// with '/'. The third parameter (the hint path) is unused here.
38 llvm::Expected
<std::string
>
39 getAbsolutePath(llvm::StringRef Authority
, llvm::StringRef Body
,
40 llvm::StringRef
/*HintPath*/) const override
{
41 if (!Body
.startswith("/"))
42 return error("File scheme: expect body to be an absolute path starting "
45 llvm::SmallString
<128> Path
;
46 if (!Authority
.empty()) {
47 // Windows UNC paths e.g. file://server/share => \\server\share
48 ("//" + Authority
).toVector(Path
);
49 } else if (isWindowsPath(Body
.substr(1))) {
50 // Windows paths e.g. file:///X:/path => X:\path
// Drop the leading '/' so that the drive letter comes first.
51 Body
.consume_front("/");
// Rewrite separators into the platform's native form before returning.
54 llvm::sys::path::native(Path
);
55 return std::string(Path
);
/// Builds a "file" URI from \p AbsolutePath. A network (UNC-style) root name
/// is moved into the URI authority; the path is converted to forward slashes
/// and appended to the body.
59 uriFromAbsolutePath(llvm::StringRef AbsolutePath
) const override
{
61 llvm::StringRef Authority
;
62 llvm::StringRef Root
= llvm::sys::path::root_name(AbsolutePath
);
63 if (isNetworkPath(Root
)) {
64 // Windows UNC paths e.g. \\server\share => file://server/share
// The root name minus its two leading separators becomes the authority,
// and the root is removed from the path so it is not repeated in the body.
65 Authority
= Root
.drop_front(2);
66 AbsolutePath
.consume_front(Root
);
67 } else if (isWindowsPath(Root
)) {
68 // Windows paths e.g. X:\path => file:///X:/path
71 Body
+= llvm::sys::path::convert_to_slash(AbsolutePath
);
72 return URI("file", Authority
, Body
);
/// Looks up a URIScheme implementation by its name.
/// Returns a freshly constructed scheme object on success, or an error
/// ("Can't find scheme: ...") when no registered scheme has the name \p Scheme.
76 llvm::Expected
<std::unique_ptr
<URIScheme
>>
77 findSchemeByName(llvm::StringRef Scheme
) {
// The built-in file system scheme is constructed directly rather than looked
// up in the registry.
// NOTE(review): the condition guarding this return is not visible here -
// presumably Scheme == "file"; confirm.
79 return std::make_unique
<FileSystemScheme
>();
// Otherwise scan the registered schemes for one whose name equals Scheme.
81 for (const auto &URIScheme
: URISchemeRegistry::entries()) {
82 if (URIScheme
.getName() != Scheme
)
84 return URIScheme
.instantiate();
86 return error("Can't find scheme: {0}", Scheme
);
/// Decides whether the byte \p C has to be percent-encoded; percentEncode
/// calls this for every byte of its input.
/// NOTE(review): only part of the decision logic is visible here; the notes
/// below describe just the visible conditions.
89 bool shouldEscape(unsigned char C
) {
90 // Unreserved characters.
// ASCII letters and digits are tested first (presumably: never escaped).
91 if ((C
>= 'a' && C
<= 'z') || (C
>= 'A' && C
<= 'Z') ||
92 (C
>= '0' && C
<= '9'))
99 case '/': // '/' is only reserved when parsing.
100 // ':' is only reserved for relative URI paths, which clangd doesn't produce.
107 /// Encodes a string according to percent-encoding.
108 /// - Unreserved characters are not escaped.
109 /// - Reserved characters are always escaped, with exceptions like '/'.
110 /// - All other characters are escaped.
/// The encoded form of \p Content is appended to \p Out; nothing is returned.
111 void percentEncode(llvm::StringRef Content
, std::string
&Out
) {
// Iterate as unsigned char so the nibble arithmetic below never sees a
// negative value.
112 for (unsigned char C
: Content
)
113 if (shouldEscape(C
)) {
// Emit the byte as two hex digits: high nibble first, then low nibble.
115 Out
.push_back(llvm::hexdigit(C
/ 16));
116 Out
.push_back(llvm::hexdigit(C
% 16));
122 /// Decodes a string according to percent-encoding.
/// Walks \p Content character by character and builds the decoded text in
/// Result (its declaration and the final return are not visible here).
123 std::string
percentDecode(llvm::StringRef Content
) {
125 for (auto I
= Content
.begin(), E
= Content
.end(); I
!= E
; ++I
) {
// A '%' is only treated as an escape when two hex digits follow it inside
// Content; the pair is then combined into a single byte.
130 if (*I
== '%' && I
+ 2 < Content
.end() && llvm::isHexDigit(*(I
+ 1)) &&
131 llvm::isHexDigit(*(I
+ 2))) {
132 Result
.push_back(llvm::hexFromNibbles(*(I
+ 1), *(I
+ 2)));
// Append the current character unchanged.
// NOTE(review): presumably the fallback for a '%' that is not followed by two
// hex digits - the branch structure around this line is not visible; confirm.
135 Result
.push_back(*I
);
/// Checks whether \p Scheme is a syntactically acceptable URI scheme name:
/// the first character is tested with llvm::isAlpha and every following
/// character must be alphanumeric or one of '+', '.', '-'.
140 bool isValidScheme(llvm::StringRef Scheme
) {
// NOTE(review): Scheme[0] is read here; an emptiness check presumably precedes
// this line but is not visible - confirm.
143 if (!llvm::isAlpha(Scheme
[0]))
// drop_begin skips the first character, which was already checked above.
145 return llvm::all_of(llvm::drop_begin(Scheme
), [](char C
) {
146 return llvm::isAlnum(C
) || C
== '+' || C
== '.' || C
== '-';
/// Constructs a URI from its three components, storing each one in the member
/// of the same name.
/// Preconditions (enforced with assert only): \p Scheme is non-empty, and
/// \p Body starts with '/' whenever \p Authority is non-empty.
152 URI::URI(llvm::StringRef Scheme
, llvm::StringRef Authority
,
153 llvm::StringRef Body
)
154 : Scheme(Scheme
), Authority(Authority
), Body(Body
) {
155 assert(!Scheme
.empty());
156 assert((Authority
.empty() || Body
.startswith("/")) &&
157 "URI body must start with '/' when authority is present.");
/// Serializes this URI to text: the percent-encoded scheme, a ':', then the
/// percent-encoded authority and body.
/// NOTE(review): the declaration of Result, the statement taken when both
/// authority and body are empty, and whatever separates scheme from authority
/// are not visible here - confirm before relying on the exact output format.
160 std::string
URI::toString() const {
162 percentEncode(Scheme
, Result
);
163 Result
.push_back(':');
164 if (Authority
.empty() && Body
.empty())
166 // If authority is empty, we only print body if it starts with "/"; otherwise,
167 // the URI is invalid.
168 if (!Authority
.empty() || llvm::StringRef(Body
).startswith("/"))
171 percentEncode(Authority
, Result
);
173 percentEncode(Body
, Result
);
/// Parses the textual URI \p OrigUri into its scheme, authority and body, each
/// percent-decoded.
/// Returns an error when the text contains no ':' (no scheme) or when the
/// decoded scheme fails isValidScheme.
177 llvm::Expected
<URI
> URI::parse(llvm::StringRef OrigUri
) {
// Work on a copy so OrigUri stays intact for error messages.
179 llvm::StringRef Uri
= OrigUri
;
// Everything before the first ':' is the scheme.
181 auto Pos
= Uri
.find(':');
182 if (Pos
== llvm::StringRef::npos
)
183 return error("Scheme must be provided in URI: {0}", OrigUri
);
184 auto SchemeStr
= Uri
.substr(0, Pos
);
185 U
.Scheme
= percentDecode(SchemeStr
);
186 if (!isValidScheme(U
.Scheme
))
187 return error("Invalid scheme: {0} (decoded: {1})", SchemeStr
, U
.Scheme
);
// Skip past the scheme and its ':' separator.
188 Uri
= Uri
.substr(Pos
+ 1);
// A leading "//" introduces an authority component.
189 if (Uri
.consume_front("//")) {
// NOTE(review): Pos is presumably recomputed on a line not visible here (e.g.
// the position of the next '/') before being used to split authority from
// body - confirm.
191 U
.Authority
= percentDecode(Uri
.substr(0, Pos
));
192 Uri
= Uri
.substr(Pos
);
// Whatever remains is the body.
194 U
.Body
= percentDecode(Uri
);
/// Convenience overload: parses the textual URI \p FileURI and resolves it via
/// the URI-object overload of resolve, passing \p HintPath through.
/// Errors from parsing or from resolution are returned to the caller.
/// NOTE(review): the conditions guarding the two error returns and the final
/// success return are not visible here.
198 llvm::Expected
<std::string
> URI::resolve(llvm::StringRef FileURI
,
199 llvm::StringRef HintPath
) {
200 auto Uri
= URI::parse(FileURI
);
202 return Uri
.takeError();
203 auto Path
= URI::resolve(*Uri
, HintPath
);
205 return Path
.takeError();
/// Creates a URI for \p AbsolutePath using the scheme named \p Scheme.
/// Returns an error if the path is not absolute, if the scheme cannot be found
/// (see findSchemeByName), or if the scheme itself fails to build a URI.
209 llvm::Expected
<URI
> URI::create(llvm::StringRef AbsolutePath
,
210 llvm::StringRef Scheme
) {
211 if (!llvm::sys::path::is_absolute(AbsolutePath
))
212 return error("Not a valid absolute path: {0}", AbsolutePath
);
213 auto S
= findSchemeByName(Scheme
);
// Propagate the lookup failure (the guarding condition is not visible here).
215 return S
.takeError();
// Delegate the actual conversion to the scheme implementation.
216 return S
->get()->uriFromAbsolutePath(AbsolutePath
);
/// Creates a URI for \p AbsolutePath without naming a scheme: each registered
/// scheme is asked to convert the path, and the "file" scheme is used as the
/// fallback when the loop does not return.
/// NOTE(review): a non-absolute path appears to be treated as a programming
/// error rather than a recoverable one - the call that receives the message
/// below is not visible here; confirm.
219 URI
URI::create(llvm::StringRef AbsolutePath
) {
220 if (!llvm::sys::path::is_absolute(AbsolutePath
))
222 ("Not a valid absolute path: " + AbsolutePath
).str().c_str());
223 for (auto &Entry
: URISchemeRegistry::entries()) {
224 auto URI
= Entry
.instantiate()->uriFromAbsolutePath(AbsolutePath
);
225 // For some paths, conversion to different URI schemes is impossible. These
226 // should be just skipped.
// The error must be consumed explicitly before it is discarded.
229 llvm::consumeError(URI
.takeError());
232 return std::move(*URI
);
234 // Fallback to file: scheme which should work for any paths.
235 return URI::createFile(AbsolutePath
);
/// Creates a "file" URI for \p AbsolutePath using FileSystemScheme directly.
/// A conversion failure is reported through llvm_unreachable, i.e. it is
/// treated as a bug rather than a recoverable error (the guarding condition is
/// not visible here).
238 URI
URI::createFile(llvm::StringRef AbsolutePath
) {
239 auto U
= FileSystemScheme().uriFromAbsolutePath(AbsolutePath
);
241 llvm_unreachable(llvm::toString(U
.takeError()).c_str());
242 return std::move(*U
);
/// Resolves an already-parsed URI to an absolute path by finding the
/// implementation for \p Uri's scheme and delegating to its getAbsolutePath
/// with the URI's authority, body and \p HintPath.
/// Returns the lookup error when the scheme is unknown, otherwise whatever the
/// scheme implementation returns.
245 llvm::Expected
<std::string
> URI::resolve(const URI
&Uri
,
246 llvm::StringRef HintPath
) {
247 auto S
= findSchemeByName(Uri
.Scheme
);
249 return S
.takeError();
250 return S
->get()->getAbsolutePath(Uri
.Authority
, Uri
.Body
, HintPath
);
/// Round-trips \p AbsPath through the registered URI schemes: each scheme
/// converts the path to a URI, and that URI is converted back to a path with
/// \p HintPath. When the loop does not return, the input path is returned
/// unchanged.
/// A non-absolute \p AbsPath is reported through llvm_unreachable, i.e. it is
/// treated as a bug rather than a recoverable error.
253 llvm::Expected
<std::string
> URI::resolvePath(llvm::StringRef AbsPath
,
254 llvm::StringRef HintPath
) {
255 if (!llvm::sys::path::is_absolute(AbsPath
))
256 llvm_unreachable(("Not a valid absolute path: " + AbsPath
).str().c_str());
257 for (auto &Entry
: URISchemeRegistry::entries()) {
258 auto S
= Entry
.instantiate();
259 auto U
= S
->uriFromAbsolutePath(AbsPath
);
260 // For some paths, conversion to different URI schemes is impossible. These
261 // should be just skipped.
// The error must be consumed explicitly before it is discarded.
264 llvm::consumeError(U
.takeError());
// Convert the scheme's URI back into a path.
267 return S
->getAbsolutePath(U
->Authority
, U
->Body
, HintPath
);
269 // Fallback to file: scheme which doesn't do any canonicalization.
270 return std::string(AbsPath
);
/// Asks the scheme implementation for \p Uri's scheme for its include
/// spelling by delegating to URIScheme::getIncludeSpelling.
/// Returns the lookup error when the scheme is unknown (see findSchemeByName).
273 llvm::Expected
<std::string
> URI::includeSpelling(const URI
&Uri
) {
274 auto S
= findSchemeByName(Uri
.Scheme
);
276 return S
.takeError();
277 return S
->get()->getIncludeSpelling(Uri
);
280 } // namespace clangd