1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Functions for canonicalizing "file:" URLs.
7 #include "url/url_canon.h"
8 #include "url/url_canon_internal.h"
9 #include "url/url_file.h"
10 #include "url/url_parse_internal.h"
18 // Given a pointer into the spec, this copies and canonicalizes the drive
19 // letter and colon to the output, if one is found. If there is not a drive
20 // spec, it won't do anything. The index of the next character in the input
21 // spec is returned (after the colon when a drive spec is found, the begin
22 // offset if one is not).
23 template<typename CHAR
>
24 int FileDoDriveSpec(const CHAR
* spec
, int begin
, int end
,
25 CanonOutput
* output
) {
26 // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
27 // (with backslashes instead of slashes as well).
28 int num_slashes
= url_parse::CountConsecutiveSlashes(spec
, begin
, end
);
29 int after_slashes
= begin
+ num_slashes
;
31 if (!url_parse::DoesBeginWindowsDriveSpec(spec
, after_slashes
, end
))
32 return begin
; // Haven't consumed any characters
34 // A drive spec is the start of a path, so we need to add a slash for the
35 // authority terminator (typically the third slash).
36 output
->push_back('/');
38 // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
39 // and that it is followed by a colon/pipe.
41 // Normalize Windows drive letters to uppercase
42 if (spec
[after_slashes
] >= 'a' && spec
[after_slashes
] <= 'z')
43 output
->push_back(spec
[after_slashes
] - 'a' + 'A');
45 output
->push_back(static_cast<char>(spec
[after_slashes
]));
47 // Normalize the character following it to a colon rather than pipe.
48 output
->push_back(':');
49 return after_slashes
+ 2;
54 template<typename CHAR
, typename UCHAR
>
55 bool DoFileCanonicalizePath(const CHAR
* spec
,
56 const url_parse::Component
& path
,
58 url_parse::Component
* out_path
) {
59 // Copies and normalizes the "c:" at the beginning, if present.
60 out_path
->begin
= output
->length();
63 after_drive
= FileDoDriveSpec(spec
, path
.begin
, path
.end(), output
);
65 after_drive
= path
.begin
;
68 // Copies the rest of the path, starting from the slash following the
69 // drive colon (if any, Windows only), or the first slash of the path.
71 if (after_drive
< path
.end()) {
72 // Use the regular path canonicalizer to canonicalize the rest of the
73 // path. Give it a fake output component to write into. DoCanonicalizeFile
74 // will compute the full path component.
75 url_parse::Component sub_path
=
76 url_parse::MakeRange(after_drive
, path
.end());
77 url_parse::Component fake_output_path
;
78 success
= CanonicalizePath(spec
, sub_path
, output
, &fake_output_path
);
80 // No input path, canonicalize to a slash.
81 output
->push_back('/');
84 out_path
->len
= output
->length() - out_path
->begin
;
88 template<typename CHAR
, typename UCHAR
>
89 bool DoCanonicalizeFileURL(const URLComponentSource
<CHAR
>& source
,
90 const url_parse::Parsed
& parsed
,
91 CharsetConverter
* query_converter
,
93 url_parse::Parsed
* new_parsed
) {
94 // Things we don't set in file: URLs.
95 new_parsed
->username
= url_parse::Component();
96 new_parsed
->password
= url_parse::Component();
97 new_parsed
->port
= url_parse::Component();
99 // Scheme (known, so we don't bother running it through the more
100 // complicated scheme canonicalizer).
101 new_parsed
->scheme
.begin
= output
->length();
102 output
->Append("file://", 7);
103 new_parsed
->scheme
.len
= 4;
105 // Append the host. For many file URLs, this will be empty. For UNC, this
107 // TODO(brettw) This doesn't do any checking for host name validity. We
108 // should probably handle validity checking of UNC hosts differently than
109 // for regular IP hosts.
110 bool success
= CanonicalizeHost(source
.host
, parsed
.host
,
111 output
, &new_parsed
->host
);
112 success
&= DoFileCanonicalizePath
<CHAR
, UCHAR
>(source
.path
, parsed
.path
,
113 output
, &new_parsed
->path
);
114 CanonicalizeQuery(source
.query
, parsed
.query
, query_converter
,
115 output
, &new_parsed
->query
);
117 // Ignore failure for refs since the URL can probably still be loaded.
118 CanonicalizeRef(source
.ref
, parsed
.ref
, output
, &new_parsed
->ref
);
// Canonicalizes a "file:" URL given as an 8-bit |spec|. The visible part of
// the body forwards to DoCanonicalizeFileURL<char, unsigned char>, wrapping
// |spec| in a URLComponentSource<char> and passing |parsed| and
// |query_converter| through unchanged.
// NOTE(review): this view of the definition is incomplete -- part of the
// parameter list and the tail of the forwarding call are not shown here.
// Confirm the full signature against the declaration before changing it.
125 bool CanonicalizeFileURL(const char* spec
,
127 const url_parse::Parsed
& parsed
,
128 CharsetConverter
* query_converter
,
130 url_parse::Parsed
* new_parsed
) {
131 return DoCanonicalizeFileURL
<char, unsigned char>(
132 URLComponentSource
<char>(spec
), parsed
, query_converter
,
// Canonicalizes a "file:" URL given as a base::char16 |spec|. The visible
// part of the body forwards to
// DoCanonicalizeFileURL<base::char16, base::char16>, wrapping |spec| in a
// URLComponentSource<base::char16> and passing |parsed| and
// |query_converter| through unchanged.
// NOTE(review): this view of the definition is incomplete -- part of the
// parameter list and the tail of the forwarding call are not shown here.
// Confirm the full signature against the declaration before changing it.
136 bool CanonicalizeFileURL(const base::char16
* spec
,
138 const url_parse::Parsed
& parsed
,
139 CharsetConverter
* query_converter
,
141 url_parse::Parsed
* new_parsed
) {
142 return DoCanonicalizeFileURL
<base::char16
, base::char16
>(
143 URLComponentSource
<base::char16
>(spec
), parsed
, query_converter
,
147 bool FileCanonicalizePath(const char* spec
,
148 const url_parse::Component
& path
,
150 url_parse::Component
* out_path
) {
151 return DoFileCanonicalizePath
<char, unsigned char>(spec
, path
,
155 bool FileCanonicalizePath(const base::char16
* spec
,
156 const url_parse::Component
& path
,
158 url_parse::Component
* out_path
) {
159 return DoFileCanonicalizePath
<base::char16
, base::char16
>(spec
, path
,
163 bool ReplaceFileURL(const char* base
,
164 const url_parse::Parsed
& base_parsed
,
165 const Replacements
<char>& replacements
,
166 CharsetConverter
* query_converter
,
168 url_parse::Parsed
* new_parsed
) {
169 URLComponentSource
<char> source(base
);
170 url_parse::Parsed
parsed(base_parsed
);
171 SetupOverrideComponents(base
, replacements
, &source
, &parsed
);
172 return DoCanonicalizeFileURL
<char, unsigned char>(
173 source
, parsed
, query_converter
, output
, new_parsed
);
176 bool ReplaceFileURL(const char* base
,
177 const url_parse::Parsed
& base_parsed
,
178 const Replacements
<base::char16
>& replacements
,
179 CharsetConverter
* query_converter
,
181 url_parse::Parsed
* new_parsed
) {
182 RawCanonOutput
<1024> utf8
;
183 URLComponentSource
<char> source(base
);
184 url_parse::Parsed
parsed(base_parsed
);
185 SetupUTF16OverrideComponents(base
, replacements
, &utf8
, &source
, &parsed
);
186 return DoCanonicalizeFileURL
<char, unsigned char>(
187 source
, parsed
, query_converter
, output
, new_parsed
);
190 } // namespace url_canon