1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Functions for canonicalizing "path" URLs. Not to be confused with the path
6 // of a URL, these are URLs that have no authority section, only a path. For
7 // example, "javascript:" and "data:".
9 #include "url/url_canon.h"
10 #include "url/url_canon_internal.h"
16 template<typename CHAR
, typename UCHAR
>
17 bool DoCanonicalizePathURL(const URLComponentSource
<CHAR
>& source
,
18 const url_parse::Parsed
& parsed
,
20 url_parse::Parsed
* new_parsed
) {
21 // Scheme: this will append the colon.
22 bool success
= CanonicalizeScheme(source
.scheme
, parsed
.scheme
,
23 output
, &new_parsed
->scheme
);
25 // We assume there's no authority for path URLs. Note that hosts should never
27 new_parsed
->username
.reset();
28 new_parsed
->password
.reset();
29 new_parsed
->host
.reset();
30 new_parsed
->port
.reset();
32 if (parsed
.path
.is_valid()) {
33 // Copy the path using path URL's more lax escaping rules (think for
34 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
35 // ASCII characters alone. This helps readability of JavaStript.
36 new_parsed
->path
.begin
= output
->length();
37 int end
= parsed
.path
.end();
38 for (int i
= parsed
.path
.begin
; i
< end
; i
++) {
39 UCHAR uch
= static_cast<UCHAR
>(source
.path
[i
]);
40 if (uch
< 0x20 || uch
>= 0x80)
41 success
&= AppendUTF8EscapedChar(source
.path
, &i
, end
, output
);
43 output
->push_back(static_cast<char>(uch
));
45 new_parsed
->path
.len
= output
->length() - new_parsed
->path
.begin
;
48 new_parsed
->path
.reset();
51 // Assume there's no query or ref.
52 new_parsed
->query
.reset();
53 new_parsed
->ref
.reset();
60 bool CanonicalizePathURL(const char* spec
,
62 const url_parse::Parsed
& parsed
,
64 url_parse::Parsed
* new_parsed
) {
65 return DoCanonicalizePathURL
<char, unsigned char>(
66 URLComponentSource
<char>(spec
), parsed
, output
, new_parsed
);
69 bool CanonicalizePathURL(const base::char16
* spec
,
71 const url_parse::Parsed
& parsed
,
73 url_parse::Parsed
* new_parsed
) {
74 return DoCanonicalizePathURL
<base::char16
, base::char16
>(
75 URLComponentSource
<base::char16
>(spec
), parsed
, output
, new_parsed
);
78 bool ReplacePathURL(const char* base
,
79 const url_parse::Parsed
& base_parsed
,
80 const Replacements
<char>& replacements
,
82 url_parse::Parsed
* new_parsed
) {
83 URLComponentSource
<char> source(base
);
84 url_parse::Parsed
parsed(base_parsed
);
85 SetupOverrideComponents(base
, replacements
, &source
, &parsed
);
86 return DoCanonicalizePathURL
<char, unsigned char>(
87 source
, parsed
, output
, new_parsed
);
90 bool ReplacePathURL(const char* base
,
91 const url_parse::Parsed
& base_parsed
,
92 const Replacements
<base::char16
>& replacements
,
94 url_parse::Parsed
* new_parsed
) {
95 RawCanonOutput
<1024> utf8
;
96 URLComponentSource
<char> source(base
);
97 url_parse::Parsed
parsed(base_parsed
);
98 SetupUTF16OverrideComponents(base
, replacements
, &utf8
, &source
, &parsed
);
99 return DoCanonicalizePathURL
<char, unsigned char>(
100 source
, parsed
, output
, new_parsed
);
103 } // namespace url_canon