12 "golang.org/x/net/idna"
15 // domainPrefixBasic does the basic domain prefix conversion. Does not do any
16 // IDNA mapping, such as https://www.unicode.org/reports/tr46/.
18 // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#basic-algorithm
19 func domainPrefixBasic(domain
string) (string, error
) {
20 // 1. Punycode Decode the publisher domain.
21 prefix
, err
:= idna
.ToUnicode(domain
)
26 // 2. Replace any "-" (hyphen) character in the output of step 1 with
27 // "--" (two hyphens).
28 prefix
= strings
.Replace(prefix
, "-", "--", -1)
30 // 3. Replace any "." (dot) character in the output of step 2 with "-"
32 prefix
= strings
.Replace(prefix
, ".", "-", -1)
34 // 4. If the output of step 3 has a "-" (hyphen) at both positions 3 and
35 // 4, then to the output of step 3, add a prefix of "0-" and add a
37 if len(prefix
) >= 4 && prefix
[2] == '-' && prefix
[3] == '-' {
38 prefix
= "0-" + prefix
+ "-0"
41 // 5. Punycode Encode the output of step 3.
42 return idna
.ToASCII(prefix
)
// fallbackBase32Encoding is the base32 variant used by the fallback
// algorithm: the standard alphabet in lower case, with no "=" padding.
var fallbackBase32Encoding = base32.
	NewEncoding("abcdefghijklmnopqrstuvwxyz234567").
	WithPadding(base32.NoPadding)

// domainPrefixFallback does the fallback domain prefix conversion: the
// unpadded, lower-case base32 encoding of the SHA-256 digest of domain.
//
// https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#fallback-algorithm
func domainPrefixFallback(domain string) string {
	// The algorithm specification does not say exactly what byte sequence
	// is to be hashed; domain is notionally an abstract Unicode string.
	// Although a comment in the reference implementation,
	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/AmpCurlUrlGenerator.js#L62
	// says "Take the SHA256 of the punycode view of the domain," what it
	// actually hashes is the UTF-8 encoding of the domain, without Punycode:
	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/AmpCurlUrlGenerator.js#L141
	// https://github.com/ampproject/amp-toolbox/blob/84cb3057e5f6c54d64369ddd285db1cb36237ee8/packages/cache-url/lib/browser/Sha256.js#L24
	// We follow the implementation and hash the raw bytes of domain as
	// given, which are presumed to be UTF-8.

	// 1. Hash the publisher's domain using SHA256.
	digest := sha256.Sum256([]byte(domain))

	// 2. Base32 Escape the output of step 1.
	// 3. Remove the last 4 characters from the output of step 2, which are
	//    always "=" (equals) characters. The encoding is configured without
	//    padding, so there is nothing to strip afterwards.
	encoded := fallbackBase32Encoding.EncodeToString(digest[:])
	return encoded
}
73 // domainPrefix computes the domain prefix of an AMP cache URL.
75 // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#domain-name-prefix
76 func domainPrefix(domain
string) string {
77 // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#combined-algorithm
78 // 1. Run the Basic Algorithm. If the output is a valid DNS label,
79 // [append the Cache domain suffix and] return. Otherwise continue to
81 prefix
, err
:= domainPrefixBasic(domain
)
82 // "A domain prefix is not a valid DNS label if it is longer than 63
84 if err
== nil && len(prefix
) <= 63 {
87 // 2. Run the Fallback Algorithm. [Append the Cache domain suffix and]
89 return domainPrefixFallback(domain
)
92 // CacheURL computes the AMP cache URL for the publisher URL pubURL, using the
93 // AMP cache at cacheURL. contentType is a string such as "c" or "i" that
94 // indicates what type of serving the AMP cache is to perform. The Scheme of
95 // pubURL must be "http" or "https". The Port of pubURL, if any, must match the
96 // default for the scheme. cacheURL may not have RawQuery, Fragment, or
97 // RawFragment set, because the resulting URL's query and fragment are taken
98 // from the publisher URL.
100 // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/
101 func CacheURL(pubURL
, cacheURL
*url
.URL
, contentType
string) (*url
.URL
, error
) {
102 // The cache URL subdomain, including the domain prefix corresponding to
103 // the publisher URL's domain.
104 resultHost
:= domainPrefix(pubURL
.Hostname()) + "." + cacheURL
.Hostname()
105 if cacheURL
.Port() != "" {
106 resultHost
= net
.JoinHostPort(resultHost
, cacheURL
.Port())
109 // https://amp.dev/documentation/guides-and-tutorials/learn/amp-caches-and-cors/amp-cache-urls/#url-path
110 // The first part of the path is the cache URL's own path, if any.
111 pathComponents
:= []string{cacheURL
.EscapedPath()}
112 // The next path component is the content type. We cannot encode an
113 // empty content type, because it would result in consecutive path
114 // separators, which would semantically combine into a single separator.
115 if contentType
== "" {
116 return nil, fmt
.Errorf("invalid content type %+q", contentType
)
118 pathComponents
= append(pathComponents
, url
.PathEscape(contentType
))
119 // Then, we add an "s" path component, if the publisher URL scheme is
121 switch pubURL
.Scheme
{
125 pathComponents
= append(pathComponents
, "s")
127 return nil, fmt
.Errorf("invalid scheme %+q in publisher URL", pubURL
.Scheme
)
129 // The next path component is the publisher URL's host. The AMP cache
130 // URL format specification is not clear about whether other
131 // subcomponents of the authority (namely userinfo and port) may appear
132 // here. We adopt a policy of forbidding userinfo, and requiring that
133 // the port be the default for the scheme (and then we omit the port
134 // entirely from the returned URL).
135 if pubURL
.User
!= nil {
136 return nil, fmt
.Errorf("publisher URL may not contain userinfo")
138 if port
:= pubURL
.Port(); port
!= "" {
139 if !((pubURL
.Scheme
== "http" && port
== "80") ||
(pubURL
.Scheme
== "https" && port
== "443")) {
140 return nil, fmt
.Errorf("publisher URL port %+q is not the default for scheme %+q", port
, pubURL
.Scheme
)
143 // As with the content type, we cannot encode an empty host, because
144 // that would result in an empty path component.
145 if pubURL
.Hostname() == "" {
146 return nil, fmt
.Errorf("invalid host %+q in publisher URL", pubURL
.Hostname())
148 pathComponents
= append(pathComponents
, url
.PathEscape(pubURL
.Hostname()))
149 // Finally, we append the remainder of the original escaped path from
150 // the publisher URL.
151 pathComponents
= append(pathComponents
, pubURL
.EscapedPath())
153 resultRawPath
:= path
.Join(pathComponents
...)
154 resultPath
, err
:= url
.PathUnescape(resultRawPath
)
159 // The query and fragment of the returned URL always come from pubURL.
160 // Any query or fragment of cacheURL would be ignored. Return an error
162 if cacheURL
.RawQuery
!= "" {
163 return nil, fmt
.Errorf("cache URL may not contain a query")
165 if cacheURL
.Fragment
!= "" {
166 return nil, fmt
.Errorf("cache URL may not contain a fragment")
170 Scheme
: cacheURL
.Scheme
,
174 RawPath
: resultRawPath
,
175 RawQuery
: pubURL
.RawQuery
,
176 Fragment
: pubURL
.Fragment
,