net/cookies/cookie_util.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "net/cookies/cookie_util.h"
   6
   7 #include <cstdio>
   8 #include <cstdlib>
   9
  10 #include "base/logging.h"
  11 #include "base/strings/string_tokenizer.h"
  12 #include "base/strings/string_util.h"
  13 #include "build/build_config.h"
  14 #include "net/base/net_util.h"
  15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
  16 #include "url/gurl.h"
  17
  18 namespace net {
  19 namespace cookie_util {
  20
  21 bool DomainIsHostOnly(const std::string& domain_string) {
  22   return (domain_string.empty() || domain_string[0] != '.');
  23 }
  24
  25 std::string GetEffectiveDomain(const std::string& scheme,
  26                                const std::string& host) {
  27   if (scheme == "http" || scheme == "https") {
  28     return registry_controlled_domains::GetDomainAndRegistry(
  29         host, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
  30   }
  31
  32   if (!DomainIsHostOnly(host))
  33     return host.substr(1);
  34   return host;
  35 }
  36
  37 bool GetCookieDomainWithString(const GURL& url,
  38                                const std::string& domain_string,
  39                                std::string* result) {
  40   const std::string url_host(url.host());
  41
  42   // If no domain was specified in the domain string, default to a host cookie.
  43   // We match IE/Firefox in allowing a domain=IPADDR if it matches the url
  44   // ip address hostname exactly.  It should be treated as a host cookie.
  45   if (domain_string.empty() ||
  46       (url.HostIsIPAddress() && url_host == domain_string)) {
  47     *result = url_host;
  48     DCHECK(DomainIsHostOnly(*result));
  49     return true;
  50   }
  51
  52   // Get the normalized domain specified in cookie line.
  53   url_canon::CanonHostInfo ignored;
  54   std::string cookie_domain(CanonicalizeHost(domain_string, &ignored));
  55   if (cookie_domain.empty())
  56     return false;
  57   if (cookie_domain[0] != '.')
  58     cookie_domain = "." + cookie_domain;
  59
  60   // Ensure |url| and |cookie_domain| have the same domain+registry.
  61   const std::string url_scheme(url.scheme());
  62   const std::string url_domain_and_registry(
  63       GetEffectiveDomain(url_scheme, url_host));
  64   if (url_domain_and_registry.empty())
  65     return false;  // IP addresses/intranet hosts can't set domain cookies.
  66   const std::string cookie_domain_and_registry(
  67       GetEffectiveDomain(url_scheme, cookie_domain));
  68   if (url_domain_and_registry != cookie_domain_and_registry)
  69     return false;  // Can't set a cookie on a different domain + registry.
  70
  71   // Ensure |url_host| is |cookie_domain| or one of its subdomains.  Given that
  72   // we know the domain+registry are the same from the above checks, this is
  73   // basically a simple string suffix check.
  74   const bool is_suffix = (url_host.length() < cookie_domain.length()) ?
  75       (cookie_domain != ("." + url_host)) :
  76       (url_host.compare(url_host.length() - cookie_domain.length(),
  77                         cookie_domain.length(), cookie_domain) != 0);
  78   if (is_suffix)
  79     return false;
  80
  81   *result = cookie_domain;
  82   return true;
  83 }
  84
  85 // Parse a cookie expiration time.  We try to be lenient, but we need to
  86 // assume some order to distinguish the fields.  The basic rules:
  87 //  - The month name must be present and prefix the first 3 letters of the
  88 //    full month name (jan for January, jun for June).
  89 //  - If the year is <= 2 digits, it must occur after the day of month.
  90 //  - The time must be of the format hh:mm:ss.
  91 // An average cookie expiration will look something like this:
  92 //   Sat, 15-Apr-17 21:01:22 GMT
  93 base::Time ParseCookieTime(const std::string& time_string) {
  94   static const char* kMonths[] = { "jan", "feb", "mar", "apr", "may", "jun",
  95                                    "jul", "aug", "sep", "oct", "nov", "dec" };
  96   static const int kMonthsLen = arraysize(kMonths);
  97   // We want to be pretty liberal, and support most non-ascii and non-digit
  98   // characters as a delimiter.  We can't treat : as a delimiter, because it
  99   // is the delimiter for hh:mm:ss, and we want to keep this field together.
 100   // We make sure to include - and +, since they could prefix numbers.
 101   // If the cookie attribute came in in quotes (ex expires="XXX"), the quotes
 102   // will be preserved, and we will get them here.  So we make sure to include
 103   // quote characters, and also \ for anything that was internally escaped.
 104   static const char* kDelimiters = "\t !\"#$%&'()*+,-./;<=>?@[\\]^_`{|}~";
 105
 106   base::Time::Exploded exploded = {0};
 107
 108   base::StringTokenizer tokenizer(time_string, kDelimiters);
 109
 110   bool found_day_of_month = false;
 111   bool found_month = false;
 112   bool found_time = false;
 113   bool found_year = false;
 114
 115   while (tokenizer.GetNext()) {
 116     const std::string token = tokenizer.token();
 117     DCHECK(!token.empty());
 118     bool numerical = IsAsciiDigit(token[0]);
 119
 120     // String field
 121     if (!numerical) {
 122       if (!found_month) {
 123         for (int i = 0; i < kMonthsLen; ++i) {
 124           // Match prefix, so we could match January, etc
 125           if (base::strncasecmp(token.c_str(), kMonths[i], 3) == 0) {
 126             exploded.month = i + 1;
 127             found_month = true;
 128             break;
 129           }
 130         }
 131       } else {
 132         // If we've gotten here, it means we've already found and parsed our
 133         // month, and we have another string, which we would expect to be the
 134         // the time zone name.  According to the RFC and my experiments with
 135         // how sites format their expirations, we don't have much of a reason
 136         // to support timezones.  We don't want to ever barf on user input,
 137         // but this DCHECK should pass for well-formed data.
 138         // DCHECK(token == "GMT");
 139       }
 140     // Numeric field w/ a colon
 141     } else if (token.find(':') != std::string::npos) {
 142       if (!found_time &&
 143 #ifdef COMPILER_MSVC
 144           sscanf_s(
 145 #else
 146           sscanf(
 147 #endif
 148                  token.c_str(), "%2u:%2u:%2u", &exploded.hour,
 149                  &exploded.minute, &exploded.second) == 3) {
 150         found_time = true;
 151       } else {
 152         // We should only ever encounter one time-like thing.  If we're here,
 153         // it means we've found a second, which shouldn't happen.  We keep
 154         // the first.  This check should be ok for well-formed input:
 155         // NOTREACHED();
 156       }
 157     // Numeric field
 158     } else {
 159       // Overflow with atoi() is unspecified, so we enforce a max length.
 160       if (!found_day_of_month && token.length() <= 2) {
 161         exploded.day_of_month = atoi(token.c_str());
 162         found_day_of_month = true;
 163       } else if (!found_year && token.length() <= 5) {
 164         exploded.year = atoi(token.c_str());
 165         found_year = true;
 166       } else {
 167         // If we're here, it means we've either found an extra numeric field,
 168         // or a numeric field which was too long.  For well-formed input, the
 169         // following check would be reasonable:
 170         // NOTREACHED();
 171       }
 172     }
 173   }
 174
 175   if (!found_day_of_month || !found_month || !found_time || !found_year) {
 176     // We didn't find all of the fields we need.  For well-formed input, the
 177     // following check would be reasonable:
 178     // NOTREACHED() << "Cookie parse expiration failed: " << time_string;
 179     return base::Time();
 180   }
 181
 182   // Normalize the year to expand abbreviated years to the full year.
 183   if (exploded.year >= 69 && exploded.year <= 99)
 184     exploded.year += 1900;
 185   if (exploded.year >= 0 && exploded.year <= 68)
 186     exploded.year += 2000;
 187
 188   // If our values are within their correct ranges, we got our time.
 189   if (exploded.day_of_month >= 1 && exploded.day_of_month <= 31 &&
 190       exploded.month >= 1 && exploded.month <= 12 &&
 191       exploded.year >= 1601 && exploded.year <= 30827 &&
 192       exploded.hour <= 23 && exploded.minute <= 59 && exploded.second <= 59) {
 193     return base::Time::FromUTCExploded(exploded);
 194   }
 195
 196   // One of our values was out of expected range.  For well-formed input,
 197   // the following check would be reasonable:
 198   // NOTREACHED() << "Cookie exploded expiration failed: " << time_string;
 199
 200   return base::Time();
 201 }
 202
 203 GURL CookieOriginToURL(const std::string& domain, bool is_https) {
 204   if (domain.empty())
 205     return GURL();
 206
 207   const std::string scheme = is_https ? "https" : "http";
 208   const std::string host = domain[0] == '.' ? domain.substr(1) : domain;
 209   return GURL(scheme + "://" + host);
 210 }
 211
 212 }  // namespace cookie_util
 213 }  // namespace net
 214