1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
15 #include "build/build_config.h"
22 #pragma comment(lib, "iphlpapi.lib")
23 #elif defined(OS_POSIX)
26 #include <netinet/in.h>
30 #if !defined(OS_ANDROID)
32 #endif // !defined(OS_NACL)
33 #endif // !defined(OS_ANDROID)
34 #endif // defined(OS_POSIX)
36 #include "base/basictypes.h"
37 #include "base/json/string_escape.h"
38 #include "base/lazy_instance.h"
39 #include "base/logging.h"
40 #include "base/strings/string_number_conversions.h"
41 #include "base/strings/string_piece.h"
42 #include "base/strings/string_split.h"
43 #include "base/strings/string_util.h"
44 #include "base/strings/stringprintf.h"
45 #include "base/strings/utf_string_conversions.h"
46 #include "base/sys_byteorder.h"
47 #include "base/values.h"
48 #include "net/base/address_list.h"
49 #include "net/base/dns_util.h"
50 #include "net/base/ip_address_number.h"
51 #include "net/base/net_module.h"
52 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
53 #include "net/grit/net_resources.h"
54 #include "net/http/http_content_disposition.h"
56 #include "url/third_party/mozilla/url_parse.h"
57 #include "url/url_canon.h"
58 #include "url/url_canon_ip.h"
59 #include "url/url_constants.h"
61 #if defined(OS_ANDROID)
62 #include "net/android/network_library.h"
65 #include "net/base/winsock_init.h"
72 // The general list of blocked ports. Will be blocked unless a specific
73 // protocol overrides it. (Ex: ftp can use ports 20 and 21)
74 static const int kRestrictedPorts
[] = {
108 135, // loc-srv /epmap
131 3659, // apple-sasl / PasswordServer
134 6665, // Alternate IRC [Apple addition]
135 6666, // Alternate IRC [Apple addition]
136 6667, // Standard IRC [Apple addition]
137 6668, // Alternate IRC [Apple addition]
138 6669, // Alternate IRC [Apple addition]
139 0xFFFF, // Used to block all invalid port numbers (see
140 // third_party/WebKit/Source/platform/weborigin/KURL.cpp,
144 // FTP overrides the following restricted ports.
145 static const int kAllowedFtpPorts
[] = {
150 std::string
NormalizeHostname(const std::string
& host
) {
151 std::string result
= base::StringToLowerASCII(host
);
152 if (!result
.empty() && *result
.rbegin() == '.')
153 result
.resize(result
.size() - 1);
157 bool IsNormalizedLocalhostTLD(const std::string
& host
) {
158 return base::EndsWith(host
, ".localhost", true);
161 // |host| should be normalized.
162 bool IsLocalHostname(const std::string
& host
) {
163 return host
== "localhost" || host
== "localhost.localdomain" ||
164 IsNormalizedLocalhostTLD(host
);
// Returns true when |host| is exactly "localhost6" or
// "localhost6.localdomain6". |host| should be normalized.
bool IsLocal6Hostname(const std::string& host) {
  if (host == "localhost6")
    return true;
  return host == "localhost6.localdomain6";
}
174 static base::LazyInstance
<std::multiset
<int> >::Leaky
175 g_explicitly_allowed_ports
= LAZY_INSTANCE_INITIALIZER
;
177 std::string
GetSpecificHeader(const std::string
& headers
,
178 const std::string
& name
) {
179 // We want to grab the Value from the "Key: Value" pairs in the headers,
180 // which should look like this (no leading spaces, \n-separated) (we format
181 // them this way in url_request_inet.cc):
183 // ETag: "6d0b8-947-24f35ec0"\n
184 // Content-Length: 2375\n
185 // Content-Type: text/html; charset=UTF-8\n
186 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
188 return std::string();
190 std::string
match('\n' + name
+ ':');
192 std::string::const_iterator begin
=
193 std::search(headers
.begin(), headers
.end(), match
.begin(), match
.end(),
194 base::CaseInsensitiveCompareASCII
<char>());
196 if (begin
== headers
.end())
197 return std::string();
199 begin
+= match
.length();
202 base::TrimWhitespace(std::string(begin
,
203 std::find(begin
, headers
.end(), '\n')),
204 base::TRIM_ALL
, &ret
);
208 std::string
CanonicalizeHost(const std::string
& host
,
209 url::CanonHostInfo
* host_info
) {
210 // Try to canonicalize the host.
211 const url::Component
raw_host_component(0, static_cast<int>(host
.length()));
212 std::string canon_host
;
213 url::StdStringCanonOutput
canon_host_output(&canon_host
);
214 url::CanonicalizeHostVerbose(host
.c_str(), raw_host_component
,
215 &canon_host_output
, host_info
);
217 if (host_info
->out_host
.is_nonempty() &&
218 host_info
->family
!= url::CanonHostInfo::BROKEN
) {
219 // Success! Assert that there's no extra garbage.
220 canon_host_output
.Complete();
221 DCHECK_EQ(host_info
->out_host
.len
, static_cast<int>(canon_host
.length()));
223 // Empty host, or canonicalization failed. We'll return empty.
230 std::string
GetDirectoryListingHeader(const base::string16
& title
) {
231 static const base::StringPiece
header(
232 NetModule::GetResource(IDR_DIR_HEADER_HTML
));
233 // This can be null in unit tests.
234 DLOG_IF(WARNING
, header
.empty()) <<
235 "Missing resource: directory listing header";
239 result
.assign(header
.data(), header
.size());
241 result
.append("<script>start(");
242 base::EscapeJSONString(title
, true, &result
);
243 result
.append(");</script>\n");
// Returns true if |c| is an ASCII digit or a lowercase ASCII letter.
// Uppercase letters are deliberately not accepted: only lowercase needs to be
// checked because uppercase characters have already been dealt with before
// this helper is used.
inline bool IsHostCharAlphanumeric(char c) {
  const bool is_lower_alpha = (c >= 'a') && (c <= 'z');
  const bool is_digit = (c >= '0') && (c <= '9');
  return is_lower_alpha || is_digit;
}
254 bool IsCanonicalizedHostCompliant(const std::string
& host
) {
258 bool in_component
= false;
259 bool most_recent_component_started_alphanumeric
= false;
261 for (std::string::const_iterator
i(host
.begin()); i
!= host
.end(); ++i
) {
264 most_recent_component_started_alphanumeric
= IsHostCharAlphanumeric(c
);
265 if (!most_recent_component_started_alphanumeric
&& (c
!= '-') &&
270 } else if (c
== '.') {
271 in_component
= false;
272 } else if (!IsHostCharAlphanumeric(c
) && (c
!= '-') && (c
!= '_')) {
277 return most_recent_component_started_alphanumeric
;
280 base::string16
StripWWW(const base::string16
& text
) {
281 const base::string16
www(base::ASCIIToUTF16("www."));
282 return base::StartsWith(text
, www
, base::CompareCase::SENSITIVE
)
283 ? text
.substr(www
.length()) : text
;
286 base::string16
StripWWWFromHost(const GURL
& url
) {
287 DCHECK(url
.is_valid());
288 return StripWWW(base::ASCIIToUTF16(url
.host()));
// Returns true if |port| fits in an unsigned 16-bit integer, i.e. lies in
// [0, 65535].
bool IsPortValid(int port) {
  const int max_port = std::numeric_limits<uint16_t>::max();
  if (port < 0)
    return false;
  return port <= max_port;
}
// Returns true if |port| lies in the range [0, 1023].
bool IsWellKnownPort(int port) {
  if (port < 0)
    return false;
  return port < 1024;
}
299 bool IsPortAllowedForScheme(int port
, const std::string
& url_scheme
) {
300 // Reject invalid ports.
301 if (!IsPortValid(port
))
304 // Allow explicitly allowed ports for any scheme.
305 if (g_explicitly_allowed_ports
.Get().count(port
) > 0)
308 // FTP requests have an extra set of whitelisted ports.
309 if (base::LowerCaseEqualsASCII(url_scheme
, url::kFtpScheme
)) {
310 for (int allowed_ftp_port
: kAllowedFtpPorts
) {
311 if (allowed_ftp_port
== port
)
316 // Finally check against the generic list of restricted ports for all
318 for (int restricted_port
: kRestrictedPorts
) {
319 if (restricted_port
== port
)
326 size_t GetCountOfExplicitlyAllowedPorts() {
327 return g_explicitly_allowed_ports
.Get().size();
330 // Specifies a comma separated list of port numbers that should be accepted
331 // despite bans. If the string is invalid no allowed ports are stored.
332 void SetExplicitlyAllowedPorts(const std::string
& allowed_ports
) {
333 if (allowed_ports
.empty())
336 std::multiset
<int> ports
;
338 size_t size
= allowed_ports
.size();
339 // The comma delimiter.
340 const std::string::value_type kComma
= ',';
342 // Overflow is still possible for evil user inputs.
343 for (size_t i
= 0; i
<= size
; ++i
) {
344 // The string should be composed of only digits and commas.
345 if (i
!= size
&& !base::IsAsciiDigit(allowed_ports
[i
]) &&
346 (allowed_ports
[i
] != kComma
))
348 if (i
== size
|| allowed_ports
[i
] == kComma
) {
351 base::StringToInt(base::StringPiece(allowed_ports
.begin() + last
,
352 allowed_ports
.begin() + i
),
359 g_explicitly_allowed_ports
.Get() = ports
;
362 ScopedPortException::ScopedPortException(int port
) : port_(port
) {
363 g_explicitly_allowed_ports
.Get().insert(port
);
366 ScopedPortException::~ScopedPortException() {
367 std::multiset
<int>::iterator it
=
368 g_explicitly_allowed_ports
.Get().find(port_
);
369 if (it
!= g_explicitly_allowed_ports
.Get().end())
370 g_explicitly_allowed_ports
.Get().erase(it
);
375 int SetNonBlocking(int fd
) {
377 unsigned long no_block
= 1;
378 return ioctlsocket(fd
, FIONBIO
, &no_block
);
379 #elif defined(OS_POSIX)
380 int flags
= fcntl(fd
, F_GETFL
, 0);
383 return fcntl(fd
, F_SETFL
, flags
| O_NONBLOCK
);
387 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin
,
388 std::string::const_iterator host_and_port_end
,
391 if (host_and_port_begin
>= host_and_port_end
)
394 // When using url, we use char*.
395 const char* auth_begin
= &(*host_and_port_begin
);
396 int auth_len
= host_and_port_end
- host_and_port_begin
;
398 url::Component
auth_component(0, auth_len
);
399 url::Component username_component
;
400 url::Component password_component
;
401 url::Component hostname_component
;
402 url::Component port_component
;
404 url::ParseAuthority(auth_begin
, auth_component
, &username_component
,
405 &password_component
, &hostname_component
, &port_component
);
407 // There shouldn't be a username/password.
408 if (username_component
.is_valid() || password_component
.is_valid())
411 if (!hostname_component
.is_nonempty())
412 return false; // Failed parsing.
414 int parsed_port_number
= -1;
415 if (port_component
.is_nonempty()) {
416 parsed_port_number
= url::ParsePort(auth_begin
, port_component
);
418 // If parsing failed, port_number will be either PORT_INVALID or
419 // PORT_UNSPECIFIED, both of which are negative.
420 if (parsed_port_number
< 0)
421 return false; // Failed parsing the port number.
424 if (port_component
.len
== 0)
425 return false; // Reject inputs like "foo:"
427 unsigned char tmp_ipv6_addr
[16];
429 // If the hostname starts with a bracket, it is either an IPv6 literal or
430 // invalid. If it is an IPv6 literal then strip the brackets.
431 if (hostname_component
.len
> 0 &&
432 auth_begin
[hostname_component
.begin
] == '[') {
433 if (auth_begin
[hostname_component
.end() - 1] == ']' &&
434 url::IPv6AddressToNumber(
435 auth_begin
, hostname_component
, tmp_ipv6_addr
)) {
436 // Strip the brackets.
437 hostname_component
.begin
++;
438 hostname_component
.len
-= 2;
444 // Pass results back to caller.
445 host
->assign(auth_begin
+ hostname_component
.begin
, hostname_component
.len
);
446 *port
= parsed_port_number
;
448 return true; // Success.
451 bool ParseHostAndPort(const std::string
& host_and_port
,
454 return ParseHostAndPort(
455 host_and_port
.begin(), host_and_port
.end(), host
, port
);
458 std::string
GetHostAndPort(const GURL
& url
) {
459 // For IPv6 literals, GURL::host() already includes the brackets so it is
460 // safe to just append a colon.
461 return base::StringPrintf("%s:%d", url
.host().c_str(),
462 url
.EffectiveIntPort());
465 std::string
GetHostAndOptionalPort(const GURL
& url
) {
466 // For IPv6 literals, GURL::host() already includes the brackets
467 // so it is safe to just append a colon.
469 return base::StringPrintf("%s:%s", url
.host().c_str(), url
.port().c_str());
473 bool IsHostnameNonUnique(const std::string
& hostname
) {
474 // CanonicalizeHost requires surrounding brackets to parse an IPv6 address.
475 const std::string host_or_ip
= hostname
.find(':') != std::string::npos
?
476 "[" + hostname
+ "]" : hostname
;
477 url::CanonHostInfo host_info
;
478 std::string canonical_name
= CanonicalizeHost(host_or_ip
, &host_info
);
480 // If canonicalization fails, then the input is truly malformed. However,
481 // to avoid mis-reporting bad inputs as "non-unique", treat them as unique.
482 if (canonical_name
.empty())
485 // If |hostname| is an IP address, check to see if it's in an IANA-reserved
487 if (host_info
.IsIPAddress()) {
488 IPAddressNumber host_addr
;
489 if (!ParseIPLiteralToNumber(hostname
.substr(host_info
.out_host
.begin
,
490 host_info
.out_host
.len
),
494 switch (host_info
.family
) {
495 case url::CanonHostInfo::IPV4
:
496 case url::CanonHostInfo::IPV6
:
497 return IsIPAddressReserved(host_addr
);
498 case url::CanonHostInfo::NEUTRAL
:
499 case url::CanonHostInfo::BROKEN
:
504 // Check for a registry controlled portion of |hostname|, ignoring private
505 // registries, as they already chain to ICANN-administered registries,
506 // and explicitly ignoring unknown registries.
508 // Note: This means that as new gTLDs are introduced on the Internet, they
509 // will be treated as non-unique until the registry controlled domain list
510 // is updated. However, because gTLDs are expected to provide significant
511 // advance notice to deprecate older versions of this code, this is an
512 // acceptable tradeoff.
513 return 0 == registry_controlled_domains::GetRegistryLength(
515 registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
,
516 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
);
519 SockaddrStorage::SockaddrStorage(const SockaddrStorage
& other
)
520 : addr_len(other
.addr_len
),
521 addr(reinterpret_cast<struct sockaddr
*>(&addr_storage
)) {
522 memcpy(addr
, other
.addr
, addr_len
);
525 void SockaddrStorage::operator=(const SockaddrStorage
& other
) {
526 addr_len
= other
.addr_len
;
527 // addr is already set to &this->addr_storage by default ctor.
528 memcpy(addr
, other
.addr
, addr_len
);
531 // Extracts the address and port portions of a sockaddr.
532 bool GetIPAddressFromSockAddr(const struct sockaddr
* sock_addr
,
533 socklen_t sock_addr_len
,
534 const uint8_t** address
,
537 if (sock_addr
->sa_family
== AF_INET
) {
538 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in
)))
540 const struct sockaddr_in
* addr
=
541 reinterpret_cast<const struct sockaddr_in
*>(sock_addr
);
542 *address
= reinterpret_cast<const uint8_t*>(&addr
->sin_addr
);
543 *address_len
= kIPv4AddressSize
;
545 *port
= base::NetToHost16(addr
->sin_port
);
549 if (sock_addr
->sa_family
== AF_INET6
) {
550 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in6
)))
552 const struct sockaddr_in6
* addr
=
553 reinterpret_cast<const struct sockaddr_in6
*>(sock_addr
);
554 *address
= reinterpret_cast<const uint8_t*>(&addr
->sin6_addr
);
555 *address_len
= kIPv6AddressSize
;
557 *port
= base::NetToHost16(addr
->sin6_port
);
562 if (sock_addr
->sa_family
== AF_BTH
) {
563 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(SOCKADDR_BTH
)))
565 const SOCKADDR_BTH
* addr
=
566 reinterpret_cast<const SOCKADDR_BTH
*>(sock_addr
);
567 *address
= reinterpret_cast<const uint8_t*>(&addr
->btAddr
);
568 *address_len
= kBluetoothAddressSize
;
570 *port
= static_cast<uint16_t>(addr
->port
);
575 return false; // Unrecognized |sa_family|.
578 std::string
NetAddressToString(const struct sockaddr
* sa
,
579 socklen_t sock_addr_len
) {
580 const uint8_t* address
;
582 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
583 &address_len
, NULL
)) {
585 return std::string();
587 return IPAddressToString(address
, address_len
);
590 std::string
NetAddressToStringWithPort(const struct sockaddr
* sa
,
591 socklen_t sock_addr_len
) {
592 const uint8_t* address
;
595 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
596 &address_len
, &port
)) {
598 return std::string();
600 return IPAddressToStringWithPort(address
, address_len
, port
);
603 std::string
GetHostName() {
606 return std::string();
607 #else // defined(OS_NACL)
612 // Host names are limited to 255 bytes.
614 int result
= gethostname(buffer
, sizeof(buffer
));
616 DVLOG(1) << "gethostname() failed with " << result
;
619 return std::string(buffer
);
620 #endif // !defined(OS_NACL)
623 void GetIdentityFromURL(const GURL
& url
,
624 base::string16
* username
,
625 base::string16
* password
) {
626 UnescapeRule::Type flags
=
627 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
;
628 *username
= UnescapeAndDecodeUTF8URLComponent(url
.username(), flags
);
629 *password
= UnescapeAndDecodeUTF8URLComponent(url
.password(), flags
);
632 std::string
GetHostOrSpecFromURL(const GURL
& url
) {
633 return url
.has_host() ? TrimEndingDot(url
.host()) : url
.spec();
636 bool CanStripTrailingSlash(const GURL
& url
) {
637 // Omit the path only for standard, non-file URLs with nothing but "/" after
639 return url
.IsStandard() && !url
.SchemeIsFile() &&
640 !url
.SchemeIsFileSystem() && !url
.has_query() && !url
.has_ref()
641 && url
.path() == "/";
644 GURL
SimplifyUrlForRequest(const GURL
& url
) {
645 DCHECK(url
.is_valid());
646 GURL::Replacements replacements
;
647 replacements
.ClearUsername();
648 replacements
.ClearPassword();
649 replacements
.ClearRef();
650 return url
.ReplaceComponents(replacements
);
653 bool HaveOnlyLoopbackAddresses() {
654 #if defined(OS_ANDROID)
655 return android::HaveOnlyLoopbackAddresses();
656 #elif defined(OS_NACL)
659 #elif defined(OS_POSIX)
660 struct ifaddrs
* interface_addr
= NULL
;
661 int rv
= getifaddrs(&interface_addr
);
663 DVLOG(1) << "getifaddrs() failed with errno = " << errno
;
668 for (struct ifaddrs
* interface
= interface_addr
;
670 interface
= interface
->ifa_next
) {
671 if (!(IFF_UP
& interface
->ifa_flags
))
673 if (IFF_LOOPBACK
& interface
->ifa_flags
)
675 const struct sockaddr
* addr
= interface
->ifa_addr
;
678 if (addr
->sa_family
== AF_INET6
) {
679 // Safe cast since this is AF_INET6.
680 const struct sockaddr_in6
* addr_in6
=
681 reinterpret_cast<const struct sockaddr_in6
*>(addr
);
682 const struct in6_addr
* sin6_addr
= &addr_in6
->sin6_addr
;
683 if (IN6_IS_ADDR_LOOPBACK(sin6_addr
) || IN6_IS_ADDR_LINKLOCAL(sin6_addr
))
686 if (addr
->sa_family
!= AF_INET6
&& addr
->sa_family
!= AF_INET
)
692 freeifaddrs(interface_addr
);
694 #elif defined(OS_WIN)
695 // TODO(wtc): implement with the GetAdaptersAddresses function.
701 #endif // defined(various platforms)
704 AddressFamily
GetAddressFamily(const IPAddressNumber
& address
) {
705 switch (address
.size()) {
706 case kIPv4AddressSize
:
707 return ADDRESS_FAMILY_IPV4
;
708 case kIPv6AddressSize
:
709 return ADDRESS_FAMILY_IPV6
;
711 return ADDRESS_FAMILY_UNSPECIFIED
;
715 int ConvertAddressFamily(AddressFamily address_family
) {
716 switch (address_family
) {
717 case ADDRESS_FAMILY_UNSPECIFIED
:
719 case ADDRESS_FAMILY_IPV4
:
721 case ADDRESS_FAMILY_IPV6
:
728 const uint16_t* GetPortFieldFromSockaddr(const struct sockaddr
* address
,
729 socklen_t address_len
) {
730 if (address
->sa_family
== AF_INET
) {
731 DCHECK_LE(sizeof(sockaddr_in
), static_cast<size_t>(address_len
));
732 const struct sockaddr_in
* sockaddr
=
733 reinterpret_cast<const struct sockaddr_in
*>(address
);
734 return &sockaddr
->sin_port
;
735 } else if (address
->sa_family
== AF_INET6
) {
736 DCHECK_LE(sizeof(sockaddr_in6
), static_cast<size_t>(address_len
));
737 const struct sockaddr_in6
* sockaddr
=
738 reinterpret_cast<const struct sockaddr_in6
*>(address
);
739 return &sockaddr
->sin6_port
;
746 int GetPortFromSockaddr(const struct sockaddr
* address
, socklen_t address_len
) {
747 const uint16_t* port_field
= GetPortFieldFromSockaddr(address
, address_len
);
750 return base::NetToHost16(*port_field
);
753 bool ResolveLocalHostname(const std::string
& host
,
755 AddressList
* address_list
) {
756 static const unsigned char kLocalhostIPv4
[] = {127, 0, 0, 1};
757 static const unsigned char kLocalhostIPv6
[] = {
758 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
760 std::string normalized_host
= NormalizeHostname(host
);
762 address_list
->clear();
764 bool is_local6
= IsLocal6Hostname(normalized_host
);
765 if (!is_local6
&& !IsLocalHostname(normalized_host
))
768 address_list
->push_back(
769 IPEndPoint(IPAddressNumber(kLocalhostIPv6
,
770 kLocalhostIPv6
+ arraysize(kLocalhostIPv6
)),
773 address_list
->push_back(
774 IPEndPoint(IPAddressNumber(kLocalhostIPv4
,
775 kLocalhostIPv4
+ arraysize(kLocalhostIPv4
)),
782 bool IsLocalhost(const std::string
& host
) {
783 std::string normalized_host
= NormalizeHostname(host
);
784 if (IsLocalHostname(normalized_host
) || IsLocal6Hostname(normalized_host
))
787 IPAddressNumber ip_number
;
788 if (ParseIPLiteralToNumber(host
, &ip_number
)) {
789 size_t size
= ip_number
.size();
791 case kIPv4AddressSize
: {
792 IPAddressNumber localhost_prefix
;
793 localhost_prefix
.push_back(127);
794 for (int i
= 0; i
< 3; ++i
) {
795 localhost_prefix
.push_back(0);
797 return IPNumberMatchesPrefix(ip_number
, localhost_prefix
, 8);
800 case kIPv6AddressSize
: {
801 struct in6_addr sin6_addr
;
802 memcpy(&sin6_addr
, &ip_number
[0], kIPv6AddressSize
);
803 return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr
);
814 bool IsLocalhostTLD(const std::string
& host
) {
815 return IsNormalizedLocalhostTLD(NormalizeHostname(host
));
818 bool HasGoogleHost(const GURL
& url
) {
819 static const char* kGoogleHostSuffixes
[] = {
826 ".googleusercontent.com",
827 ".googlesyndication.com",
828 ".google-analytics.com",
829 ".googleadservices.com",
833 const std::string
& host
= url
.host();
834 for (const char* suffix
: kGoogleHostSuffixes
) {
835 if (base::EndsWith(host
, suffix
, false))