1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
15 #include "build/build_config.h"
22 #pragma comment(lib, "iphlpapi.lib")
23 #elif defined(OS_POSIX)
26 #include <netinet/in.h>
30 #if !defined(OS_ANDROID)
32 #endif // !defined(OS_NACL)
33 #endif // !defined(OS_ANDROID)
34 #endif // defined(OS_POSIX)
36 #include "base/basictypes.h"
37 #include "base/json/string_escape.h"
38 #include "base/lazy_instance.h"
39 #include "base/logging.h"
40 #include "base/strings/string_number_conversions.h"
41 #include "base/strings/string_piece.h"
42 #include "base/strings/string_split.h"
43 #include "base/strings/string_util.h"
44 #include "base/strings/stringprintf.h"
45 #include "base/strings/utf_string_conversions.h"
46 #include "base/sys_byteorder.h"
47 #include "base/values.h"
48 #include "net/base/address_list.h"
49 #include "net/base/dns_util.h"
50 #include "net/base/ip_address_number.h"
51 #include "net/base/net_module.h"
52 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
53 #include "net/grit/net_resources.h"
54 #include "net/http/http_content_disposition.h"
56 #include "url/third_party/mozilla/url_parse.h"
57 #include "url/url_canon.h"
58 #include "url/url_canon_ip.h"
59 #include "url/url_constants.h"
61 #if defined(OS_ANDROID)
62 #include "net/android/network_library.h"
65 #include "net/base/winsock_init.h"
72 // The general list of blocked ports. Will be blocked unless a specific
73 // protocol overrides it. (Ex: ftp can use ports 20 and 21)
74 static const int kRestrictedPorts
[] = {
108 135, // loc-srv /epmap
131 3659, // apple-sasl / PasswordServer
134 6665, // Alternate IRC [Apple addition]
135 6666, // Alternate IRC [Apple addition]
136 6667, // Standard IRC [Apple addition]
137 6668, // Alternate IRC [Apple addition]
138 6669, // Alternate IRC [Apple addition]
139 0xFFFF, // Used to block all invalid port numbers (see
140 // third_party/WebKit/Source/platform/weborigin/KURL.cpp,
144 // FTP overrides the following restricted ports.
145 static const int kAllowedFtpPorts
[] = {
150 std::string
NormalizeHostname(const std::string
& host
) {
151 std::string result
= base::StringToLowerASCII(host
);
152 if (!result
.empty() && *result
.rbegin() == '.')
153 result
.resize(result
.size() - 1);
157 bool IsNormalizedLocalhostTLD(const std::string
& host
) {
158 return base::EndsWith(host
, ".localhost", base::CompareCase::SENSITIVE
);
161 // |host| should be normalized.
162 bool IsLocalHostname(const std::string
& host
) {
163 return host
== "localhost" || host
== "localhost.localdomain" ||
164 IsNormalizedLocalhostTLD(host
);
// Returns true if |host| is exactly "localhost6" or "localhost6.localdomain6".
// |host| should be normalized.
bool IsLocal6Hostname(const std::string& host) {
  return host == "localhost6" || host == "localhost6.localdomain6";
}
174 static base::LazyInstance
<std::multiset
<int> >::Leaky
175 g_explicitly_allowed_ports
= LAZY_INSTANCE_INITIALIZER
;
177 std::string
CanonicalizeHost(const std::string
& host
,
178 url::CanonHostInfo
* host_info
) {
179 // Try to canonicalize the host.
180 const url::Component
raw_host_component(0, static_cast<int>(host
.length()));
181 std::string canon_host
;
182 url::StdStringCanonOutput
canon_host_output(&canon_host
);
183 url::CanonicalizeHostVerbose(host
.c_str(), raw_host_component
,
184 &canon_host_output
, host_info
);
186 if (host_info
->out_host
.is_nonempty() &&
187 host_info
->family
!= url::CanonHostInfo::BROKEN
) {
188 // Success! Assert that there's no extra garbage.
189 canon_host_output
.Complete();
190 DCHECK_EQ(host_info
->out_host
.len
, static_cast<int>(canon_host
.length()));
192 // Empty host, or canonicalization failed. We'll return empty.
199 std::string
GetDirectoryListingHeader(const base::string16
& title
) {
200 static const base::StringPiece
header(
201 NetModule::GetResource(IDR_DIR_HEADER_HTML
));
202 // This can be null in unit tests.
203 DLOG_IF(WARNING
, header
.empty()) <<
204 "Missing resource: directory listing header";
208 result
.assign(header
.data(), header
.size());
210 result
.append("<script>start(");
211 base::EscapeJSONString(title
, true, &result
);
212 result
.append(");</script>\n");
// Returns true if |c| is in [a-z] or [0-9].
inline bool IsHostCharAlphanumeric(char c) {
  // We can just check lowercase because uppercase characters have already been
  // normalized away by the time a host reaches this check.
  return ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9'));
}
223 bool IsCanonicalizedHostCompliant(const std::string
& host
) {
227 bool in_component
= false;
228 bool most_recent_component_started_alphanumeric
= false;
230 for (std::string::const_iterator
i(host
.begin()); i
!= host
.end(); ++i
) {
233 most_recent_component_started_alphanumeric
= IsHostCharAlphanumeric(c
);
234 if (!most_recent_component_started_alphanumeric
&& (c
!= '-') &&
239 } else if (c
== '.') {
240 in_component
= false;
241 } else if (!IsHostCharAlphanumeric(c
) && (c
!= '-') && (c
!= '_')) {
246 return most_recent_component_started_alphanumeric
;
249 base::string16
StripWWW(const base::string16
& text
) {
250 const base::string16
www(base::ASCIIToUTF16("www."));
251 return base::StartsWith(text
, www
, base::CompareCase::SENSITIVE
)
252 ? text
.substr(www
.length()) : text
;
255 base::string16
StripWWWFromHost(const GURL
& url
) {
256 DCHECK(url
.is_valid());
257 return StripWWW(base::ASCIIToUTF16(url
.host()));
// Returns true if |port| fits in an unsigned 16-bit integer, i.e. lies in
// [0, 65535].
bool IsPortValid(int port) {
  return port >= 0 && port <= std::numeric_limits<uint16_t>::max();
}
// Returns true if |port| lies in [0, 1023].
bool IsWellKnownPort(int port) {
  return port >= 0 && port < 1024;
}
268 bool IsPortAllowedForScheme(int port
, const std::string
& url_scheme
) {
269 // Reject invalid ports.
270 if (!IsPortValid(port
))
273 // Allow explitly allowed ports for any scheme.
274 if (g_explicitly_allowed_ports
.Get().count(port
) > 0)
277 // FTP requests have an extra set of whitelisted schemes.
278 if (base::LowerCaseEqualsASCII(url_scheme
, url::kFtpScheme
)) {
279 for (int allowed_ftp_port
: kAllowedFtpPorts
) {
280 if (allowed_ftp_port
== port
)
285 // Finally check against the generic list of restricted ports for all
287 for (int restricted_port
: kRestrictedPorts
) {
288 if (restricted_port
== port
)
295 size_t GetCountOfExplicitlyAllowedPorts() {
296 return g_explicitly_allowed_ports
.Get().size();
299 // Specifies a comma separated list of port numbers that should be accepted
300 // despite bans. If the string is invalid no allowed ports are stored.
301 void SetExplicitlyAllowedPorts(const std::string
& allowed_ports
) {
302 if (allowed_ports
.empty())
305 std::multiset
<int> ports
;
307 size_t size
= allowed_ports
.size();
308 // The comma delimiter.
309 const std::string::value_type kComma
= ',';
311 // Overflow is still possible for evil user inputs.
312 for (size_t i
= 0; i
<= size
; ++i
) {
313 // The string should be composed of only digits and commas.
314 if (i
!= size
&& !base::IsAsciiDigit(allowed_ports
[i
]) &&
315 (allowed_ports
[i
] != kComma
))
317 if (i
== size
|| allowed_ports
[i
] == kComma
) {
320 base::StringToInt(base::StringPiece(allowed_ports
.begin() + last
,
321 allowed_ports
.begin() + i
),
328 g_explicitly_allowed_ports
.Get() = ports
;
331 ScopedPortException::ScopedPortException(int port
) : port_(port
) {
332 g_explicitly_allowed_ports
.Get().insert(port
);
335 ScopedPortException::~ScopedPortException() {
336 std::multiset
<int>::iterator it
=
337 g_explicitly_allowed_ports
.Get().find(port_
);
338 if (it
!= g_explicitly_allowed_ports
.Get().end())
339 g_explicitly_allowed_ports
.Get().erase(it
);
344 int SetNonBlocking(int fd
) {
346 unsigned long no_block
= 1;
347 return ioctlsocket(fd
, FIONBIO
, &no_block
);
348 #elif defined(OS_POSIX)
349 int flags
= fcntl(fd
, F_GETFL
, 0);
352 return fcntl(fd
, F_SETFL
, flags
| O_NONBLOCK
);
356 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin
,
357 std::string::const_iterator host_and_port_end
,
360 if (host_and_port_begin
>= host_and_port_end
)
363 // When using url, we use char*.
364 const char* auth_begin
= &(*host_and_port_begin
);
365 int auth_len
= host_and_port_end
- host_and_port_begin
;
367 url::Component
auth_component(0, auth_len
);
368 url::Component username_component
;
369 url::Component password_component
;
370 url::Component hostname_component
;
371 url::Component port_component
;
373 url::ParseAuthority(auth_begin
, auth_component
, &username_component
,
374 &password_component
, &hostname_component
, &port_component
);
376 // There shouldn't be a username/password.
377 if (username_component
.is_valid() || password_component
.is_valid())
380 if (!hostname_component
.is_nonempty())
381 return false; // Failed parsing.
383 int parsed_port_number
= -1;
384 if (port_component
.is_nonempty()) {
385 parsed_port_number
= url::ParsePort(auth_begin
, port_component
);
387 // If parsing failed, port_number will be either PORT_INVALID or
388 // PORT_UNSPECIFIED, both of which are negative.
389 if (parsed_port_number
< 0)
390 return false; // Failed parsing the port number.
393 if (port_component
.len
== 0)
394 return false; // Reject inputs like "foo:"
396 unsigned char tmp_ipv6_addr
[16];
398 // If the hostname starts with a bracket, it is either an IPv6 literal or
399 // invalid. If it is an IPv6 literal then strip the brackets.
400 if (hostname_component
.len
> 0 &&
401 auth_begin
[hostname_component
.begin
] == '[') {
402 if (auth_begin
[hostname_component
.end() - 1] == ']' &&
403 url::IPv6AddressToNumber(
404 auth_begin
, hostname_component
, tmp_ipv6_addr
)) {
405 // Strip the brackets.
406 hostname_component
.begin
++;
407 hostname_component
.len
-= 2;
413 // Pass results back to caller.
414 host
->assign(auth_begin
+ hostname_component
.begin
, hostname_component
.len
);
415 *port
= parsed_port_number
;
417 return true; // Success.
420 bool ParseHostAndPort(const std::string
& host_and_port
,
423 return ParseHostAndPort(
424 host_and_port
.begin(), host_and_port
.end(), host
, port
);
427 std::string
GetHostAndPort(const GURL
& url
) {
428 // For IPv6 literals, GURL::host() already includes the brackets so it is
429 // safe to just append a colon.
430 return base::StringPrintf("%s:%d", url
.host().c_str(),
431 url
.EffectiveIntPort());
434 std::string
GetHostAndOptionalPort(const GURL
& url
) {
435 // For IPv6 literals, GURL::host() already includes the brackets
436 // so it is safe to just append a colon.
438 return base::StringPrintf("%s:%s", url
.host().c_str(), url
.port().c_str());
442 bool IsHostnameNonUnique(const std::string
& hostname
) {
443 // CanonicalizeHost requires surrounding brackets to parse an IPv6 address.
444 const std::string host_or_ip
= hostname
.find(':') != std::string::npos
?
445 "[" + hostname
+ "]" : hostname
;
446 url::CanonHostInfo host_info
;
447 std::string canonical_name
= CanonicalizeHost(host_or_ip
, &host_info
);
449 // If canonicalization fails, then the input is truly malformed. However,
450 // to avoid mis-reporting bad inputs as "non-unique", treat them as unique.
451 if (canonical_name
.empty())
454 // If |hostname| is an IP address, check to see if it's in an IANA-reserved
456 if (host_info
.IsIPAddress()) {
457 IPAddressNumber host_addr
;
458 if (!ParseIPLiteralToNumber(hostname
.substr(host_info
.out_host
.begin
,
459 host_info
.out_host
.len
),
463 switch (host_info
.family
) {
464 case url::CanonHostInfo::IPV4
:
465 case url::CanonHostInfo::IPV6
:
466 return IsIPAddressReserved(host_addr
);
467 case url::CanonHostInfo::NEUTRAL
:
468 case url::CanonHostInfo::BROKEN
:
473 // Check for a registry controlled portion of |hostname|, ignoring private
474 // registries, as they already chain to ICANN-administered registries,
475 // and explicitly ignoring unknown registries.
477 // Note: This means that as new gTLDs are introduced on the Internet, they
478 // will be treated as non-unique until the registry controlled domain list
479 // is updated. However, because gTLDs are expected to provide significant
480 // advance notice to deprecate older versions of this code, this an
481 // acceptable tradeoff.
482 return 0 == registry_controlled_domains::GetRegistryLength(
484 registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
,
485 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
);
488 SockaddrStorage::SockaddrStorage(const SockaddrStorage
& other
)
489 : addr_len(other
.addr_len
),
490 addr(reinterpret_cast<struct sockaddr
*>(&addr_storage
)) {
491 memcpy(addr
, other
.addr
, addr_len
);
494 void SockaddrStorage::operator=(const SockaddrStorage
& other
) {
495 addr_len
= other
.addr_len
;
496 // addr is already set to &this->addr_storage by default ctor.
497 memcpy(addr
, other
.addr
, addr_len
);
500 // Extracts the address and port portions of a sockaddr.
501 bool GetIPAddressFromSockAddr(const struct sockaddr
* sock_addr
,
502 socklen_t sock_addr_len
,
503 const uint8_t** address
,
506 if (sock_addr
->sa_family
== AF_INET
) {
507 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in
)))
509 const struct sockaddr_in
* addr
=
510 reinterpret_cast<const struct sockaddr_in
*>(sock_addr
);
511 *address
= reinterpret_cast<const uint8_t*>(&addr
->sin_addr
);
512 *address_len
= kIPv4AddressSize
;
514 *port
= base::NetToHost16(addr
->sin_port
);
518 if (sock_addr
->sa_family
== AF_INET6
) {
519 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in6
)))
521 const struct sockaddr_in6
* addr
=
522 reinterpret_cast<const struct sockaddr_in6
*>(sock_addr
);
523 *address
= reinterpret_cast<const uint8_t*>(&addr
->sin6_addr
);
524 *address_len
= kIPv6AddressSize
;
526 *port
= base::NetToHost16(addr
->sin6_port
);
531 if (sock_addr
->sa_family
== AF_BTH
) {
532 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(SOCKADDR_BTH
)))
534 const SOCKADDR_BTH
* addr
=
535 reinterpret_cast<const SOCKADDR_BTH
*>(sock_addr
);
536 *address
= reinterpret_cast<const uint8_t*>(&addr
->btAddr
);
537 *address_len
= kBluetoothAddressSize
;
539 *port
= static_cast<uint16_t>(addr
->port
);
544 return false; // Unrecognized |sa_family|.
547 std::string
NetAddressToString(const struct sockaddr
* sa
,
548 socklen_t sock_addr_len
) {
549 const uint8_t* address
;
551 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
552 &address_len
, NULL
)) {
554 return std::string();
556 return IPAddressToString(address
, address_len
);
559 std::string
NetAddressToStringWithPort(const struct sockaddr
* sa
,
560 socklen_t sock_addr_len
) {
561 const uint8_t* address
;
564 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
565 &address_len
, &port
)) {
567 return std::string();
569 return IPAddressToStringWithPort(address
, address_len
, port
);
572 std::string
GetHostName() {
575 return std::string();
576 #else // defined(OS_NACL)
581 // Host names are limited to 255 bytes.
583 int result
= gethostname(buffer
, sizeof(buffer
));
585 DVLOG(1) << "gethostname() failed with " << result
;
588 return std::string(buffer
);
589 #endif // !defined(OS_NACL)
592 void GetIdentityFromURL(const GURL
& url
,
593 base::string16
* username
,
594 base::string16
* password
) {
595 UnescapeRule::Type flags
=
596 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
;
597 *username
= UnescapeAndDecodeUTF8URLComponent(url
.username(), flags
);
598 *password
= UnescapeAndDecodeUTF8URLComponent(url
.password(), flags
);
601 std::string
GetHostOrSpecFromURL(const GURL
& url
) {
602 return url
.has_host() ? TrimEndingDot(url
.host()) : url
.spec();
605 bool CanStripTrailingSlash(const GURL
& url
) {
606 // Omit the path only for standard, non-file URLs with nothing but "/" after
608 return url
.IsStandard() && !url
.SchemeIsFile() &&
609 !url
.SchemeIsFileSystem() && !url
.has_query() && !url
.has_ref()
610 && url
.path() == "/";
613 GURL
SimplifyUrlForRequest(const GURL
& url
) {
614 DCHECK(url
.is_valid());
615 GURL::Replacements replacements
;
616 replacements
.ClearUsername();
617 replacements
.ClearPassword();
618 replacements
.ClearRef();
619 return url
.ReplaceComponents(replacements
);
622 bool HaveOnlyLoopbackAddresses() {
623 #if defined(OS_ANDROID)
624 return android::HaveOnlyLoopbackAddresses();
625 #elif defined(OS_NACL)
628 #elif defined(OS_POSIX)
629 struct ifaddrs
* interface_addr
= NULL
;
630 int rv
= getifaddrs(&interface_addr
);
632 DVLOG(1) << "getifaddrs() failed with errno = " << errno
;
637 for (struct ifaddrs
* interface
= interface_addr
;
639 interface
= interface
->ifa_next
) {
640 if (!(IFF_UP
& interface
->ifa_flags
))
642 if (IFF_LOOPBACK
& interface
->ifa_flags
)
644 const struct sockaddr
* addr
= interface
->ifa_addr
;
647 if (addr
->sa_family
== AF_INET6
) {
648 // Safe cast since this is AF_INET6.
649 const struct sockaddr_in6
* addr_in6
=
650 reinterpret_cast<const struct sockaddr_in6
*>(addr
);
651 const struct in6_addr
* sin6_addr
= &addr_in6
->sin6_addr
;
652 if (IN6_IS_ADDR_LOOPBACK(sin6_addr
) || IN6_IS_ADDR_LINKLOCAL(sin6_addr
))
655 if (addr
->sa_family
!= AF_INET6
&& addr
->sa_family
!= AF_INET
)
661 freeifaddrs(interface_addr
);
663 #elif defined(OS_WIN)
664 // TODO(wtc): implement with the GetAdaptersAddresses function.
670 #endif // defined(various platforms)
673 AddressFamily
GetAddressFamily(const IPAddressNumber
& address
) {
674 switch (address
.size()) {
675 case kIPv4AddressSize
:
676 return ADDRESS_FAMILY_IPV4
;
677 case kIPv6AddressSize
:
678 return ADDRESS_FAMILY_IPV6
;
680 return ADDRESS_FAMILY_UNSPECIFIED
;
684 int ConvertAddressFamily(AddressFamily address_family
) {
685 switch (address_family
) {
686 case ADDRESS_FAMILY_UNSPECIFIED
:
688 case ADDRESS_FAMILY_IPV4
:
690 case ADDRESS_FAMILY_IPV6
:
697 const uint16_t* GetPortFieldFromSockaddr(const struct sockaddr
* address
,
698 socklen_t address_len
) {
699 if (address
->sa_family
== AF_INET
) {
700 DCHECK_LE(sizeof(sockaddr_in
), static_cast<size_t>(address_len
));
701 const struct sockaddr_in
* sockaddr
=
702 reinterpret_cast<const struct sockaddr_in
*>(address
);
703 return &sockaddr
->sin_port
;
704 } else if (address
->sa_family
== AF_INET6
) {
705 DCHECK_LE(sizeof(sockaddr_in6
), static_cast<size_t>(address_len
));
706 const struct sockaddr_in6
* sockaddr
=
707 reinterpret_cast<const struct sockaddr_in6
*>(address
);
708 return &sockaddr
->sin6_port
;
715 int GetPortFromSockaddr(const struct sockaddr
* address
, socklen_t address_len
) {
716 const uint16_t* port_field
= GetPortFieldFromSockaddr(address
, address_len
);
719 return base::NetToHost16(*port_field
);
722 bool ResolveLocalHostname(const std::string
& host
,
724 AddressList
* address_list
) {
725 static const unsigned char kLocalhostIPv4
[] = {127, 0, 0, 1};
726 static const unsigned char kLocalhostIPv6
[] = {
727 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
729 std::string normalized_host
= NormalizeHostname(host
);
731 address_list
->clear();
733 bool is_local6
= IsLocal6Hostname(normalized_host
);
734 if (!is_local6
&& !IsLocalHostname(normalized_host
))
737 address_list
->push_back(
738 IPEndPoint(IPAddressNumber(kLocalhostIPv6
,
739 kLocalhostIPv6
+ arraysize(kLocalhostIPv6
)),
742 address_list
->push_back(
743 IPEndPoint(IPAddressNumber(kLocalhostIPv4
,
744 kLocalhostIPv4
+ arraysize(kLocalhostIPv4
)),
751 bool IsLocalhost(const std::string
& host
) {
752 std::string normalized_host
= NormalizeHostname(host
);
753 if (IsLocalHostname(normalized_host
) || IsLocal6Hostname(normalized_host
))
756 IPAddressNumber ip_number
;
757 if (ParseIPLiteralToNumber(host
, &ip_number
)) {
758 size_t size
= ip_number
.size();
760 case kIPv4AddressSize
: {
761 IPAddressNumber localhost_prefix
;
762 localhost_prefix
.push_back(127);
763 for (int i
= 0; i
< 3; ++i
) {
764 localhost_prefix
.push_back(0);
766 return IPNumberMatchesPrefix(ip_number
, localhost_prefix
, 8);
769 case kIPv6AddressSize
: {
770 struct in6_addr sin6_addr
;
771 memcpy(&sin6_addr
, &ip_number
[0], kIPv6AddressSize
);
772 return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr
);
783 bool IsLocalhostTLD(const std::string
& host
) {
784 return IsNormalizedLocalhostTLD(NormalizeHostname(host
));
787 bool HasGoogleHost(const GURL
& url
) {
788 static const char* kGoogleHostSuffixes
[] = {
795 ".googleusercontent.com",
796 ".googlesyndication.com",
797 ".google-analytics.com",
798 ".googleadservices.com",
802 const std::string
& host
= url
.host();
803 for (const char* suffix
: kGoogleHostSuffixes
) {
804 if (base::EndsWith(host
, suffix
, base::CompareCase::INSENSITIVE_ASCII
))