1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/net_util.h"
15 #include "build/build_config.h"
22 #pragma comment(lib, "iphlpapi.lib")
23 #elif defined(OS_POSIX)
26 #include <netinet/in.h>
30 #if !defined(OS_ANDROID)
32 #endif // !defined(OS_NACL)
33 #endif // !defined(OS_ANDROID)
34 #endif // defined(OS_POSIX)
36 #include "base/basictypes.h"
37 #include "base/json/string_escape.h"
38 #include "base/lazy_instance.h"
39 #include "base/logging.h"
40 #include "base/strings/string_number_conversions.h"
41 #include "base/strings/string_piece.h"
42 #include "base/strings/string_split.h"
43 #include "base/strings/string_util.h"
44 #include "base/strings/stringprintf.h"
45 #include "base/strings/utf_string_conversions.h"
46 #include "base/sys_byteorder.h"
47 #include "base/values.h"
48 #include "net/base/dns_util.h"
49 #include "net/base/ip_address_number.h"
50 #include "net/base/net_module.h"
51 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
52 #include "net/grit/net_resources.h"
53 #include "net/http/http_content_disposition.h"
55 #include "url/third_party/mozilla/url_parse.h"
56 #include "url/url_canon.h"
57 #include "url/url_canon_ip.h"
59 #if defined(OS_ANDROID)
60 #include "net/android/network_library.h"
63 #include "net/base/winsock_init.h"
70 // The general list of blocked ports. Will be blocked unless a specific
71 // protocol overrides it. (Ex: ftp can use ports 20 and 21)
72 static const int kRestrictedPorts
[] = {
106 135, // loc-srv /epmap
129 3659, // apple-sasl / PasswordServer
132 6665, // Alternate IRC [Apple addition]
133 6666, // Alternate IRC [Apple addition]
134 6667, // Standard IRC [Apple addition]
135 6668, // Alternate IRC [Apple addition]
136 6669, // Alternate IRC [Apple addition]
137 0xFFFF, // Used to block all invalid port numbers (see
138 // third_party/WebKit/Source/platform/weborigin/KURL.cpp,
142 // FTP overrides the following restricted ports.
143 static const int kAllowedFtpPorts
[] = {
150 static base::LazyInstance
<std::multiset
<int> >::Leaky
151 g_explicitly_allowed_ports
= LAZY_INSTANCE_INITIALIZER
;
153 size_t GetCountOfExplicitlyAllowedPorts() {
154 return g_explicitly_allowed_ports
.Get().size();
157 std::string
GetSpecificHeader(const std::string
& headers
,
158 const std::string
& name
) {
159 // We want to grab the Value from the "Key: Value" pairs in the headers,
160 // which should look like this (no leading spaces, \n-separated) (we format
161 // them this way in url_request_inet.cc):
163 // ETag: "6d0b8-947-24f35ec0"\n
164 // Content-Length: 2375\n
165 // Content-Type: text/html; charset=UTF-8\n
166 // Last-Modified: Sun, 03 Sep 2006 04:34:43 GMT\n
168 return std::string();
170 std::string
match('\n' + name
+ ':');
172 std::string::const_iterator begin
=
173 std::search(headers
.begin(), headers
.end(), match
.begin(), match
.end(),
174 base::CaseInsensitiveCompareASCII
<char>());
176 if (begin
== headers
.end())
177 return std::string();
179 begin
+= match
.length();
182 base::TrimWhitespace(std::string(begin
,
183 std::find(begin
, headers
.end(), '\n')),
184 base::TRIM_ALL
, &ret
);
188 std::string
CanonicalizeHost(const std::string
& host
,
189 url::CanonHostInfo
* host_info
) {
190 // Try to canonicalize the host.
191 const url::Component
raw_host_component(0, static_cast<int>(host
.length()));
192 std::string canon_host
;
193 url::StdStringCanonOutput
canon_host_output(&canon_host
);
194 url::CanonicalizeHostVerbose(host
.c_str(), raw_host_component
,
195 &canon_host_output
, host_info
);
197 if (host_info
->out_host
.is_nonempty() &&
198 host_info
->family
!= url::CanonHostInfo::BROKEN
) {
199 // Success! Assert that there's no extra garbage.
200 canon_host_output
.Complete();
201 DCHECK_EQ(host_info
->out_host
.len
, static_cast<int>(canon_host
.length()));
203 // Empty host, or canonicalization failed. We'll return empty.
210 std::string
GetDirectoryListingHeader(const base::string16
& title
) {
211 static const base::StringPiece
header(
212 NetModule::GetResource(IDR_DIR_HEADER_HTML
));
213 // This can be null in unit tests.
214 DLOG_IF(WARNING
, header
.empty()) <<
215 "Missing resource: directory listing header";
219 result
.assign(header
.data(), header
.size());
221 result
.append("<script>start(");
222 base::EscapeJSONString(title
, true, &result
);
223 result
.append(");</script>\n");
// Returns true if |c| is a lowercase ASCII letter ('a'-'z') or an ASCII digit
// ('0'-'9').
inline bool IsHostCharAlphanumeric(char c) {
  // Only the lowercase range is tested: uppercase characters have already
  // been canonicalized by the time a host reaches this check.
  const bool is_lower_alpha = ('a' <= c) && (c <= 'z');
  const bool is_digit = ('0' <= c) && (c <= '9');
  return is_lower_alpha || is_digit;
}
234 bool IsCanonicalizedHostCompliant(const std::string
& host
) {
238 bool in_component
= false;
239 bool most_recent_component_started_alphanumeric
= false;
241 for (std::string::const_iterator
i(host
.begin()); i
!= host
.end(); ++i
) {
244 most_recent_component_started_alphanumeric
= IsHostCharAlphanumeric(c
);
245 if (!most_recent_component_started_alphanumeric
&& (c
!= '-') &&
250 } else if (c
== '.') {
251 in_component
= false;
252 } else if (!IsHostCharAlphanumeric(c
) && (c
!= '-') && (c
!= '_')) {
257 return most_recent_component_started_alphanumeric
;
260 base::string16
StripWWW(const base::string16
& text
) {
261 const base::string16
www(base::ASCIIToUTF16("www."));
262 return StartsWith(text
, www
, true) ? text
.substr(www
.length()) : text
;
265 base::string16
StripWWWFromHost(const GURL
& url
) {
266 DCHECK(url
.is_valid());
267 return StripWWW(base::ASCIIToUTF16(url
.host()));
// Returns true if |port| fits in an unsigned 16-bit integer, i.e. lies in the
// inclusive range [0, 65535].
bool IsPortValid(int port) {
  if (port < 0)
    return false;
  return port <= std::numeric_limits<uint16_t>::max();
}
274 bool IsPortAllowedByDefault(int port
) {
275 int array_size
= arraysize(kRestrictedPorts
);
276 for (int i
= 0; i
< array_size
; i
++) {
277 if (kRestrictedPorts
[i
] == port
) {
281 return IsPortValid(port
);
284 bool IsPortAllowedByFtp(int port
) {
285 int array_size
= arraysize(kAllowedFtpPorts
);
286 for (int i
= 0; i
< array_size
; i
++) {
287 if (kAllowedFtpPorts
[i
] == port
) {
291 // Port not explicitly allowed by FTP, so return the default restrictions.
292 return IsPortAllowedByDefault(port
);
295 bool IsPortAllowedByOverride(int port
) {
296 if (g_explicitly_allowed_ports
.Get().empty())
299 return g_explicitly_allowed_ports
.Get().count(port
) > 0;
302 int SetNonBlocking(int fd
) {
304 unsigned long no_block
= 1;
305 return ioctlsocket(fd
, FIONBIO
, &no_block
);
306 #elif defined(OS_POSIX)
307 int flags
= fcntl(fd
, F_GETFL
, 0);
310 return fcntl(fd
, F_SETFL
, flags
| O_NONBLOCK
);
314 bool ParseHostAndPort(std::string::const_iterator host_and_port_begin
,
315 std::string::const_iterator host_and_port_end
,
318 if (host_and_port_begin
>= host_and_port_end
)
321 // When using url, we use char*.
322 const char* auth_begin
= &(*host_and_port_begin
);
323 int auth_len
= host_and_port_end
- host_and_port_begin
;
325 url::Component
auth_component(0, auth_len
);
326 url::Component username_component
;
327 url::Component password_component
;
328 url::Component hostname_component
;
329 url::Component port_component
;
331 url::ParseAuthority(auth_begin
, auth_component
, &username_component
,
332 &password_component
, &hostname_component
, &port_component
);
334 // There shouldn't be a username/password.
335 if (username_component
.is_valid() || password_component
.is_valid())
338 if (!hostname_component
.is_nonempty())
339 return false; // Failed parsing.
341 int parsed_port_number
= -1;
342 if (port_component
.is_nonempty()) {
343 parsed_port_number
= url::ParsePort(auth_begin
, port_component
);
345 // If parsing failed, |parsed_port_number| will be either PORT_INVALID or
346 // PORT_UNSPECIFIED, both of which are negative.
347 if (parsed_port_number
< 0)
348 return false; // Failed parsing the port number.
351 if (port_component
.len
== 0)
352 return false; // Reject inputs like "foo:"
354 unsigned char tmp_ipv6_addr
[16];
356 // If the hostname starts with a bracket, it is either an IPv6 literal or
357 // invalid. If it is an IPv6 literal then strip the brackets.
358 if (hostname_component
.len
> 0 &&
359 auth_begin
[hostname_component
.begin
] == '[') {
360 if (auth_begin
[hostname_component
.end() - 1] == ']' &&
361 url::IPv6AddressToNumber(
362 auth_begin
, hostname_component
, tmp_ipv6_addr
)) {
363 // Strip the brackets.
364 hostname_component
.begin
++;
365 hostname_component
.len
-= 2;
371 // Pass results back to caller.
372 host
->assign(auth_begin
+ hostname_component
.begin
, hostname_component
.len
);
373 *port
= parsed_port_number
;
375 return true; // Success.
378 bool ParseHostAndPort(const std::string
& host_and_port
,
381 return ParseHostAndPort(
382 host_and_port
.begin(), host_and_port
.end(), host
, port
);
385 std::string
GetHostAndPort(const GURL
& url
) {
386 // For IPv6 literals, GURL::host() already includes the brackets so it is
387 // safe to just append a colon.
388 return base::StringPrintf("%s:%d", url
.host().c_str(),
389 url
.EffectiveIntPort());
392 std::string
GetHostAndOptionalPort(const GURL
& url
) {
393 // For IPv6 literals, GURL::host() already includes the brackets
394 // so it is safe to just append a colon.
396 return base::StringPrintf("%s:%s", url
.host().c_str(), url
.port().c_str());
400 bool IsHostnameNonUnique(const std::string
& hostname
) {
401 // CanonicalizeHost requires surrounding brackets to parse an IPv6 address.
402 const std::string host_or_ip
= hostname
.find(':') != std::string::npos
?
403 "[" + hostname
+ "]" : hostname
;
404 url::CanonHostInfo host_info
;
405 std::string canonical_name
= CanonicalizeHost(host_or_ip
, &host_info
);
407 // If canonicalization fails, then the input is truly malformed. However,
408 // to avoid mis-reporting bad inputs as "non-unique", treat them as unique.
409 if (canonical_name
.empty())
412 // If |hostname| is an IP address, check to see if it's in an IANA-reserved
414 if (host_info
.IsIPAddress()) {
415 IPAddressNumber host_addr
;
416 if (!ParseIPLiteralToNumber(hostname
.substr(host_info
.out_host
.begin
,
417 host_info
.out_host
.len
),
421 switch (host_info
.family
) {
422 case url::CanonHostInfo::IPV4
:
423 case url::CanonHostInfo::IPV6
:
424 return IsIPAddressReserved(host_addr
);
425 case url::CanonHostInfo::NEUTRAL
:
426 case url::CanonHostInfo::BROKEN
:
431 // Check for a registry controlled portion of |hostname|, ignoring private
432 // registries, as they already chain to ICANN-administered registries,
433 // and explicitly ignoring unknown registries.
435 // Note: This means that as new gTLDs are introduced on the Internet, they
436 // will be treated as non-unique until the registry controlled domain list
437 // is updated. However, because gTLDs are expected to provide significant
438 // advance notice to deprecate older versions of this code, this is an
439 // acceptable tradeoff.
440 return 0 == registry_controlled_domains::GetRegistryLength(
442 registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES
,
443 registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES
);
446 SockaddrStorage::SockaddrStorage(const SockaddrStorage
& other
)
447 : addr_len(other
.addr_len
),
448 addr(reinterpret_cast<struct sockaddr
*>(&addr_storage
)) {
449 memcpy(addr
, other
.addr
, addr_len
);
452 void SockaddrStorage::operator=(const SockaddrStorage
& other
) {
453 addr_len
= other
.addr_len
;
454 // addr is already set to &this->addr_storage by default ctor.
455 memcpy(addr
, other
.addr
, addr_len
);
458 // Extracts the address and port portions of a sockaddr.
459 bool GetIPAddressFromSockAddr(const struct sockaddr
* sock_addr
,
460 socklen_t sock_addr_len
,
461 const uint8_t** address
,
464 if (sock_addr
->sa_family
== AF_INET
) {
465 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in
)))
467 const struct sockaddr_in
* addr
=
468 reinterpret_cast<const struct sockaddr_in
*>(sock_addr
);
469 *address
= reinterpret_cast<const uint8_t*>(&addr
->sin_addr
);
470 *address_len
= kIPv4AddressSize
;
472 *port
= base::NetToHost16(addr
->sin_port
);
476 if (sock_addr
->sa_family
== AF_INET6
) {
477 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(struct sockaddr_in6
)))
479 const struct sockaddr_in6
* addr
=
480 reinterpret_cast<const struct sockaddr_in6
*>(sock_addr
);
481 *address
= reinterpret_cast<const uint8_t*>(&addr
->sin6_addr
);
482 *address_len
= kIPv6AddressSize
;
484 *port
= base::NetToHost16(addr
->sin6_port
);
489 if (sock_addr
->sa_family
== AF_BTH
) {
490 if (sock_addr_len
< static_cast<socklen_t
>(sizeof(SOCKADDR_BTH
)))
492 const SOCKADDR_BTH
* addr
=
493 reinterpret_cast<const SOCKADDR_BTH
*>(sock_addr
);
494 *address
= reinterpret_cast<const uint8_t*>(&addr
->btAddr
);
495 *address_len
= kBluetoothAddressSize
;
497 *port
= static_cast<uint16_t>(addr
->port
);
502 return false; // Unrecognized |sa_family|.
505 std::string
NetAddressToString(const struct sockaddr
* sa
,
506 socklen_t sock_addr_len
) {
507 const uint8_t* address
;
509 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
510 &address_len
, NULL
)) {
512 return std::string();
514 return IPAddressToString(address
, address_len
);
517 std::string
NetAddressToStringWithPort(const struct sockaddr
* sa
,
518 socklen_t sock_addr_len
) {
519 const uint8_t* address
;
522 if (!GetIPAddressFromSockAddr(sa
, sock_addr_len
, &address
,
523 &address_len
, &port
)) {
525 return std::string();
527 return IPAddressToStringWithPort(address
, address_len
, port
);
530 std::string
GetHostName() {
533 return std::string();
534 #else // defined(OS_NACL)
539 // Host names are limited to 255 bytes.
541 int result
= gethostname(buffer
, sizeof(buffer
));
543 DVLOG(1) << "gethostname() failed with " << result
;
546 return std::string(buffer
);
547 #endif // !defined(OS_NACL)
550 void GetIdentityFromURL(const GURL
& url
,
551 base::string16
* username
,
552 base::string16
* password
) {
553 UnescapeRule::Type flags
=
554 UnescapeRule::SPACES
| UnescapeRule::URL_SPECIAL_CHARS
;
555 *username
= UnescapeAndDecodeUTF8URLComponent(url
.username(), flags
);
556 *password
= UnescapeAndDecodeUTF8URLComponent(url
.password(), flags
);
559 std::string
GetHostOrSpecFromURL(const GURL
& url
) {
560 return url
.has_host() ? TrimEndingDot(url
.host()) : url
.spec();
563 bool CanStripTrailingSlash(const GURL
& url
) {
564 // Omit the path only for standard, non-file URLs with nothing but "/" after
566 return url
.IsStandard() && !url
.SchemeIsFile() &&
567 !url
.SchemeIsFileSystem() && !url
.has_query() && !url
.has_ref()
568 && url
.path() == "/";
571 GURL
SimplifyUrlForRequest(const GURL
& url
) {
572 DCHECK(url
.is_valid());
573 GURL::Replacements replacements
;
574 replacements
.ClearUsername();
575 replacements
.ClearPassword();
576 replacements
.ClearRef();
577 return url
.ReplaceComponents(replacements
);
580 // Specifies a comma separated list of port numbers that should be accepted
581 // despite bans. If the string is invalid, no allowed ports are stored.
582 void SetExplicitlyAllowedPorts(const std::string
& allowed_ports
) {
583 if (allowed_ports
.empty())
586 std::multiset
<int> ports
;
588 size_t size
= allowed_ports
.size();
589 // The comma delimiter.
590 const std::string::value_type kComma
= ',';
592 // Overflow is still possible for evil user inputs.
593 for (size_t i
= 0; i
<= size
; ++i
) {
594 // The string should be composed of only digits and commas.
595 if (i
!= size
&& !IsAsciiDigit(allowed_ports
[i
]) &&
596 (allowed_ports
[i
] != kComma
))
598 if (i
== size
|| allowed_ports
[i
] == kComma
) {
601 base::StringToInt(base::StringPiece(allowed_ports
.begin() + last
,
602 allowed_ports
.begin() + i
),
609 g_explicitly_allowed_ports
.Get() = ports
;
612 ScopedPortException::ScopedPortException(int port
) : port_(port
) {
613 g_explicitly_allowed_ports
.Get().insert(port
);
616 ScopedPortException::~ScopedPortException() {
617 std::multiset
<int>::iterator it
=
618 g_explicitly_allowed_ports
.Get().find(port_
);
619 if (it
!= g_explicitly_allowed_ports
.Get().end())
620 g_explicitly_allowed_ports
.Get().erase(it
);
625 bool HaveOnlyLoopbackAddresses() {
626 #if defined(OS_ANDROID)
627 return android::HaveOnlyLoopbackAddresses();
628 #elif defined(OS_NACL)
631 #elif defined(OS_POSIX)
632 struct ifaddrs
* interface_addr
= NULL
;
633 int rv
= getifaddrs(&interface_addr
);
635 DVLOG(1) << "getifaddrs() failed with errno = " << errno
;
640 for (struct ifaddrs
* interface
= interface_addr
;
642 interface
= interface
->ifa_next
) {
643 if (!(IFF_UP
& interface
->ifa_flags
))
645 if (IFF_LOOPBACK
& interface
->ifa_flags
)
647 const struct sockaddr
* addr
= interface
->ifa_addr
;
650 if (addr
->sa_family
== AF_INET6
) {
651 // Safe cast since this is AF_INET6.
652 const struct sockaddr_in6
* addr_in6
=
653 reinterpret_cast<const struct sockaddr_in6
*>(addr
);
654 const struct in6_addr
* sin6_addr
= &addr_in6
->sin6_addr
;
655 if (IN6_IS_ADDR_LOOPBACK(sin6_addr
) || IN6_IS_ADDR_LINKLOCAL(sin6_addr
))
658 if (addr
->sa_family
!= AF_INET6
&& addr
->sa_family
!= AF_INET
)
664 freeifaddrs(interface_addr
);
666 #elif defined(OS_WIN)
667 // TODO(wtc): implement with the GetAdaptersAddresses function.
673 #endif // defined(various platforms)
676 AddressFamily
GetAddressFamily(const IPAddressNumber
& address
) {
677 switch (address
.size()) {
678 case kIPv4AddressSize
:
679 return ADDRESS_FAMILY_IPV4
;
680 case kIPv6AddressSize
:
681 return ADDRESS_FAMILY_IPV6
;
683 return ADDRESS_FAMILY_UNSPECIFIED
;
687 int ConvertAddressFamily(AddressFamily address_family
) {
688 switch (address_family
) {
689 case ADDRESS_FAMILY_UNSPECIFIED
:
691 case ADDRESS_FAMILY_IPV4
:
693 case ADDRESS_FAMILY_IPV6
:
700 const uint16_t* GetPortFieldFromSockaddr(const struct sockaddr
* address
,
701 socklen_t address_len
) {
702 if (address
->sa_family
== AF_INET
) {
703 DCHECK_LE(sizeof(sockaddr_in
), static_cast<size_t>(address_len
));
704 const struct sockaddr_in
* sockaddr
=
705 reinterpret_cast<const struct sockaddr_in
*>(address
);
706 return &sockaddr
->sin_port
;
707 } else if (address
->sa_family
== AF_INET6
) {
708 DCHECK_LE(sizeof(sockaddr_in6
), static_cast<size_t>(address_len
));
709 const struct sockaddr_in6
* sockaddr
=
710 reinterpret_cast<const struct sockaddr_in6
*>(address
);
711 return &sockaddr
->sin6_port
;
718 int GetPortFromSockaddr(const struct sockaddr
* address
, socklen_t address_len
) {
719 const uint16_t* port_field
= GetPortFieldFromSockaddr(address
, address_len
);
722 return base::NetToHost16(*port_field
);
725 bool IsLocalhost(const std::string
& host
) {
726 if (host
== "localhost" || host
== "localhost.localdomain" ||
727 host
== "localhost6" || host
== "localhost6.localdomain6" ||
728 IsLocalhostTLD(host
))
731 IPAddressNumber ip_number
;
732 if (ParseIPLiteralToNumber(host
, &ip_number
)) {
733 size_t size
= ip_number
.size();
735 case kIPv4AddressSize
: {
736 IPAddressNumber localhost_prefix
;
737 localhost_prefix
.push_back(127);
738 for (int i
= 0; i
< 3; ++i
) {
739 localhost_prefix
.push_back(0);
741 return IPNumberMatchesPrefix(ip_number
, localhost_prefix
, 8);
744 case kIPv6AddressSize
: {
745 struct in6_addr sin6_addr
;
746 memcpy(&sin6_addr
, &ip_number
[0], kIPv6AddressSize
);
747 return !!IN6_IS_ADDR_LOOPBACK(&sin6_addr
);
758 bool IsLocalhostTLD(const std::string
& host
) {
759 const char kLocalhostTLD
[] = ".localhost";
760 const size_t kLocalhostTLDLength
= arraysize(kLocalhostTLD
) - 1;
765 size_t host_len
= host
.size();
766 if (*host
.rbegin() == '.')
768 if (host_len
< kLocalhostTLDLength
)
771 const char* host_suffix
= host
.data() + host_len
- kLocalhostTLDLength
;
772 return base::strncasecmp(host_suffix
, kLocalhostTLD
, kLocalhostTLDLength
) ==
776 bool HasGoogleHost(const GURL
& url
) {
777 static const char* kGoogleHostSuffixes
[] = {
784 ".googleusercontent.com",
785 ".googlesyndication.com",
786 ".google-analytics.com",
787 ".googleadservices.com",
791 const std::string
& host
= url
.host();
792 for (const char* suffix
: kGoogleHostSuffixes
) {
793 if (EndsWith(host
, suffix
, false))