1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef NET_BASE_NET_UTIL_H_
6 #define NET_BASE_NET_UTIL_H_
8 #include "build/build_config.h"
13 #elif defined(OS_POSIX)
14 #include <sys/types.h>
15 #include <sys/socket.h>
21 #include "base/basictypes.h"
22 #include "base/strings/string16.h"
23 #include "base/strings/utf_offset_string_conversions.h"
24 #include "net/base/address_family.h"
25 #include "net/base/escape.h"
26 #include "net/base/net_export.h"
27 #include "net/base/network_change_notifier.h"
28 // TODO(eroman): Remove this header and require consumers to include it
30 #include "net/base/network_interfaces.h"
45 // This is a "forward declaration" to avoid including ip_address_number.h
47 typedef std::vector
<unsigned char> IPAddressNumber
;
49 // Used by FormatUrl to specify handling of certain parts of the url.
50 typedef uint32_t FormatUrlType
;
51 typedef uint32_t FormatUrlTypes
;
54 // Bluetooth address size. Windows Bluetooth is supported via winsock.
55 static const size_t kBluetoothAddressSize
= 6;
58 // Nothing is omitted.
59 NET_EXPORT
extern const FormatUrlType kFormatUrlOmitNothing
;
61 // If set, any username and password are removed.
62 NET_EXPORT
extern const FormatUrlType kFormatUrlOmitUsernamePassword
;
64 // If the scheme is 'http://', it's removed.
65 NET_EXPORT
extern const FormatUrlType kFormatUrlOmitHTTP
;
67 // Omits the path if it is just a slash and there is no query or ref. This is
68 // meaningful for non-file "standard" URLs.
69 NET_EXPORT
extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname
;
71 // Convenience for omitting all unnecessary types.
72 NET_EXPORT
extern const FormatUrlType kFormatUrlOmitAll
;
74 // Splits an input of the form <host>[":"<port>] into its constituent parts.
75 // Saves the result into |*host| and |*port|. If the input did not have
76 // the optional port, sets |*port| to -1.
77 // Returns true if the parsing was successful, false otherwise.
78 // The returned host is NOT canonicalized, and may be invalid.
80 // IPv6 literals must be specified in a bracketed form, for instance:
83 // The resultant |*host| in both cases will be "::1" (not bracketed).
84 NET_EXPORT
bool ParseHostAndPort(
85 std::string::const_iterator host_and_port_begin
,
86 std::string::const_iterator host_and_port_end
,
89 NET_EXPORT
bool ParseHostAndPort(
90 const std::string
& host_and_port
,
94 // Returns a host:port string for the given URL.
95 NET_EXPORT
std::string
GetHostAndPort(const GURL
& url
);
97 // Returns a host[:port] string for the given URL, where the port is omitted
98 // if it is the default for the URL's scheme.
99 NET_EXPORT_PRIVATE
std::string
GetHostAndOptionalPort(const GURL
& url
);
101 // Returns true if |hostname| contains a non-registerable or non-assignable
102 // domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
103 // that falls in an IANA-reserved range.
104 NET_EXPORT
bool IsHostnameNonUnique(const std::string
& hostname
);
106 // Convenience struct for when you need a |struct sockaddr|.
107 struct SockaddrStorage
{
108 SockaddrStorage() : addr_len(sizeof(addr_storage
)),
109 addr(reinterpret_cast<struct sockaddr
*>(&addr_storage
)) {}
110 SockaddrStorage(const SockaddrStorage
& other
);
111 void operator=(const SockaddrStorage
& other
);
113 struct sockaddr_storage addr_storage
;
115 struct sockaddr
* const addr
;
118 // Extracts the IP address and port portions of a sockaddr. |port| is optional,
119 // and will not be filled in if NULL.
120 bool GetIPAddressFromSockAddr(const struct sockaddr
* sock_addr
,
121 socklen_t sock_addr_len
,
122 const unsigned char** address
,
126 // Same as IPAddressToString() but for a sockaddr. This output will not include
127 // the IPv6 scope ID.
128 NET_EXPORT
std::string
NetAddressToString(const struct sockaddr
* sa
,
129 socklen_t sock_addr_len
);
131 // Same as IPAddressToStringWithPort() but for a sockaddr. This output will not
132 // include the IPv6 scope ID.
133 NET_EXPORT
std::string
NetAddressToStringWithPort(const struct sockaddr
* sa
,
134 socklen_t sock_addr_len
);
136 // Returns the hostname of the current system. Returns empty string on failure.
137 NET_EXPORT
std::string
GetHostName();
139 // Extracts the unescaped username/password from |url|, saving the results
140 // into |*username| and |*password|.
141 NET_EXPORT_PRIVATE
void GetIdentityFromURL(const GURL
& url
,
142 base::string16
* username
,
143 base::string16
* password
);
145 // Returns either the host from |url|, or, if the host is empty, the full spec.
146 NET_EXPORT
std::string
GetHostOrSpecFromURL(const GURL
& url
);
148 // Return the value of the HTTP response header with name 'name'. 'headers'
149 // should be in the format that URLRequest::GetResponseHeaders() returns.
150 // Returns the empty string if the header is not found.
151 NET_EXPORT
std::string
GetSpecificHeader(const std::string
& headers
,
152 const std::string
& name
);
154 // Converts the given host name to Unicode characters. This can be called for
155 // any host name; if the input is not IDN or is invalid in some way, we'll just
156 // return the ASCII source so it is still usable.
158 // The input should be the canonicalized ASCII host name from GURL. This
159 // function does NOT accept UTF-8!
161 // |languages| is a comma separated list of ISO 639 language codes. It
162 // is used to determine whether a hostname is 'comprehensible' to a user
163 // who understands languages listed. |host| will be converted to a
164 // human-readable form (Unicode) ONLY when each component of |host| is
165 // regarded as 'comprehensible'. Script-mixing is not allowed except that
166 // Latin letters in the ASCII range can be mixed with a limited set of
167 // script-language pairs (currently Han, Kana and Hangul for zh, ja and ko).
168 // When |languages| is empty, even that mixing is not allowed.
169 NET_EXPORT
base::string16
IDNToUnicode(const std::string
& host
,
170 const std::string
& languages
);
172 // Canonicalizes |host| and returns it. Also fills |host_info| with
173 // IP address information. |host_info| must not be NULL.
174 NET_EXPORT
std::string
CanonicalizeHost(const std::string
& host
,
175 url::CanonHostInfo
* host_info
);
177 // Returns true if |host| is not an IP address and is compliant with a set of
178 // rules based on RFC 1738 and tweaked to be compatible with the real world.
180 // * One or more components separated by '.'
181 // * Each component contains only alphanumeric characters and '-' or '_'
182 // * The last component begins with an alphanumeric character
183 // * Optional trailing dot after last component (means "treat as FQDN")
185 // NOTE: You should only pass in hosts that have been returned from
186 // CanonicalizeHost(), or you may not get accurate results.
187 NET_EXPORT
bool IsCanonicalizedHostCompliant(const std::string
& host
);
189 // Call these functions to get the html snippet for a directory listing.
190 // The return values of both functions are in UTF-8.
191 NET_EXPORT
std::string
GetDirectoryListingHeader(const base::string16
& title
);
193 // Given the name of a file in a directory (ftp or local) and
194 // other information (is_dir, size, modification time), it returns
195 // the html snippet to add the entry for the file to the directory listing.
196 // Currently, it's a script tag containing a call to a Javascript function
199 // |name| is the file name to be displayed. |raw_bytes| will be used
200 // as the actual target of the link (so for example, ftp links should use
201 // server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name|
204 // Both |name| and |raw_bytes| are escaped internally.
205 NET_EXPORT
std::string
GetDirectoryListingEntry(const base::string16
& name
,
206 const std::string
& raw_bytes
,
209 base::Time modified
);
211 // If text starts with "www." it is removed, otherwise text is returned
213 NET_EXPORT
base::string16
StripWWW(const base::string16
& text
);
215 // Runs |url|'s host through StripWWW(). |url| must be valid.
216 NET_EXPORT
base::string16
StripWWWFromHost(const GURL
& url
);
218 // Checks if |port| is in the valid range (0 to 65535, though 0 is technically
219 // reserved). Should be used before casting a port to a uint16_t.
220 NET_EXPORT
bool IsPortValid(int port
);
222 // Returns true if the port is in the range [0, 1023]. These ports are
223 // registered by IANA and typically need root access to listen on.
224 bool IsWellKnownPort(int port
);
226 // Checks if the port is allowed for the specified scheme. Ports set as allowed
227 // with SetExplicitlyAllowedPorts() or by using ScopedPortException() will be
228 // considered allowed for any scheme.
229 NET_EXPORT
bool IsPortAllowedForScheme(int port
, const std::string
& url_scheme
);
231 // Returns the number of explicitly allowed ports; for testing.
232 NET_EXPORT_PRIVATE
size_t GetCountOfExplicitlyAllowedPorts();
// Sets the ports that are explicitly allowed, as described by |allowed_ports|.
// Per the IsPortAllowedForScheme() comment, ports allowed this way are
// considered allowed for any scheme.
// NOTE(review): the expected format of |allowed_ports| (presumably a
// comma-separated list of port numbers) is not visible in this header --
// confirm against the implementation.
234 NET_EXPORT
void SetExplicitlyAllowedPorts(const std::string
& allowed_ports
);
236 class NET_EXPORT ScopedPortException
{
238 explicit ScopedPortException(int port
);
239 ~ScopedPortException();
244 DISALLOW_COPY_AND_ASSIGN(ScopedPortException
);
247 // Set socket to non-blocking mode
248 NET_EXPORT
int SetNonBlocking(int fd
);
250 // Formats the host in |url| and appends it to |output|. The host formatter
251 // takes the same accept languages component as ElideURL().
252 NET_EXPORT
void AppendFormattedHost(const GURL
& url
,
253 const std::string
& languages
,
254 base::string16
* output
);
256 // Creates a string representation of |url|. The IDN host name may be in Unicode
257 // if |languages| accepts the Unicode representation. |format_type| is a bitmask
258 // of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean
259 // the URL for human readability. You will generally want |UnescapeRule::SPACES|
260 // for display to the user if you can handle spaces, or |UnescapeRule::NORMAL|
261 // if not. If the path part and the query part seem to be encoded in %-encoded
262 // UTF-8, decodes %-encoding and UTF-8.
264 // The last three parameters may be NULL.
266 // |new_parsed| will be set to the parsing parameters of the resultant URL.
268 // |prefix_end| will be the length before the hostname of the resultant URL.
270 // |offset[s]_for_adjustment| specifies one or more offsets into the original
271 // URL, representing insertion or selection points between characters: if the
272 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
273 // between the scheme and the host, and offset 15 is after the end of the URL.
274 // Valid input offsets range from 0 to the length of the input URL string. On
275 // exit, each offset will have been modified to reflect any changes made to the
276 // output string. For example, if |url| is "http://a:b@c.com/",
277 // |omit_username_password| is true, and an offset is 12 (pointing between 'c'
278 // and '.'), then on return the output string will be "http://c.com/" and the
279 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because
280 // it points into the middle of a component that was entirely removed or into
281 // the middle of an encoding sequence), it will be set to base::string16::npos.
282 // For consistency, if an input offset points between the scheme and the
283 // username/password, and both are removed, on output this offset will be 0
284 // rather than npos; this means that offsets at the starts and ends of removed
285 // components are always transformed the same way regardless of what other
286 // components are adjacent.
287 NET_EXPORT
base::string16
FormatUrl(const GURL
& url
,
288 const std::string
& languages
,
289 FormatUrlTypes format_types
,
290 UnescapeRule::Type unescape_rules
,
291 url::Parsed
* new_parsed
,
293 size_t* offset_for_adjustment
);
294 NET_EXPORT
base::string16
FormatUrlWithOffsets(
296 const std::string
& languages
,
297 FormatUrlTypes format_types
,
298 UnescapeRule::Type unescape_rules
,
299 url::Parsed
* new_parsed
,
301 std::vector
<size_t>* offsets_for_adjustment
);
302 // This function is like those above except it takes |adjustments| rather
303 // than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all
304 // the transformations that happened to |url| to convert it into the returned
306 NET_EXPORT
base::string16
FormatUrlWithAdjustments(
308 const std::string
& languages
,
309 FormatUrlTypes format_types
,
310 UnescapeRule::Type unescape_rules
,
311 url::Parsed
* new_parsed
,
313 base::OffsetAdjuster::Adjustments
* adjustments
);
315 // This is a convenience function for FormatUrl() with
316 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical
317 // set of flags for "URLs to display to the user". You should be cautious about
318 // using this for URLs which will be parsed or sent to other applications.
319 inline base::string16
FormatUrl(const GURL
& url
, const std::string
& languages
) {
320 return FormatUrl(url
, languages
, kFormatUrlOmitAll
, UnescapeRule::SPACES
,
324 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a
325 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
326 NET_EXPORT
bool CanStripTrailingSlash(const GURL
& url
);
328 // Strip the portions of |url| that aren't core to the network request.
329 // - user name / password
330 // - reference section
331 NET_EXPORT_PRIVATE GURL
SimplifyUrlForRequest(const GURL
& url
);
333 // Returns true if it can determine that only loopback addresses are configured,
334 // i.e. if only 127.0.0.1 and ::1 are routable.
335 // Returns false otherwise, including when it cannot determine this.
336 bool HaveOnlyLoopbackAddresses();
338 // Returns AddressFamily of the address.
339 NET_EXPORT_PRIVATE AddressFamily
GetAddressFamily(
340 const IPAddressNumber
& address
);
342 // Maps the given AddressFamily to either AF_INET, AF_INET6 or AF_UNSPEC.
343 NET_EXPORT_PRIVATE
int ConvertAddressFamily(AddressFamily address_family
);
345 // Returns a pointer to the port field of the |sockaddr| given by |address|;
// |address_len| gives the length of |address|.
// NOTE(review): the pointed-to value is presumably still in network byte order
// (the sibling GetPortFromSockaddr() is the one documented as returning host
// byte ordering), and the result is presumably NULL when the address family is
// unsupported or |address_len| is too small -- confirm in the implementation.
346 const uint16_t* GetPortFieldFromSockaddr(const struct sockaddr
* address
,
347 socklen_t address_len
);
348 // Returns the value of port in |sockaddr| (in host byte ordering).
349 NET_EXPORT_PRIVATE
int GetPortFromSockaddr(const struct sockaddr
* address
,
350 socklen_t address_len
);
352 // Returns true if |host| is one of the names (e.g. "localhost") or IP
353 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback.
355 // Note that this function does not check for IP addresses other than
356 // the above, although other IP addresses may point to the local
358 NET_EXPORT_PRIVATE
bool IsLocalhost(const std::string
& host
);
// NOTE(review): undocumented in this header. Presumably returns true if |host|
// falls under the "localhost" top-level domain (e.g. "foo.localhost") --
// confirm the exact matching rules against the implementation.
360 NET_EXPORT_PRIVATE
bool IsLocalhostTLD(const std::string
& host
);
362 // Returns true if the url's host is a Google server. This should only be used
363 // for histograms and shouldn't be used to affect behavior.
364 NET_EXPORT_PRIVATE
bool HasGoogleHost(const GURL
& url
);
366 // A subset of IP address attributes which are actionable by the
367 // application layer. Currently unimplemented for all hosts;
368 // IP_ADDRESS_ATTRIBUTE_NONE is always returned.
369 enum IPAddressAttributes
{
370 IP_ADDRESS_ATTRIBUTE_NONE
= 0,
372 // A temporary address is dynamic by nature and will not contain MAC
373 // address. Presence of MAC address in IPv6 addresses can be used to
374 // track an endpoint and cause privacy concern. Please refer to
376 IP_ADDRESS_ATTRIBUTE_TEMPORARY
= 1 << 0,
378 // A temporary address could become deprecated once the preferred
379 // lifetime is reached. It is still valid but shouldn't be used to
380 // create new connections.
381 IP_ADDRESS_ATTRIBUTE_DEPRECATED
= 1 << 1,
384 // Differentiated Services Code Point.
385 // See http://tools.ietf.org/html/rfc2474 for details.
386 enum DiffServCodePoint
{
388 DSCP_FIRST
= DSCP_NO_CHANGE
,
389 DSCP_DEFAULT
= 0, // Same as DSCP_CS0
390 DSCP_CS0
= 0, // The default
391 DSCP_CS1
= 8, // Bulk/background traffic
404 DSCP_AF41
= 34, // Video
405 DSCP_AF42
= 36, // Video
406 DSCP_AF43
= 38, // Video
407 DSCP_CS5
= 40, // Video
408 DSCP_EF
= 46, // Voice
409 DSCP_CS6
= 48, // Voice
410 DSCP_CS7
= 56, // Control messages
416 #endif // NET_BASE_NET_UTIL_H_