Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / net / base / net_util.h
blob1a32fbe1f00d09f2fbbbb9f94251a1ba0c23ae1a
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef NET_BASE_NET_UTIL_H_
6 #define NET_BASE_NET_UTIL_H_
8 #include "build/build_config.h"
10 #if defined(OS_WIN)
11 #include <windows.h>
12 #include <ws2tcpip.h>
13 #elif defined(OS_POSIX)
14 #include <sys/types.h>
15 #include <sys/socket.h>
16 #endif
18 #include <string>
19 #include <vector>
21 #include "base/basictypes.h"
22 #include "base/strings/string16.h"
23 #include "base/strings/utf_offset_string_conversions.h"
24 #include "net/base/address_family.h"
25 #include "net/base/escape.h"
26 #include "net/base/net_export.h"
27 #include "net/base/network_change_notifier.h"
28 // TODO(eroman): Remove this header and require consumers to include it
29 // directly.
30 #include "net/base/network_interfaces.h"
32 class GURL;
34 namespace base {
35 class Time;
38 namespace url {
39 struct CanonHostInfo;
40 struct Parsed;
43 namespace net {
45 // This is a "forward declaration" to avoid including ip_address_number.h
46 // Keep this in sync.
47 typedef std::vector<unsigned char> IPAddressNumber;
49 // Used by FormatUrl to specify handling of certain parts of the url.
50 typedef uint32_t FormatUrlType;
51 typedef uint32_t FormatUrlTypes;
53 #if defined(OS_WIN)
54 // Bluetooth address size. Windows Bluetooth is supported via winsock.
55 static const size_t kBluetoothAddressSize = 6;
56 #endif
58 // Nothing is omitted.
59 NET_EXPORT extern const FormatUrlType kFormatUrlOmitNothing;
61 // If set, any username and password are removed.
62 NET_EXPORT extern const FormatUrlType kFormatUrlOmitUsernamePassword;
64 // If the scheme is 'http://', it's removed.
65 NET_EXPORT extern const FormatUrlType kFormatUrlOmitHTTP;
67 // Omits the path if it is just a slash and there is no query or ref. This is
68 // meaningful for non-file "standard" URLs.
69 NET_EXPORT extern const FormatUrlType kFormatUrlOmitTrailingSlashOnBareHostname;
71 // Convenience for omitting all unnecessary types.
72 NET_EXPORT extern const FormatUrlType kFormatUrlOmitAll;
74 // Returns the number of explicitly allowed ports; for testing.
75 NET_EXPORT_PRIVATE extern size_t GetCountOfExplicitlyAllowedPorts();
77 // Splits an input of the form <host>[":"<port>] into its constituent parts.
78 // Saves the result into |*host| and |*port|. If the input did not have
79 // the optional port, sets |*port| to -1.
80 // Returns true if the parsing was successful, false otherwise.
81 // The returned host is NOT canonicalized, and may be invalid.
83 // IPv6 literals must be specified in a bracketed form, for instance:
84 // [::1]:90 and [::1]
86 // The resultant |*host| in both cases will be "::1" (not bracketed).
87 NET_EXPORT bool ParseHostAndPort(
88 std::string::const_iterator host_and_port_begin,
89 std::string::const_iterator host_and_port_end,
90 std::string* host,
91 int* port);
92 NET_EXPORT bool ParseHostAndPort(
93 const std::string& host_and_port,
94 std::string* host,
95 int* port);
97 // Returns a host:port string for the given URL.
98 NET_EXPORT std::string GetHostAndPort(const GURL& url);
100 // Returns a host[:port] string for the given URL, where the port is omitted
101 // if it is the default for the URL's scheme.
102 NET_EXPORT_PRIVATE std::string GetHostAndOptionalPort(const GURL& url);
104 // Returns true if |hostname| contains a non-registerable or non-assignable
105 // domain name (eg: a gTLD that has not been assigned by IANA) or an IP address
106 // that falls in an IANA-reserved range.
107 NET_EXPORT bool IsHostnameNonUnique(const std::string& hostname);
109 // Convenience struct for when you need a |struct sockaddr|.
// Bundles a |sockaddr_storage| buffer with a length field and a generic
// |struct sockaddr*| view of that same buffer.
110 struct SockaddrStorage {
// Default constructor: |addr_len| starts out as the full size of
// |addr_storage|, and |addr| is pointed at |addr_storage|.
111 SockaddrStorage() : addr_len(sizeof(addr_storage)),
112 addr(reinterpret_cast<struct sockaddr*>(&addr_storage)) {}
// Copy construction and assignment are declared here and defined elsewhere.
// Because |addr| is a const pointer member, the compiler cannot generate a
// copy-assignment operator implicitly, so one is declared by hand.
113 SockaddrStorage(const SockaddrStorage& other);
114 void operator=(const SockaddrStorage& other);
// Backing storage for the socket address.
116 struct sockaddr_storage addr_storage;
// Length associated with the address; initialized to sizeof(addr_storage) by
// the default constructor.
117 socklen_t addr_len;
// Generic-typed pointer to the address. The pointer itself is const; the
// default constructor sets it to the address of |addr_storage|.
118 struct sockaddr* const addr;
121 // Extracts the IP address and port portions of a sockaddr. |port| is optional,
122 // and will not be filled in if NULL.
123 bool GetIPAddressFromSockAddr(const struct sockaddr* sock_addr,
124 socklen_t sock_addr_len,
125 const unsigned char** address,
126 size_t* address_len,
127 uint16_t* port);
129 // Same as IPAddressToString() but for a sockaddr. This output will not include
130 // the IPv6 scope ID.
131 NET_EXPORT std::string NetAddressToString(const struct sockaddr* sa,
132 socklen_t sock_addr_len);
134 // Same as IPAddressToStringWithPort() but for a sockaddr. This output will not
135 // include the IPv6 scope ID.
136 NET_EXPORT std::string NetAddressToStringWithPort(const struct sockaddr* sa,
137 socklen_t sock_addr_len);
139 // Returns the hostname of the current system. Returns empty string on failure.
140 NET_EXPORT std::string GetHostName();
142 // Extracts the unescaped username/password from |url|, saving the results
143 // into |*username| and |*password|.
144 NET_EXPORT_PRIVATE void GetIdentityFromURL(const GURL& url,
145 base::string16* username,
146 base::string16* password);
148 // Returns either the host from |url|, or, if the host is empty, the full spec.
149 NET_EXPORT std::string GetHostOrSpecFromURL(const GURL& url);
151 // Return the value of the HTTP response header with name 'name'. 'headers'
152 // should be in the format that URLRequest::GetResponseHeaders() returns.
153 // Returns the empty string if the header is not found.
154 NET_EXPORT std::string GetSpecificHeader(const std::string& headers,
155 const std::string& name);
157 // Converts the given host name to unicode characters. This can be called for
158 // any host name, if the input is not IDN or is invalid in some way, we'll just
159 // return the ASCII source so it is still usable.
161 // The input should be the canonicalized ASCII host name from GURL. This
162 // function does NOT accept UTF-8!
164 // |languages| is a comma separated list of ISO 639 language codes. It
165 // is used to determine whether a hostname is 'comprehensible' to a user
166 // who understands languages listed. |host| will be converted to a
167 // human-readable form (Unicode) ONLY when each component of |host| is
168 // regarded as 'comprehensible'. Script-mixing is not allowed except that
169 // Latin letters in the ASCII range can be mixed with a limited set of
170 // script-language pairs (currently Han, Kana and Hangul for zh,ja and ko).
171 // When |languages| is empty, even that mixing is not allowed.
172 NET_EXPORT base::string16 IDNToUnicode(const std::string& host,
173 const std::string& languages);
175 // Canonicalizes |host| and returns it. Also fills |host_info| with
176 // IP address information. |host_info| must not be NULL.
177 NET_EXPORT std::string CanonicalizeHost(const std::string& host,
178 url::CanonHostInfo* host_info);
180 // Returns true if |host| is not an IP address and is compliant with a set of
181 // rules based on RFC 1738 and tweaked to be compatible with the real world.
182 // The rules are:
183 // * One or more components separated by '.'
184 // * Each component contains only alphanumeric characters and '-' or '_'
185 // * The last component begins with an alphanumeric character
186 // * Optional trailing dot after last component (means "treat as FQDN")
188 // NOTE: You should only pass in hosts that have been returned from
189 // CanonicalizeHost(), or you may not get accurate results.
190 NET_EXPORT bool IsCanonicalizedHostCompliant(const std::string& host);
192 // Call these functions to get the html snippet for a directory listing.
193 // The return values of both functions are in UTF-8.
194 NET_EXPORT std::string GetDirectoryListingHeader(const base::string16& title);
196 // Given the name of a file in a directory (ftp or local) and
197 // other information (is_dir, size, modification time), it returns
198 // the html snippet to add the entry for the file to the directory listing.
199 // Currently, it's a script tag containing a call to a Javascript function
200 // |addRow|.
202 // |name| is the file name to be displayed. |raw_bytes| will be used
203 // as the actual target of the link (so for example, ftp links should use
204 // server's encoding). If |raw_bytes| is an empty string, UTF-8 encoded |name|
205 // will be used.
207 // Both |name| and |raw_bytes| are escaped internally.
208 NET_EXPORT std::string GetDirectoryListingEntry(const base::string16& name,
209 const std::string& raw_bytes,
210 bool is_dir,
211 int64_t size,
212 base::Time modified);
214 // If text starts with "www." it is removed, otherwise text is returned
215 // unmodified.
216 NET_EXPORT base::string16 StripWWW(const base::string16& text);
218 // Runs |url|'s host through StripWWW(). |url| must be valid.
219 NET_EXPORT base::string16 StripWWWFromHost(const GURL& url);
221 // Checks if |port| is in the valid range (0 to 65535, though 0 is technically
222 // reserved). Should be used before casting a port to a uint16_t.
223 NET_EXPORT bool IsPortValid(int port);
225 // Checks |port| against a list of ports which are restricted by default.
226 // Returns true if |port| is allowed, false if it is restricted.
227 NET_EXPORT bool IsPortAllowedByDefault(int port);
229 // Checks |port| against a list of ports which are restricted by the FTP
230 // protocol. Returns true if |port| is allowed, false if it is restricted.
231 NET_EXPORT_PRIVATE bool IsPortAllowedByFtp(int port);
233 // Check if banned |port| has been overridden by an entry in
234 // |explicitly_allowed_ports_|.
235 NET_EXPORT_PRIVATE bool IsPortAllowedByOverride(int port);
237 // Set socket to non-blocking mode
238 NET_EXPORT int SetNonBlocking(int fd);
240 // Formats the host in |url| and appends it to |output|. The host formatter
241 // takes the same accept languages component as ElideURL().
242 NET_EXPORT void AppendFormattedHost(const GURL& url,
243 const std::string& languages,
244 base::string16* output);
246 // Creates a string representation of |url|. The IDN host name may be in Unicode
247 // if |languages| accepts the Unicode representation. |format_type| is a bitmask
248 // of FormatUrlTypes, see it for details. |unescape_rules| defines how to clean
249 // the URL for human readability. You will generally want |UnescapeRule::SPACES|
250 // for display to the user if you can handle spaces, or |UnescapeRule::NORMAL|
251 // if not. If the path part and the query part seem to be encoded in %-encoded
252 // UTF-8, decodes %-encoding and UTF-8.
254 // The last three parameters may be NULL.
256 // |new_parsed| will be set to the parsing parameters of the resultant URL.
258 // |prefix_end| will be the length before the hostname of the resultant URL.
260 // |offset[s]_for_adjustment| specifies one or more offsets into the original
261 // URL, representing insertion or selection points between characters: if the
262 // input is "http://foo.com/", offset 0 is before the entire URL, offset 7 is
263 // between the scheme and the host, and offset 15 is after the end of the URL.
264 // Valid input offsets range from 0 to the length of the input URL string. On
265 // exit, each offset will have been modified to reflect any changes made to the
266 // output string. For example, if |url| is "http://a:b@c.com/",
267 // |omit_username_password| is true, and an offset is 12 (pointing between 'c'
268 // and '.'), then on return the output string will be "http://c.com/" and the
269 // offset will be 8. If an offset cannot be successfully adjusted (e.g. because
270 // it points into the middle of a component that was entirely removed or into
271 // the middle of an encoding sequence), it will be set to base::string16::npos.
272 // For consistency, if an input offset points between the scheme and the
273 // username/password, and both are removed, on output this offset will be 0
274 // rather than npos; this means that offsets at the starts and ends of removed
275 // components are always transformed the same way regardless of what other
276 // components are adjacent.
277 NET_EXPORT base::string16 FormatUrl(const GURL& url,
278 const std::string& languages,
279 FormatUrlTypes format_types,
280 UnescapeRule::Type unescape_rules,
281 url::Parsed* new_parsed,
282 size_t* prefix_end,
283 size_t* offset_for_adjustment);
284 NET_EXPORT base::string16 FormatUrlWithOffsets(
285 const GURL& url,
286 const std::string& languages,
287 FormatUrlTypes format_types,
288 UnescapeRule::Type unescape_rules,
289 url::Parsed* new_parsed,
290 size_t* prefix_end,
291 std::vector<size_t>* offsets_for_adjustment);
292 // This function is like those above except it takes |adjustments| rather
293 // than |offset[s]_for_adjustment|. |adjustments| will be set to reflect all
294 // the transformations that happened to |url| to convert it into the returned
295 // value.
296 NET_EXPORT base::string16 FormatUrlWithAdjustments(
297 const GURL& url,
298 const std::string& languages,
299 FormatUrlTypes format_types,
300 UnescapeRule::Type unescape_rules,
301 url::Parsed* new_parsed,
302 size_t* prefix_end,
303 base::OffsetAdjuster::Adjustments* adjustments);
305 // This is a convenience function for FormatUrl() with
306 // format_types = kFormatUrlOmitAll and unescape = SPACES. This is the typical
307 // set of flags for "URLs to display to the user". You should be cautious about
308 // using this for URLs which will be parsed or sent to other applications.
// Forwards to the seven-argument FormatUrl() overload, passing NULL for
// |new_parsed|, |prefix_end| and |offset_for_adjustment|, so no parsing
// information or adjusted offsets are reported back to the caller.
309 inline base::string16 FormatUrl(const GURL& url, const std::string& languages) {
310 return FormatUrl(url, languages, kFormatUrlOmitAll, UnescapeRule::SPACES,
311 NULL, NULL, NULL);
314 // Returns whether FormatUrl() would strip a trailing slash from |url|, given a
315 // format flag including kFormatUrlOmitTrailingSlashOnBareHostname.
316 NET_EXPORT bool CanStripTrailingSlash(const GURL& url);
318 // Strip the portions of |url| that aren't core to the network request.
319 // - user name / password
320 // - reference section
321 NET_EXPORT_PRIVATE GURL SimplifyUrlForRequest(const GURL& url);
// NOTE(review): undocumented in this header. Presumably populates the
// explicitly-allowed port list that IsPortAllowedByOverride() consults and
// GetCountOfExplicitlyAllowedPorts() counts; the expected format of
// |allowed_ports| is not specified here -- confirm against the implementation.
323 NET_EXPORT void SetExplicitlyAllowedPorts(const std::string& allowed_ports);
// Scoped object constructed from a single port number.
// NOTE(review): undocumented in this header. Judging by the name, it
// presumably makes |port| an allowed port for the lifetime of the object and
// undoes that in the destructor -- the implementation is not visible here;
// confirm before relying on it.
325 class NET_EXPORT ScopedPortException {
326 public:
// |explicit| prevents an int from converting implicitly to this type.
327 explicit ScopedPortException(int port);
328 ~ScopedPortException();
330 private:
// Port number associated with this object (presumably the constructor
// argument -- confirm against the implementation).
331 int port_;
// Instances are neither copyable nor assignable.
333 DISALLOW_COPY_AND_ASSIGN(ScopedPortException);
336 // Returns true if it can determine that only loopback addresses are configured.
337 // i.e. if only 127.0.0.1 and ::1 are routable.
338 // Also returns false if it cannot determine this.
339 bool HaveOnlyLoopbackAddresses();
341 // Returns AddressFamily of the address.
342 NET_EXPORT_PRIVATE AddressFamily GetAddressFamily(
343 const IPAddressNumber& address);
345 // Maps the given AddressFamily to either AF_INET, AF_INET6 or AF_UNSPEC.
346 NET_EXPORT_PRIVATE int ConvertAddressFamily(AddressFamily address_family);
348 // Returns the port field of the |sockaddr|.
349 const uint16_t* GetPortFieldFromSockaddr(const struct sockaddr* address,
350 socklen_t address_len);
351 // Returns the value of port in |sockaddr| (in host byte ordering).
352 NET_EXPORT_PRIVATE int GetPortFromSockaddr(const struct sockaddr* address,
353 socklen_t address_len);
355 // Returns true if |host| is one of the names (e.g. "localhost") or IP
356 // addresses (IPv4 127.0.0.0/8 or IPv6 ::1) that indicate a loopback.
358 // Note that this function does not check for IP addresses other than
359 // the above, although other IP addresses may point to the local
360 // machine.
361 NET_EXPORT_PRIVATE bool IsLocalhost(const std::string& host);
// NOTE(review): undocumented in this header. Presumably reports whether
// |host| falls under the "localhost" top-level domain, as opposed to the
// broader loopback check done by IsLocalhost() -- confirm against the
// implementation.
363 NET_EXPORT_PRIVATE bool IsLocalhostTLD(const std::string& host);
365 // Returns true if the url's host is a Google server. This should only be used
366 // for histograms and shouldn't be used to affect behavior.
367 NET_EXPORT_PRIVATE bool HasGoogleHost(const GURL& url);
369 // A subset of IP address attributes which are actionable by the
370 // application layer. Currently unimplemented for all hosts;
371 // IP_ADDRESS_ATTRIBUTE_NONE is always returned.
// Each non-zero enumerator occupies its own bit (1 << 0, 1 << 1), so the
// values are distinct single-bit flags.
372 enum IPAddressAttributes {
// No attribute bits set.
373 IP_ADDRESS_ATTRIBUTE_NONE = 0,
375 // A temporary address is dynamic by nature and will not contain a MAC
376 // address. Presence of a MAC address in IPv6 addresses can be used to
377 // track an endpoint and cause privacy concerns. Please refer to
378 // RFC4941.
379 IP_ADDRESS_ATTRIBUTE_TEMPORARY = 1 << 0,
381 // A temporary address could become deprecated once the preferred
382 // lifetime is reached. It is still valid but shouldn't be used to
383 // create new connections.
384 IP_ADDRESS_ATTRIBUTE_DEPRECATED = 1 << 1,
387 // Differentiated Services Code Point.
388 // See http://tools.ietf.org/html/rfc2474 for details.
// The numeric values follow a regular pattern: class selector CSn has the
// value 8 * n, and assured-forwarding AFxy has the value 8 * x + 2 * y.
// DSCP_FIRST and DSCP_LAST alias the smallest and largest enumerators so
// that callers can range-check a value.
389 enum DiffServCodePoint {
// Sentinel outside the code point range.
// NOTE(review): presumably means "leave the current DSCP setting untouched"
// -- confirm against the callers.
390 DSCP_NO_CHANGE = -1,
391 DSCP_FIRST = DSCP_NO_CHANGE,
392 DSCP_DEFAULT = 0, // Same as DSCP_CS0
393 DSCP_CS0 = 0, // The default
394 DSCP_CS1 = 8, // Bulk/background traffic
395 DSCP_AF11 = 10,
396 DSCP_AF12 = 12,
397 DSCP_AF13 = 14,
398 DSCP_CS2 = 16,
399 DSCP_AF21 = 18,
400 DSCP_AF22 = 20,
401 DSCP_AF23 = 22,
402 DSCP_CS3 = 24,
403 DSCP_AF31 = 26,
404 DSCP_AF32 = 28,
405 DSCP_AF33 = 30,
406 DSCP_CS4 = 32,
407 DSCP_AF41 = 34, // Video
408 DSCP_AF42 = 36, // Video
409 DSCP_AF43 = 38, // Video
410 DSCP_CS5 = 40, // Video
411 DSCP_EF = 46, // Voice
412 DSCP_CS6 = 48, // Voice
413 DSCP_CS7 = 56, // Control messages
414 DSCP_LAST = DSCP_CS7
417 } // namespace net
419 #endif // NET_BASE_NET_UTIL_H_