1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/dns/dns_hosts.h"
7 #include "base/file_util.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/string_tokenizer.h"
13 using base::StringPiece
;
17 // Parses the contents of a hosts file. Returns one token (IP or hostname) at
18 // a time. Doesn't copy anything; accepts the file as a StringPiece and
19 // returns tokens as StringPieces.
22 explicit HostsParser(const StringPiece
& text
)
28 token_is_ip_(false) {}
30 // Advances to the next token (IP or hostname). Returns whether another
31 // token was available. |token_is_ip| and |token| can be used to find out
32 // the type and text of the token.
34 bool next_is_ip
= (pos_
== 0);
35 while (pos_
< end_
&& pos_
!= std::string::npos
) {
36 switch (text_
[pos_
]) {
53 size_t token_start
= pos_
;
55 size_t token_end
= (pos_
== std::string::npos
) ? end_
: pos_
;
57 token_
= StringPiece(data_
+ token_start
, token_end
- token_start
);
58 token_is_ip_
= next_is_ip
;
65 text_
= StringPiece();
// Fast-forwards the parser to the next line. Should be called if an IP
// address doesn't parse, to avoid wasting time tokenizing hostnames that
// will be ignored.
72 void SkipRestOfLine() {
73 pos_
= text_
.find("\n", pos_
);
76 // Returns whether the last-parsed token is an IP address (true) or a
78 bool token_is_ip() { return token_is_ip_
; }
80 // Returns the text of the last-parsed token as a StringPiece referencing
81 // the same underlying memory as the StringPiece passed to the constructor.
82 // Returns an empty StringPiece if no token has been parsed or the end of
83 // the input string has been reached.
84 const StringPiece
& token() { return token_
; }
88 pos_
= text_
.find_first_of(" \t\n\r#", pos_
);
91 void SkipWhitespace() {
92 pos_
= text_
.find_first_not_of(" \t", pos_
);
103 DISALLOW_COPY_AND_ASSIGN(HostsParser
);
108 void ParseHosts(const std::string
& contents
, DnsHosts
* dns_hosts
) {
110 DnsHosts
& hosts
= *dns_hosts
;
114 AddressFamily family
= ADDRESS_FAMILY_IPV4
;
115 HostsParser
parser(contents
);
116 while (parser
.Advance()) {
117 if (parser
.token_is_ip()) {
118 StringPiece new_ip_text
= parser
.token();
119 // Some ad-blocking hosts files contain thousands of entries pointing to
120 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP
121 // again if it's the same as the one above it.
122 if (new_ip_text
!= ip_text
) {
123 IPAddressNumber new_ip
;
124 if (ParseIPLiteralToNumber(parser
.token().as_string(), &new_ip
)) {
125 ip_text
= new_ip_text
;
127 family
= (ip
.size() == 4) ? ADDRESS_FAMILY_IPV4
: ADDRESS_FAMILY_IPV6
;
129 parser
.SkipRestOfLine();
133 DnsHostsKey
key(parser
.token().as_string(), family
);
134 StringToLowerASCII(&key
.first
);
135 IPAddressNumber
& mapped_ip
= hosts
[key
];
136 if (mapped_ip
.empty())
138 // else ignore this entry (first hit counts)
143 bool ParseHostsFile(const base::FilePath
& path
, DnsHosts
* dns_hosts
) {
145 // Missing file indicates empty HOSTS.
146 if (!base::PathExists(path
))
150 if (!base::GetFileSize(path
, &size
))
153 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size
);
155 // Reject HOSTS files larger than |kMaxHostsSize| bytes.
156 const int64 kMaxHostsSize
= 1 << 25; // 32MB
157 if (size
> kMaxHostsSize
)
160 std::string contents
;
161 if (!base::ReadFileToString(path
, &contents
))
164 ParseHosts(contents
, dns_hosts
);