1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/dns/dns_hosts.h"
7 #include "base/files/file_util.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram_macros.h"
10 #include "base/strings/string_util.h"
12 using base::StringPiece
;
18 // Parses the contents of a hosts file. Returns one token (IP or hostname) at
19 // a time. Doesn't copy anything; accepts the file as a StringPiece and
20 // returns tokens as StringPieces.
23 explicit HostsParser(const StringPiece
& text
, ParseHostsCommaMode comma_mode
)
29 comma_mode_(comma_mode
) {}
31 // Advances to the next token (IP or hostname). Returns whether another
32 // token was available. |token_is_ip| and |token| can be used to find out
33 // the type and text of the token.
35 bool next_is_ip
= (pos_
== 0);
36 while (pos_
< end_
&& pos_
!= std::string::npos
) {
37 switch (text_
[pos_
]) {
54 if (comma_mode_
== PARSE_HOSTS_COMMA_IS_WHITESPACE
) {
59 // If comma_mode_ is COMMA_IS_TOKEN, fall through:
62 size_t token_start
= pos_
;
64 size_t token_end
= (pos_
== std::string::npos
) ? end_
: pos_
;
66 token_
= StringPiece(data_
+ token_start
, token_end
- token_start
);
67 token_is_ip_
= next_is_ip
;
// Fast-forwards the parser to the next line. Should be called if an IP
// address doesn't parse, to avoid wasting time tokenizing hostnames that
// would have no valid address to map to.
80 void SkipRestOfLine() {
81 pos_
= text_
.find("\n", pos_
);
84 // Returns whether the last-parsed token is an IP address (true) or a
86 bool token_is_ip() { return token_is_ip_
; }
88 // Returns the text of the last-parsed token as a StringPiece referencing
89 // the same underlying memory as the StringPiece passed to the constructor.
90 // Returns an empty StringPiece if no token has been parsed or the end of
91 // the input string has been reached.
92 const StringPiece
& token() { return token_
; }
96 switch (comma_mode_
) {
97 case PARSE_HOSTS_COMMA_IS_TOKEN
:
98 pos_
= text_
.find_first_of(" \t\n\r#", pos_
);
100 case PARSE_HOSTS_COMMA_IS_WHITESPACE
:
101 pos_
= text_
.find_first_of(" ,\t\n\r#", pos_
);
106 void SkipWhitespace() {
107 switch (comma_mode_
) {
108 case PARSE_HOSTS_COMMA_IS_TOKEN
:
109 pos_
= text_
.find_first_not_of(" \t", pos_
);
111 case PARSE_HOSTS_COMMA_IS_WHITESPACE
:
112 pos_
= text_
.find_first_not_of(" ,\t", pos_
);
117 const StringPiece text_
;
125 const ParseHostsCommaMode comma_mode_
;
127 DISALLOW_COPY_AND_ASSIGN(HostsParser
);
130 void ParseHostsWithCommaMode(const std::string
& contents
,
132 ParseHostsCommaMode comma_mode
) {
137 AddressFamily family
= ADDRESS_FAMILY_IPV4
;
138 HostsParser
parser(contents
, comma_mode
);
139 while (parser
.Advance()) {
140 if (parser
.token_is_ip()) {
141 StringPiece new_ip_text
= parser
.token();
142 // Some ad-blocking hosts files contain thousands of entries pointing to
143 // the same IP address (usually 127.0.0.1). Don't bother parsing the IP
144 // again if it's the same as the one above it.
145 if (new_ip_text
!= ip_text
) {
146 IPAddressNumber new_ip
;
147 if (ParseIPLiteralToNumber(parser
.token().as_string(), &new_ip
)) {
148 ip_text
= new_ip_text
;
150 family
= (ip
.size() == 4) ? ADDRESS_FAMILY_IPV4
: ADDRESS_FAMILY_IPV6
;
152 parser
.SkipRestOfLine();
156 DnsHostsKey
key(parser
.token().as_string(), family
);
157 base::StringToLowerASCII(&key
.first
);
158 IPAddressNumber
* mapped_ip
= &(*dns_hosts
)[key
];
159 if (mapped_ip
->empty())
161 // else ignore this entry (first hit counts)
168 void ParseHostsWithCommaModeForTesting(const std::string
& contents
,
170 ParseHostsCommaMode comma_mode
) {
171 ParseHostsWithCommaMode(contents
, dns_hosts
, comma_mode
);
174 void ParseHosts(const std::string
& contents
, DnsHosts
* dns_hosts
) {
175 ParseHostsCommaMode comma_mode
;
176 #if defined(OS_MACOSX)
177 // Mac OS X allows commas to separate hostnames.
178 comma_mode
= PARSE_HOSTS_COMMA_IS_WHITESPACE
;
180 // Linux allows commas in hostnames.
181 comma_mode
= PARSE_HOSTS_COMMA_IS_TOKEN
;
184 ParseHostsWithCommaMode(contents
, dns_hosts
, comma_mode
);
187 bool ParseHostsFile(const base::FilePath
& path
, DnsHosts
* dns_hosts
) {
189 // Missing file indicates empty HOSTS.
190 if (!base::PathExists(path
))
194 if (!base::GetFileSize(path
, &size
))
197 UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize",
198 static_cast<base::HistogramBase::Sample
>(size
));
200 // Reject HOSTS files larger than |kMaxHostsSize| bytes.
201 const int64 kMaxHostsSize
= 1 << 25; // 32MB
202 if (size
> kMaxHostsSize
)
205 std::string contents
;
206 if (!base::ReadFileToString(path
, &contents
))
209 ParseHosts(contents
, dns_hosts
);