[Metrics] Make MetricsStateManager take a callback param to check if UMA is enabled.
[chromium-blink-merge.git] / chrome / browser / safe_browsing / protocol_parser.cc
blobd22c73ec638f197d50c4a10c20fdaa27fce69ad8
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
7 #include <stdlib.h>
9 #include "base/format_macros.h"
10 #include "base/logging.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/sys_byteorder.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/safe_browsing/protocol_parser.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
18 namespace {
// Line scanner for the line-oriented parts of the protocol.
//
// Looks for the first '\n' within the first |input_len| bytes of |input|. If
// one is found, the bytes preceding it (newline excluded) are copied into
// |line| and true is returned. If |input| is NULL or no newline lies within
// the scanned range, |line| is left untouched and false is returned.
//
// |input| is binary data: it is not NULL terminated and may legitimately
// contain NULL bytes in the payload. The copy therefore uses
// std::string::assign(const charT* s, size_type n), which takes an explicit
// byte count instead of relying on strlen and truncating the data.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input)
    return false;

  for (int index = 0; index < input_len; ++index) {
    if (input[index] != '\n')
      continue;
    line->assign(input, index);
    return true;
  }
  return false;
}
36 } // namespace
38 //------------------------------------------------------------------------------
39 // SafeBrowsingParser implementation
// Constructs the parser. No initialization is performed in the visible
// source; the parsing methods below keep their working state in locals and
// in the out-parameters supplied by the caller.
41 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
44 bool SafeBrowsingProtocolParser::ParseGetHash(
45 const char* chunk_data,
46 int chunk_len,
47 std::vector<SBFullHashResult>* full_hashes) {
48 full_hashes->clear();
49 int length = chunk_len;
50 const char* data = chunk_data;
52 int offset;
53 std::string line;
54 while (length > 0) {
55 if (!GetLine(data, length, &line))
56 return false;
58 offset = static_cast<int>(line.size()) + 1;
59 data += offset;
60 length -= offset;
62 std::vector<std::string> cmd_parts;
63 base::SplitString(line, ':', &cmd_parts);
64 if (cmd_parts.size() != 3)
65 return false;
67 SBFullHashResult full_hash;
68 full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
69 // Ignore cmd_parts[1] (add_chunk_id), as we no longer use it with SB 2.3
70 // caching rules.
71 int full_hash_len = atoi(cmd_parts[2].c_str());
73 if (full_hash_len < 0 || full_hash_len > length)
74 return false;
76 // Ignore hash results from lists we don't recognize.
77 if (full_hash.list_id < 0) {
78 data += full_hash_len;
79 length -= full_hash_len;
80 continue;
83 while (static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash)) {
84 memcpy(&full_hash.hash, data, sizeof(SBFullHash));
85 full_hashes->push_back(full_hash);
86 data += sizeof(SBFullHash);
87 length -= sizeof(SBFullHash);
88 full_hash_len -= sizeof(SBFullHash);
92 return length == 0;
95 void SafeBrowsingProtocolParser::FormatGetHash(
96 const std::vector<SBPrefix>& prefixes, std::string* request) {
97 DCHECK(request);
99 // Format the request for GetHash.
100 request->append(base::StringPrintf("%" PRIuS ":%" PRIuS "\n",
101 sizeof(SBPrefix),
102 sizeof(SBPrefix) * prefixes.size()));
103 for (size_t i = 0; i < prefixes.size(); ++i) {
104 request->append(reinterpret_cast<const char*>(&prefixes[i]),
105 sizeof(SBPrefix));
109 bool SafeBrowsingProtocolParser::ParseUpdate(
110 const char* chunk_data,
111 int chunk_len,
112 int* next_update_sec,
113 bool* reset,
114 std::vector<SBChunkDelete>* deletes,
115 std::vector<ChunkUrl>* chunk_urls) {
116 DCHECK(next_update_sec);
117 DCHECK(deletes);
118 DCHECK(chunk_urls);
120 int length = chunk_len;
121 const char* data = chunk_data;
123 // Populated below.
124 std::string list_name;
126 while (length > 0) {
127 std::string cmd_line;
128 if (!GetLine(data, length, &cmd_line))
129 return false; // Error: bad list format!
131 std::vector<std::string> cmd_parts;
132 base::SplitString(cmd_line, ':', &cmd_parts);
133 if (cmd_parts.empty())
134 return false;
135 const std::string& command = cmd_parts[0];
136 if (cmd_parts.size() != 2 && command[0] != 'u')
137 return false;
139 const int consumed = static_cast<int>(cmd_line.size()) + 1;
140 data += consumed;
141 length -= consumed;
142 if (length < 0)
143 return false; // Parsing error.
145 // Differentiate on the first character of the command (which is usually
146 // only one character, with the exception of the 'ad' and 'sd' commands).
147 switch (command[0]) {
148 case 'a':
149 case 's': {
150 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
151 // have also parsed the list name before getting here, or the add-del
152 // or sub-del will have no context.
153 if (command.size() != 2 || command[1] != 'd' || list_name.empty())
154 return false;
155 SBChunkDelete chunk_delete;
156 chunk_delete.is_sub_del = command[0] == 's';
157 StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
158 chunk_delete.list_name = list_name;
159 deletes->push_back(chunk_delete);
160 break;
163 case 'i':
164 // The line providing the name of the list (i.e. 'goog-phish-shavar').
165 list_name = cmd_parts[1];
166 break;
168 case 'n':
169 // The line providing the next earliest time (in seconds) to re-query.
170 *next_update_sec = atoi(cmd_parts[1].c_str());
171 break;
173 case 'u': {
174 ChunkUrl chunk_url;
175 chunk_url.url = cmd_line.substr(2); // Skip the initial "u:".
176 chunk_url.list_name = list_name;
177 chunk_urls->push_back(chunk_url);
178 break;
181 case 'r':
182 if (cmd_parts[1] != "pleasereset")
183 return false;
184 *reset = true;
185 break;
187 default:
188 // According to the spec, we ignore commands we don't understand.
189 break;
193 return true;
196 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
197 const char* data,
198 int length,
199 SBChunkList* chunks) {
200 int remaining = length;
201 const char* chunk_data = data;
203 while (remaining > 0) {
204 std::string cmd_line;
205 if (!GetLine(chunk_data, remaining, &cmd_line))
206 return false; // Error: bad chunk format!
208 const int line_len = static_cast<int>(cmd_line.length()) + 1;
209 chunk_data += line_len;
210 remaining -= line_len;
211 std::vector<std::string> cmd_parts;
212 base::SplitString(cmd_line, ':', &cmd_parts);
213 if (cmd_parts.size() != 4) {
214 return false;
217 // Process the chunk data.
218 const int chunk_number = atoi(cmd_parts[1].c_str());
219 const int hash_len = atoi(cmd_parts[2].c_str());
220 if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
221 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
222 return false;
225 const int chunk_len = atoi(cmd_parts[3].c_str());
227 if (chunk_len < 0 || chunk_len > remaining)
228 return false; // parse error.
230 chunks->push_back(SBChunk());
231 chunks->back().chunk_number = chunk_number;
233 if (cmd_parts[0] == "a") {
234 chunks->back().is_add = true;
235 if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
236 &chunks->back().hosts))
237 return false; // Parse error.
238 } else if (cmd_parts[0] == "s") {
239 chunks->back().is_add = false;
240 if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
241 &chunks->back().hosts))
242 return false; // Parse error.
243 } else {
244 NOTREACHED();
245 return false;
248 chunk_data += chunk_len;
249 remaining -= chunk_len;
250 DCHECK_LE(0, remaining);
253 DCHECK(remaining == 0);
255 return true;
258 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
259 const char* data,
260 int data_len,
261 int hash_len,
262 std::deque<SBChunkHost>* hosts) {
263 const char* chunk_data = data;
264 int remaining = data_len;
265 int prefix_count;
266 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
267 SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
269 if (list_name == safe_browsing_util::kDownloadWhiteList ||
270 list_name == safe_browsing_util::kExtensionBlacklist ||
271 list_name == safe_browsing_util::kIPBlacklist) {
272 // These lists only contain prefixes, no HOSTKEY and COUNT.
273 DCHECK_EQ(0, remaining % hash_len);
274 prefix_count = remaining / hash_len;
275 SBChunkHost chunk_host;
276 chunk_host.host = 0;
277 chunk_host.entry = SBEntry::Create(type, prefix_count);
278 hosts->push_back(chunk_host);
279 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
280 prefix_count)) {
281 DVLOG(2) << "Unable to read chunk data for list: " << list_name;
282 return false;
284 DCHECK_GE(remaining, 0);
285 } else {
286 SBPrefix host;
287 const int min_size = sizeof(SBPrefix) + 1;
288 while (remaining >= min_size) {
289 if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
290 &host, &prefix_count)) {
291 return false;
293 DCHECK_GE(remaining, 0);
294 SBChunkHost chunk_host;
295 chunk_host.host = host;
296 chunk_host.entry = SBEntry::Create(type, prefix_count);
297 hosts->push_back(chunk_host);
298 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
299 prefix_count))
300 return false;
301 DCHECK_GE(remaining, 0);
304 return remaining == 0;
307 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
308 const char* data,
309 int data_len,
310 int hash_len,
311 std::deque<SBChunkHost>* hosts) {
312 int remaining = data_len;
313 const char* chunk_data = data;
314 int prefix_count;
315 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
316 SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
318 if (list_name == safe_browsing_util::kDownloadWhiteList ||
319 list_name == safe_browsing_util::kExtensionBlacklist ||
320 list_name == safe_browsing_util::kIPBlacklist) {
321 SBChunkHost chunk_host;
322 // Set host to 0 and it won't be used.
323 chunk_host.host = 0;
324 // lists only contain (add_chunk_number, prefix) pairs, no HOSTKEY
325 // and COUNT. |add_chunk_number| is int32.
326 prefix_count = remaining / (sizeof(int32) + hash_len);
327 chunk_host.entry = SBEntry::Create(type, prefix_count);
328 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
329 return false;
330 DCHECK_GE(remaining, 0);
331 hosts->push_back(chunk_host);
332 } else {
333 SBPrefix host;
334 const int min_size = 2 * sizeof(SBPrefix) + 1;
335 while (remaining >= min_size) {
336 if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
337 &host, &prefix_count)) {
338 return false;
340 DCHECK_GE(remaining, 0);
341 SBChunkHost chunk_host;
342 chunk_host.host = host;
343 chunk_host.entry = SBEntry::Create(type, prefix_count);
344 hosts->push_back(chunk_host);
345 if (prefix_count == 0) {
346 // There is only an add chunk number (no prefixes).
347 int chunk_id;
348 if (!ReadChunkId(&chunk_data, &remaining, &chunk_id))
349 return false;
350 DCHECK_GE(remaining, 0);
351 chunk_host.entry->set_chunk_id(chunk_id);
352 continue;
354 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
355 prefix_count))
356 return false;
357 DCHECK_GE(remaining, 0);
360 return remaining == 0;
363 bool SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
364 const char** data, int* remaining, SBPrefix* host, int* count) {
365 if (static_cast<size_t>(*remaining) < sizeof(SBPrefix) + 1)
366 return false;
367 // Next 4 bytes are the host prefix.
368 memcpy(host, *data, sizeof(SBPrefix));
369 *data += sizeof(SBPrefix);
370 *remaining -= sizeof(SBPrefix);
372 // Next 1 byte is the prefix count (could be zero, but never negative).
373 *count = static_cast<unsigned char>(**data);
374 *data += 1;
375 *remaining -= 1;
376 DCHECK_GE(*remaining, 0);
377 return true;
380 bool SafeBrowsingProtocolParser::ReadChunkId(
381 const char** data, int* remaining, int* chunk_id) {
382 // Protocol says four bytes, not sizeof(int). Make sure those
383 // values are the same.
384 DCHECK_EQ(sizeof(*chunk_id), 4u);
385 if (static_cast<size_t>(*remaining) < sizeof(*chunk_id))
386 return false;
387 memcpy(chunk_id, *data, sizeof(*chunk_id));
388 *data += sizeof(*chunk_id);
389 *remaining -= sizeof(*chunk_id);
390 *chunk_id = base::HostToNet32(*chunk_id);
391 DCHECK_GE(*remaining, 0);
392 return true;
395 bool SafeBrowsingProtocolParser::ReadPrefixes(
396 const char** data, int* remaining, SBEntry* entry, int count) {
397 int hash_len = entry->HashLen();
398 for (int i = 0; i < count; ++i) {
399 if (entry->IsSub()) {
400 int chunk_id;
401 if (!ReadChunkId(data, remaining, &chunk_id))
402 return false;
403 DCHECK_GE(*remaining, 0);
404 entry->SetChunkIdAtPrefix(i, chunk_id);
407 if (*remaining < hash_len)
408 return false;
409 if (entry->IsPrefix()) {
410 SBPrefix prefix;
411 DCHECK_EQ(hash_len, (int)sizeof(prefix));
412 memcpy(&prefix, *data, sizeof(prefix));
413 entry->SetPrefixAt(i, prefix);
414 } else {
415 SBFullHash hash;
416 DCHECK_EQ(hash_len, (int)sizeof(hash));
417 memcpy(&hash, *data, sizeof(hash));
418 entry->SetFullHashAt(i, hash);
420 *data += hash_len;
421 *remaining -= hash_len;
422 DCHECK_GE(*remaining, 0);
425 return true;