NaCl: Update revision in DEPS, r12770 -> r12773
[chromium-blink-merge.git] / chrome / browser / safe_browsing / protocol_parser.cc
blob035de5cd4837bb41c7e52c53772d0b5f9e9ab1f4
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Parse the data returned from the SafeBrowsing v2.1 protocol response.
#include <stdlib.h>
#include <string.h>

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/strings/string_split.h"
#include "base/strings/stringprintf.h"
#include "base/sys_byteorder.h"
#include "build/build_config.h"
#include "chrome/browser/safe_browsing/protocol_parser.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"
namespace {

// Helper for quick scans of a line oriented protocol.
//
// Looks for the first '\n' within the first |input_len| bytes of |input|. On
// success, stores everything before the newline (the newline itself is not
// included) in |line| and returns true. Returns false, leaving |line|
// untouched, when no newline lies within that range, when |input| is NULL, or
// when |input_len| is not positive.
//
// |input| is binary data: it is not NUL terminated and may carry valid NUL
// bytes in its payload. Both memchr and
//   std::string::assign(const charT* s, size_type n)
// work from an explicit byte count, so nothing here depends on strlen and
// embedded NUL bytes are copied intact rather than truncating the line.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input || input_len <= 0)
    return false;

  const char* newline = static_cast<const char*>(
      memchr(input, '\n', static_cast<size_t>(input_len)));
  if (!newline)
    return false;

  line->assign(input, static_cast<size_t>(newline - input));
  return true;
}

}  // namespace
38 //------------------------------------------------------------------------------
39 // SafeBrowsingParser implementation
41 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
44 bool SafeBrowsingProtocolParser::ParseGetHash(
45 const char* chunk_data,
46 int chunk_len,
47 std::vector<SBFullHashResult>* full_hashes) {
48 full_hashes->clear();
49 int length = chunk_len;
50 const char* data = chunk_data;
52 int offset;
53 std::string line;
54 while (length > 0) {
55 if (!GetLine(data, length, &line))
56 return false;
58 offset = static_cast<int>(line.size()) + 1;
59 data += offset;
60 length -= offset;
62 std::vector<std::string> cmd_parts;
63 base::SplitString(line, ':', &cmd_parts);
64 if (cmd_parts.size() != 3)
65 return false;
67 SBFullHashResult full_hash;
68 full_hash.list_name = cmd_parts[0];
69 full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
70 int full_hash_len = atoi(cmd_parts[2].c_str());
72 // Ignore hash results from lists we don't recognize.
73 if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
74 data += full_hash_len;
75 length -= full_hash_len;
76 continue;
79 while (full_hash_len > 0) {
80 DCHECK(static_cast<size_t>(full_hash_len) >= sizeof(SBFullHash));
81 memcpy(&full_hash.hash, data, sizeof(SBFullHash));
82 full_hashes->push_back(full_hash);
83 data += sizeof(SBFullHash);
84 length -= sizeof(SBFullHash);
85 full_hash_len -= sizeof(SBFullHash);
89 return length == 0;
92 void SafeBrowsingProtocolParser::FormatGetHash(
93 const std::vector<SBPrefix>& prefixes, std::string* request) {
94 DCHECK(request);
96 // Format the request for GetHash.
97 request->append(base::StringPrintf("%" PRIuS ":%" PRIuS "\n",
98 sizeof(SBPrefix),
99 sizeof(SBPrefix) * prefixes.size()));
100 for (size_t i = 0; i < prefixes.size(); ++i) {
101 request->append(reinterpret_cast<const char*>(&prefixes[i]),
102 sizeof(SBPrefix));
106 bool SafeBrowsingProtocolParser::ParseUpdate(
107 const char* chunk_data,
108 int chunk_len,
109 int* next_update_sec,
110 bool* reset,
111 std::vector<SBChunkDelete>* deletes,
112 std::vector<ChunkUrl>* chunk_urls) {
113 DCHECK(next_update_sec);
114 DCHECK(deletes);
115 DCHECK(chunk_urls);
117 int length = chunk_len;
118 const char* data = chunk_data;
120 // Populated below.
121 std::string list_name;
123 while (length > 0) {
124 std::string cmd_line;
125 if (!GetLine(data, length, &cmd_line))
126 return false; // Error: bad list format!
128 std::vector<std::string> cmd_parts;
129 base::SplitString(cmd_line, ':', &cmd_parts);
130 if (cmd_parts.empty())
131 return false;
132 const std::string& command = cmd_parts[0];
133 if (cmd_parts.size() != 2 && command[0] != 'u')
134 return false;
136 const int consumed = static_cast<int>(cmd_line.size()) + 1;
137 data += consumed;
138 length -= consumed;
139 if (length < 0)
140 return false; // Parsing error.
142 // Differentiate on the first character of the command (which is usually
143 // only one character, with the exception of the 'ad' and 'sd' commands).
144 switch (command[0]) {
145 case 'a':
146 case 's': {
147 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
148 // have also parsed the list name before getting here, or the add-del
149 // or sub-del will have no context.
150 if (command.size() != 2 || command[1] != 'd' || list_name.empty())
151 return false;
152 SBChunkDelete chunk_delete;
153 chunk_delete.is_sub_del = command[0] == 's';
154 StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
155 chunk_delete.list_name = list_name;
156 deletes->push_back(chunk_delete);
157 break;
160 case 'i':
161 // The line providing the name of the list (i.e. 'goog-phish-shavar').
162 list_name = cmd_parts[1];
163 break;
165 case 'n':
166 // The line providing the next earliest time (in seconds) to re-query.
167 *next_update_sec = atoi(cmd_parts[1].c_str());
168 break;
170 case 'u': {
171 ChunkUrl chunk_url;
172 chunk_url.url = cmd_line.substr(2); // Skip the initial "u:".
173 chunk_url.list_name = list_name;
174 chunk_urls->push_back(chunk_url);
175 break;
178 case 'r':
179 if (cmd_parts[1] != "pleasereset")
180 return false;
181 *reset = true;
182 break;
184 default:
185 // According to the spec, we ignore commands we don't understand.
186 break;
190 return true;
193 bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
194 const char* data,
195 int length,
196 SBChunkList* chunks) {
197 int remaining = length;
198 const char* chunk_data = data;
200 while (remaining > 0) {
201 std::string cmd_line;
202 if (!GetLine(chunk_data, remaining, &cmd_line))
203 return false; // Error: bad chunk format!
205 const int line_len = static_cast<int>(cmd_line.length()) + 1;
206 chunk_data += line_len;
207 remaining -= line_len;
208 std::vector<std::string> cmd_parts;
209 base::SplitString(cmd_line, ':', &cmd_parts);
210 if (cmd_parts.size() != 4) {
211 return false;
214 // Process the chunk data.
215 const int chunk_number = atoi(cmd_parts[1].c_str());
216 const int hash_len = atoi(cmd_parts[2].c_str());
217 if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
218 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
219 return false;
222 const int chunk_len = atoi(cmd_parts[3].c_str());
224 if (remaining < chunk_len)
225 return false; // parse error.
227 chunks->push_back(SBChunk());
228 chunks->back().chunk_number = chunk_number;
230 if (cmd_parts[0] == "a") {
231 chunks->back().is_add = true;
232 if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
233 &chunks->back().hosts))
234 return false; // Parse error.
235 } else if (cmd_parts[0] == "s") {
236 chunks->back().is_add = false;
237 if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
238 &chunks->back().hosts))
239 return false; // Parse error.
240 } else {
241 NOTREACHED();
242 return false;
245 chunk_data += chunk_len;
246 remaining -= chunk_len;
247 DCHECK_LE(0, remaining);
250 DCHECK(remaining == 0);
252 return true;
255 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
256 const char* data,
257 int data_len,
258 int hash_len,
259 std::deque<SBChunkHost>* hosts) {
260 const char* chunk_data = data;
261 int remaining = data_len;
262 int prefix_count;
263 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
264 SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;
266 if (list_name == safe_browsing_util::kBinHashList ||
267 list_name == safe_browsing_util::kDownloadWhiteList ||
268 list_name == safe_browsing_util::kExtensionBlacklist ||
269 list_name == safe_browsing_util::kIPBlacklist) {
270 // These lists only contain prefixes, no HOSTKEY and COUNT.
271 DCHECK_EQ(0, remaining % hash_len);
272 prefix_count = remaining / hash_len;
273 SBChunkHost chunk_host;
274 chunk_host.host = 0;
275 chunk_host.entry = SBEntry::Create(type, prefix_count);
276 hosts->push_back(chunk_host);
277 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
278 prefix_count)) {
279 DVLOG(2) << "Unable to read chunk data for list: " << list_name;
280 return false;
282 DCHECK_GE(remaining, 0);
283 } else {
284 SBPrefix host;
285 const int min_size = sizeof(SBPrefix) + 1;
286 while (remaining >= min_size) {
287 if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
288 &host, &prefix_count)) {
289 return false;
291 DCHECK_GE(remaining, 0);
292 SBChunkHost chunk_host;
293 chunk_host.host = host;
294 chunk_host.entry = SBEntry::Create(type, prefix_count);
295 hosts->push_back(chunk_host);
296 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
297 prefix_count))
298 return false;
299 DCHECK_GE(remaining, 0);
302 return remaining == 0;
305 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
306 const char* data,
307 int data_len,
308 int hash_len,
309 std::deque<SBChunkHost>* hosts) {
310 int remaining = data_len;
311 const char* chunk_data = data;
312 int prefix_count;
313 SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
314 SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;
316 if (list_name == safe_browsing_util::kBinHashList ||
317 list_name == safe_browsing_util::kDownloadWhiteList ||
318 list_name == safe_browsing_util::kExtensionBlacklist ||
319 list_name == safe_browsing_util::kIPBlacklist) {
320 SBChunkHost chunk_host;
321 // Set host to 0 and it won't be used for kBinHashList.
322 chunk_host.host = 0;
323 // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
324 // and COUNT. |add_chunk_number| is int32.
325 prefix_count = remaining / (sizeof(int32) + hash_len);
326 chunk_host.entry = SBEntry::Create(type, prefix_count);
327 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
328 return false;
329 DCHECK_GE(remaining, 0);
330 hosts->push_back(chunk_host);
331 } else {
332 SBPrefix host;
333 const int min_size = 2 * sizeof(SBPrefix) + 1;
334 while (remaining >= min_size) {
335 if (!ReadHostAndPrefixCount(&chunk_data, &remaining,
336 &host, &prefix_count)) {
337 return false;
339 DCHECK_GE(remaining, 0);
340 SBChunkHost chunk_host;
341 chunk_host.host = host;
342 chunk_host.entry = SBEntry::Create(type, prefix_count);
343 hosts->push_back(chunk_host);
344 if (prefix_count == 0) {
345 // There is only an add chunk number (no prefixes).
346 int chunk_id;
347 if (!ReadChunkId(&chunk_data, &remaining, &chunk_id))
348 return false;
349 DCHECK_GE(remaining, 0);
350 chunk_host.entry->set_chunk_id(chunk_id);
351 continue;
353 if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
354 prefix_count))
355 return false;
356 DCHECK_GE(remaining, 0);
359 return remaining == 0;
362 bool SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
363 const char** data, int* remaining, SBPrefix* host, int* count) {
364 if (static_cast<size_t>(*remaining) < sizeof(SBPrefix) + 1)
365 return false;
366 // Next 4 bytes are the host prefix.
367 memcpy(host, *data, sizeof(SBPrefix));
368 *data += sizeof(SBPrefix);
369 *remaining -= sizeof(SBPrefix);
371 // Next 1 byte is the prefix count (could be zero, but never negative).
372 *count = static_cast<unsigned char>(**data);
373 *data += 1;
374 *remaining -= 1;
375 DCHECK_GE(*remaining, 0);
376 return true;
379 bool SafeBrowsingProtocolParser::ReadChunkId(
380 const char** data, int* remaining, int* chunk_id) {
381 // Protocol says four bytes, not sizeof(int). Make sure those
382 // values are the same.
383 DCHECK_EQ(sizeof(*chunk_id), 4u);
384 if (static_cast<size_t>(*remaining) < sizeof(*chunk_id))
385 return false;
386 memcpy(chunk_id, *data, sizeof(*chunk_id));
387 *data += sizeof(*chunk_id);
388 *remaining -= sizeof(*chunk_id);
389 *chunk_id = base::HostToNet32(*chunk_id);
390 DCHECK_GE(*remaining, 0);
391 return true;
394 bool SafeBrowsingProtocolParser::ReadPrefixes(
395 const char** data, int* remaining, SBEntry* entry, int count) {
396 int hash_len = entry->HashLen();
397 for (int i = 0; i < count; ++i) {
398 if (entry->IsSub()) {
399 int chunk_id;
400 if (!ReadChunkId(data, remaining, &chunk_id))
401 return false;
402 DCHECK_GE(*remaining, 0);
403 entry->SetChunkIdAtPrefix(i, chunk_id);
406 if (*remaining < hash_len)
407 return false;
408 if (entry->IsPrefix()) {
409 SBPrefix prefix;
410 DCHECK_EQ(hash_len, (int)sizeof(prefix));
411 memcpy(&prefix, *data, sizeof(prefix));
412 entry->SetPrefixAt(i, prefix);
413 } else {
414 SBFullHash hash;
415 DCHECK_EQ(hash_len, (int)sizeof(hash));
416 memcpy(&hash, *data, sizeof(hash));
417 entry->SetFullHashAt(i, hash);
419 *data += hash_len;
420 *remaining -= hash_len;
421 DCHECK_GE(*remaining, 0);
424 return true;