1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
9 #include "base/format_macros.h"
10 #include "base/logging.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/sys_byteorder.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/safe_browsing/protocol_parser.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
19 // Helper function for quick scans of a line oriented protocol. Note that we use
20 // std::string::assign(const charT* s, size_type n)
21 // to copy data into 'line'. This form of 'assign' does not call strlen on
22 // 'input', which is binary data and is not NULL terminated. 'input' may also
23 // contain valid NULL bytes in the payload, which a strlen based copy would
// truncate.
// Scans the raw buffer |input| (|input_len| bytes), starting at its first
// byte, and copies the bytes that precede the scan cursor into |line| using
// the length-taking form of std::string::assign, so no NUL terminator is
// required on |input|.
// NOTE(review): the condition that ends the scan and the return statements
// are not visible in this excerpt -- confirm them against the full file.
25 bool GetLine(const char* input
, int input_len
, std::string
* line
) {
// |pos| is the scan cursor; it starts at the first byte of |input|.
26 const char* pos
= input
;
// Continue only while the cursor is non-null and fewer than |input_len|
// bytes separate it from the start of the buffer.
27 while (pos
&& (pos
- input
< input_len
)) {
// Copy exactly (pos - input) bytes, i.e. everything before the cursor.
29 line
->assign(input
, pos
- input
);
38 //------------------------------------------------------------------------------
39 // SafeBrowsingProtocolParser implementation
// Default constructor; takes no arguments.
// NOTE(review): the constructor body is not visible in this excerpt.
41 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
// Parses a GetHash response held in |chunk_data| and appends one
// SBFullHashResult to |full_hashes| for every full hash found.
//
// The visible code reads a header line with GetLine(), splits it on ':' into
// three fields (list name, add-chunk id, byte length of the hash data that
// follows), and then copies that hash data out in sizeof(SBFullHash) pieces.
// Hash data belonging to a list that safe_browsing_util::GetListId() does not
// recognize (negative id) is stepped over.
//
// NOTE(review): the |chunk_len| parameter, the enclosing loop header, the
// declarations of |line| and |offset|, and the return statements are not
// visible in this excerpt -- confirm them against the full file.
44 bool SafeBrowsingProtocolParser::ParseGetHash(
45 const char* chunk_data
,
47 std::vector
<SBFullHashResult
>* full_hashes
) {
// |length| counts the unparsed bytes; |data| is the read cursor.
49 int length
= chunk_len
;
50 const char* data
= chunk_data
;
// Fetch the next header line from the unparsed region.
55 if (!GetLine(data
, length
, &line
))
// Size of the header line plus one extra byte (presumably the '\n'
// delimiter that GetLine() does not copy -- TODO confirm).
58 offset
= static_cast<int>(line
.size()) + 1;
// Header fields are ':'-separated; the size is compared against three.
62 std::vector
<std::string
> cmd_parts
;
63 base::SplitString(line
, ':', &cmd_parts
);
64 if (cmd_parts
.size() != 3)
// Field 0 is the list name, field 1 the add-chunk id, and field 2 the number
// of bytes of hash data that follow. atoi() performs no error reporting.
67 SBFullHashResult full_hash
;
68 full_hash
.list_name
= cmd_parts
[0];
69 full_hash
.add_chunk_id
= atoi(cmd_parts
[1].c_str());
70 int full_hash_len
= atoi(cmd_parts
[2].c_str());
72 // Ignore hash results from lists we don't recognize.
73 if (safe_browsing_util::GetListId(full_hash
.list_name
) < 0) {
// Step over the unrecognized list's hash bytes without recording them.
74 data
+= full_hash_len
;
75 length
-= full_hash_len
;
// Consume the hash data one SBFullHash at a time. |full_hash| keeps the list
// name and chunk id from the header, so each pushed result shares them.
79 while (full_hash_len
> 0) {
// Debug-only check that at least one whole hash remains.
80 DCHECK(static_cast<size_t>(full_hash_len
) >= sizeof(SBFullHash
));
81 memcpy(&full_hash
.hash
, data
, sizeof(SBFullHash
));
82 full_hashes
->push_back(full_hash
);
// Advance the cursor and shrink both byte counters by one hash.
83 data
+= sizeof(SBFullHash
);
84 length
-= sizeof(SBFullHash
);
85 full_hash_len
-= sizeof(SBFullHash
);
// Appends a GetHash request for |prefixes| to |request|.
//
// The request begins with a text header of the form "<a>:<b>\n", where <b> is
// sizeof(SBPrefix) * prefixes.size() (the total payload size in bytes), and
// is followed by the raw bytes of each prefix in order.
//
// NOTE(review): the first StringPrintf argument (<a>) and the length argument
// of the per-prefix append() are not visible in this excerpt; presumably both
// are sizeof(SBPrefix) -- TODO confirm against the full file.
92 void SafeBrowsingProtocolParser::FormatGetHash(
93 const std::vector
<SBPrefix
>& prefixes
, std::string
* request
) {
96 // Format the request for GetHash.
// PRIuS is the size_t printf specifier macro from base/format_macros.h.
97 request
->append(base::StringPrintf("%" PRIuS
":%" PRIuS
"\n",
99 sizeof(SBPrefix
) * prefixes
.size()));
// Append each prefix as raw bytes, in the order given.
100 for (size_t i
= 0; i
< prefixes
.size(); ++i
) {
101 request
->append(reinterpret_cast<const char*>(&prefixes
[i
]),
// Parses an update response held in |chunk_data|, one text line at a time.
//
// Outputs written by the visible code:
//   |next_update_sec| - set from the line giving the next earliest re-query
//                       time, in seconds; must be non-null (DCHECKed).
//   |deletes|         - receives one SBChunkDelete per 'ad' / 'sd' line.
//   |chunk_urls|      - receives one ChunkUrl per "u:" line.
//
// Each line is split on ':' and dispatched on the first character of its
// first field. The list name given by a list-name line is remembered in
// |list_name| and attached to the deletes and URLs that follow it.
//
// NOTE(review): the |chunk_len| parameter, any further parameters, the loop
// header, the `case` labels, and most return statements are not visible in
// this excerpt -- confirm them against the full file.
106 bool SafeBrowsingProtocolParser::ParseUpdate(
107 const char* chunk_data
,
109 int* next_update_sec
,
111 std::vector
<SBChunkDelete
>* deletes
,
112 std::vector
<ChunkUrl
>* chunk_urls
) {
113 DCHECK(next_update_sec
);
// |length| counts the unparsed bytes; |data| is the read cursor.
117 int length
= chunk_len
;
118 const char* data
= chunk_data
;
// Name of the list that the commands currently being parsed apply to.
121 std::string list_name
;
124 std::string cmd_line
;
125 if (!GetLine(data
, length
, &cmd_line
))
126 return false; // Error: bad list format!
128 std::vector
<std::string
> cmd_parts
;
129 base::SplitString(cmd_line
, ':', &cmd_parts
);
130 if (cmd_parts
.empty())
132 const std::string
& command
= cmd_parts
[0];
// Lines must split into exactly two fields unless the command starts with
// 'u'; the "u:" branch below uses the raw line rather than the split fields.
133 if (cmd_parts
.size() != 2 && command
[0] != 'u')
// Size of the line plus one extra byte (presumably the '\n' delimiter that
// GetLine() does not copy -- TODO confirm).
136 const int consumed
= static_cast<int>(cmd_line
.size()) + 1;
140 return false; // Parsing error.
142 // Differentiate on the first character of the command (which is usually
143 // only one character, with the exception of the 'ad' and 'sd' commands).
144 switch (command
[0]) {
147 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
148 // have also parsed the list name before getting here, or the add-del
149 // or sub-del will have no context.
150 if (command
.size() != 2 || command
[1] != 'd' || list_name
.empty())
// Record the delete: an 's' command marks it as a sub-del, the second field
// is converted to chunk ranges, and the current list name is attached.
152 SBChunkDelete chunk_delete
;
153 chunk_delete
.is_sub_del
= command
[0] == 's';
154 StringToRanges(cmd_parts
[1], &chunk_delete
.chunk_del
);
155 chunk_delete
.list_name
= list_name
;
156 deletes
->push_back(chunk_delete
);
161 // The line providing the name of the list (i.e. 'goog-phish-shavar').
162 list_name
= cmd_parts
[1];
166 // The line providing the next earliest time (in seconds) to re-query.
167 *next_update_sec
= atoi(cmd_parts
[1].c_str());
// The URL is taken from the raw line, not from the split fields.
172 chunk_url
.url
= cmd_line
.substr(2); // Skip the initial "u:".
173 chunk_url
.list_name
= list_name
;
174 chunk_urls
->push_back(chunk_url
);
// The second field is compared against the literal "pleasereset".
// NOTE(review): what happens on a match or mismatch is not visible here.
179 if (cmd_parts
[1] != "pleasereset")
185 // According to the spec, we ignore commands we don't understand.
// Parses a buffer containing one or more chunks for |list_name| and appends
// an SBChunk to |chunks| for each one.
//
// Every chunk starts with a text header line of four ':'-separated fields:
// the chunk kind ("a" for add, "s" for sub), the chunk number, the hash
// length, and the byte length of the chunk body. The body is handed to
// ParseAddChunk() or ParseSubChunk(), which fill in the new chunk's |hosts|.
// Returns false on the parse errors visible below.
//
// NOTE(review): the |data| and |length| parameters, the handling of a wrong
// field count, unknown hash length or unknown chunk kind, and the final
// return are not visible in this excerpt -- confirm against the full file.
193 bool SafeBrowsingProtocolParser::ParseChunk(const std::string
& list_name
,
196 SBChunkList
* chunks
) {
// |remaining| counts the unparsed bytes; |chunk_data| is the read cursor.
197 int remaining
= length
;
198 const char* chunk_data
= data
;
200 while (remaining
> 0) {
201 std::string cmd_line
;
202 if (!GetLine(chunk_data
, remaining
, &cmd_line
))
203 return false; // Error: bad chunk format!
// Step past the header line plus one extra byte (presumably the '\n'
// delimiter that GetLine() does not copy -- TODO confirm).
205 const int line_len
= static_cast<int>(cmd_line
.length()) + 1;
206 chunk_data
+= line_len
;
207 remaining
-= line_len
;
208 std::vector
<std::string
> cmd_parts
;
209 base::SplitString(cmd_line
, ':', &cmd_parts
);
210 if (cmd_parts
.size() != 4) {
214 // Process the chunk data.
215 const int chunk_number
= atoi(cmd_parts
[1].c_str());
216 const int hash_len
= atoi(cmd_parts
[2].c_str());
// Only prefix-sized and full-hash-sized entries are accepted.
217 if (hash_len
!= sizeof(SBPrefix
) && hash_len
!= sizeof(SBFullHash
)) {
218 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len
;
222 const int chunk_len
= atoi(cmd_parts
[3].c_str());
// The declared body length must not exceed the bytes that are left.
224 if (remaining
< chunk_len
)
225 return false; // parse error.
// Append a fresh chunk first; the sub-parsers then fill it in place through
// chunks->back().
227 chunks
->push_back(SBChunk());
228 chunks
->back().chunk_number
= chunk_number
;
230 if (cmd_parts
[0] == "a") {
231 chunks
->back().is_add
= true;
232 if (!ParseAddChunk(list_name
, chunk_data
, chunk_len
, hash_len
,
233 &chunks
->back().hosts
))
234 return false; // Parse error.
235 } else if (cmd_parts
[0] == "s") {
236 chunks
->back().is_add
= false;
237 if (!ParseSubChunk(list_name
, chunk_data
, chunk_len
, hash_len
,
238 &chunks
->back().hosts
))
239 return false; // Parse error.
// Step past the chunk body that was just parsed.
245 chunk_data
+= chunk_len
;
246 remaining
-= chunk_len
;
247 DCHECK_LE(0, remaining
);
// Debug-only check that the whole buffer was consumed.
250 DCHECK(remaining
== 0);
// Parses the body of an add chunk for |list_name| and appends the resulting
// SBChunkHost records to |hosts|.
//
// Two layouts are handled:
//  * For kBinHashList, kDownloadWhiteList, kExtensionBlacklist and
//    kIPBlacklist the body is a plain run of hashes; a single SBChunkHost
//    holding remaining / hash_len entries is produced.
//  * Otherwise the body is a sequence of records, each read with
//    ReadHostAndPrefixCount() followed by ReadPrefixes().
// The last visible statement returns true only when every byte was consumed.
//
// NOTE(review): the |data|, |data_len| and |hash_len| parameters, the
// declarations of |host| and |prefix_count|, the count argument passed to
// ReadPrefixes(), and the failure branches are not visible in this excerpt --
// confirm against the full file.
255 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string
& list_name
,
259 std::deque
<SBChunkHost
>* hosts
) {
// |chunk_data| is the read cursor; |remaining| counts the unparsed bytes.
260 const char* chunk_data
= data
;
261 int remaining
= data_len
;
// Entry type follows the hash length: prefix-sized or full hash.
263 SBEntry::Type type
= hash_len
== sizeof(SBPrefix
) ?
264 SBEntry::ADD_PREFIX
: SBEntry::ADD_FULL_HASH
;
266 if (list_name
== safe_browsing_util::kBinHashList
||
267 list_name
== safe_browsing_util::kDownloadWhiteList
||
268 list_name
== safe_browsing_util::kExtensionBlacklist
||
269 list_name
== safe_browsing_util::kIPBlacklist
) {
270 // These lists only contain prefixes, no HOSTKEY and COUNT.
271 DCHECK_EQ(0, remaining
% hash_len
);
272 prefix_count
= remaining
/ hash_len
;
273 SBChunkHost chunk_host
;
275 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
// The host record is appended before its entry is populated below; the
// ReadPrefixes() call still goes through the local |chunk_host.entry|.
276 hosts
->push_back(chunk_host
);
277 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
,
279 DVLOG(2) << "Unable to read chunk data for list: " << list_name
;
282 DCHECK_GE(remaining
, 0);
// Smallest possible record: one host key (sizeof(SBPrefix)) plus the
// one-byte prefix count.
285 const int min_size
= sizeof(SBPrefix
) + 1;
286 while (remaining
>= min_size
) {
287 if (!ReadHostAndPrefixCount(&chunk_data
, &remaining
,
288 &host
, &prefix_count
)) {
291 DCHECK_GE(remaining
, 0);
292 SBChunkHost chunk_host
;
293 chunk_host
.host
= host
;
294 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
295 hosts
->push_back(chunk_host
);
296 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
,
299 DCHECK_GE(remaining
, 0);
// Leftover bytes make this return false.
302 return remaining
== 0;
// Parses the body of a sub chunk for |list_name| and appends the resulting
// SBChunkHost records to |hosts|.
//
// Two layouts are handled:
//  * For kBinHashList, kDownloadWhiteList, kExtensionBlacklist and
//    kIPBlacklist the body is a run of (add chunk number, hash) pairs; a
//    single SBChunkHost holding remaining / (sizeof(int32) + hash_len)
//    entries is produced.
//  * Otherwise the body is a sequence of records read with
//    ReadHostAndPrefixCount(). A record whose count is zero carries only an
//    add chunk number, read with ReadChunkId(); the other path reads the
//    record's entries with ReadPrefixes().
// The last visible statement returns true only when every byte was consumed.
//
// NOTE(review): the |data|, |data_len| and |hash_len| parameters, the
// declarations of |host|, |prefix_count| and |chunk_id|, and the failure
// branches are not visible in this excerpt -- confirm against the full file.
305 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string
& list_name
,
309 std::deque
<SBChunkHost
>* hosts
) {
// |remaining| counts the unparsed bytes; |chunk_data| is the read cursor.
310 int remaining
= data_len
;
311 const char* chunk_data
= data
;
// Entry type follows the hash length: prefix-sized or full hash.
313 SBEntry::Type type
= hash_len
== sizeof(SBPrefix
) ?
314 SBEntry::SUB_PREFIX
: SBEntry::SUB_FULL_HASH
;
316 if (list_name
== safe_browsing_util::kBinHashList
||
317 list_name
== safe_browsing_util::kDownloadWhiteList
||
318 list_name
== safe_browsing_util::kExtensionBlacklist
||
319 list_name
== safe_browsing_util::kIPBlacklist
) {
320 SBChunkHost chunk_host
;
321 // Set host to 0 and it won't be used for kBinHashList.
323 // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
324 // and COUNT. |add_chunk_number| is int32.
// Integer division: any trailing partial pair is not counted here.
325 prefix_count
= remaining
/ (sizeof(int32
) + hash_len
);
326 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
327 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
, prefix_count
))
329 DCHECK_GE(remaining
, 0);
330 hosts
->push_back(chunk_host
);
// Smallest record considered: two SBPrefix-sized fields plus the one-byte
// count (presumably host key and add chunk number -- TODO confirm).
333 const int min_size
= 2 * sizeof(SBPrefix
) + 1;
334 while (remaining
>= min_size
) {
335 if (!ReadHostAndPrefixCount(&chunk_data
, &remaining
,
336 &host
, &prefix_count
)) {
339 DCHECK_GE(remaining
, 0);
340 SBChunkHost chunk_host
;
341 chunk_host
.host
= host
;
342 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
343 hosts
->push_back(chunk_host
);
344 if (prefix_count
== 0) {
345 // There is only an add chunk number (no prefixes).
347 if (!ReadChunkId(&chunk_data
, &remaining
, &chunk_id
))
349 DCHECK_GE(remaining
, 0);
// Store the add chunk number on the entry itself.
350 chunk_host
.entry
->set_chunk_id(chunk_id
);
353 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
,
356 DCHECK_GE(remaining
, 0);
// Leftover bytes make this return false.
359 return remaining
== 0;
// Reads a record header from the cursor |*data|: sizeof(SBPrefix) bytes that
// are copied into |*host|, followed by one byte stored in |*count| as an
// unsigned value (0..255). |*data| and |*remaining| are updated as bytes are
// consumed.
//
// A header needs sizeof(SBPrefix) + 1 bytes; |*remaining| is checked against
// that size before anything is read.
//
// NOTE(review): the statements that step past the count byte and the return
// statements are not visible in this excerpt -- confirm against the full
// file.
362 bool SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
363 const char** data
, int* remaining
, SBPrefix
* host
, int* count
) {
// Bounds check before reading the header.
364 if (static_cast<size_t>(*remaining
) < sizeof(SBPrefix
) + 1)
366 // Next 4 bytes are the host prefix.
367 memcpy(host
, *data
, sizeof(SBPrefix
));
368 *data
+= sizeof(SBPrefix
);
369 *remaining
-= sizeof(SBPrefix
);
371 // Next 1 byte is the prefix count (could be zero, but never negative).
// The cast through unsigned char keeps bytes >= 0x80 from becoming negative.
372 *count
= static_cast<unsigned char>(**data
);
375 DCHECK_GE(*remaining
, 0);
// Reads a chunk id of sizeof(*chunk_id) bytes from the cursor |*data| into
// |*chunk_id|, advances |*data|, decreases |*remaining| by the same amount,
// and then passes the value through base::HostToNet32().
//
// |*remaining| is checked against the id size before the copy.
//
// NOTE(review): the return statements are not visible in this excerpt --
// confirm against the full file.
379 bool SafeBrowsingProtocolParser::ReadChunkId(
380 const char** data
, int* remaining
, int* chunk_id
) {
381 // Protocol says four bytes, not sizeof(int). Make sure those
382 // values are the same.
383 DCHECK_EQ(sizeof(*chunk_id
), 4u);
// Bounds check before copying.
384 if (static_cast<size_t>(*remaining
) < sizeof(*chunk_id
))
// memcpy is used for the copy, so |*data| does not have to be aligned.
386 memcpy(chunk_id
, *data
, sizeof(*chunk_id
));
387 *data
+= sizeof(*chunk_id
);
388 *remaining
-= sizeof(*chunk_id
);
// Byte-order conversion of the value just read.
389 *chunk_id
= base::HostToNet32(*chunk_id
);
390 DCHECK_GE(*remaining
, 0);
// Reads |count| items from the cursor |*data| into |entry|, storing item i at
// index i.
//
// For each item:
//  * if entry->IsSub(), a chunk id is read first with ReadChunkId() and
//    stored with SetChunkIdAtPrefix();
//  * then entry->HashLen() bytes are copied out and stored with SetPrefixAt()
//    when entry->IsPrefix(), or with SetFullHashAt() on the other path.
// |*remaining| is checked against the hash length before each copy and
// reduced by it afterwards.
//
// NOTE(review): the declarations of |chunk_id|, |prefix| and |hash|, the
// statement advancing |*data| past the hash, and the return statements are
// not visible in this excerpt -- confirm against the full file.
394 bool SafeBrowsingProtocolParser::ReadPrefixes(
395 const char** data
, int* remaining
, SBEntry
* entry
, int count
) {
// The per-item hash size is fixed by the entry.
396 int hash_len
= entry
->HashLen();
397 for (int i
= 0; i
< count
; ++i
) {
398 if (entry
->IsSub()) {
// Sub entries carry a chunk id ahead of every hash.
400 if (!ReadChunkId(data
, remaining
, &chunk_id
))
402 DCHECK_GE(*remaining
, 0);
403 entry
->SetChunkIdAtPrefix(i
, chunk_id
);
// Bounds check before copying the hash bytes.
406 if (*remaining
< hash_len
)
408 if (entry
->IsPrefix()) {
410 DCHECK_EQ(hash_len
, (int)sizeof(prefix
));
411 memcpy(&prefix
, *data
, sizeof(prefix
));
412 entry
->SetPrefixAt(i
, prefix
);
415 DCHECK_EQ(hash_len
, (int)sizeof(hash
));
416 memcpy(&hash
, *data
, sizeof(hash
));
417 entry
->SetFullHashAt(i
, hash
);
420 *remaining
-= hash_len
;
421 DCHECK_GE(*remaining
, 0);