1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
9 #include "base/format_macros.h"
10 #include "base/logging.h"
11 #include "base/strings/string_split.h"
12 #include "base/strings/stringprintf.h"
13 #include "base/sys_byteorder.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/safe_browsing/protocol_parser.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
// Helper function for quick scans of a line oriented protocol.
//
// Searches the first |input_len| bytes of |input| for a '\n'. On success the
// bytes preceding the newline (not including it) are copied into |line| and
// true is returned, so callers skip |line->size() + 1| bytes to reach the next
// line. Returns false, leaving |line| untouched, when no newline is found,
// when |input| is NULL, or when |input_len| is not positive.
//
// Note that we use
//   std::string::assign(const charT* s, size_type n)
// to copy data into 'line'. This form of 'assign' does not call strlen on
// 'input', which is binary data and is not NULL terminated. 'input' may also
// contain valid NULL bytes in the payload, which a strlen based copy would
// truncate.
//
// NOTE(review): the newline test and the return statements were missing from
// the listing this block was rebuilt from; '\n' is inferred from the callers
// (which skip one delimiter byte) and from the "\n" terminated request format.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input || input_len <= 0)
    return false;

  // memchr is length-bounded, so embedded NUL bytes do not stop the scan.
  const void* newline = memchr(input, '\n', static_cast<size_t>(input_len));
  if (!newline)
    return false;

  line->assign(input, static_cast<const char*>(newline) - input);
  return true;
}
38 //------------------------------------------------------------------------------
39 // SafeBrowsingParser implementation
41 SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {
44 bool SafeBrowsingProtocolParser::ParseGetHash(
45 const char* chunk_data
,
47 std::vector
<SBFullHashResult
>* full_hashes
) {
49 int length
= chunk_len
;
50 const char* data
= chunk_data
;
55 if (!GetLine(data
, length
, &line
))
58 offset
= static_cast<int>(line
.size()) + 1;
62 std::vector
<std::string
> cmd_parts
;
63 base::SplitString(line
, ':', &cmd_parts
);
64 if (cmd_parts
.size() != 3)
67 SBFullHashResult full_hash
;
68 full_hash
.list_id
= safe_browsing_util::GetListId(cmd_parts
[0]);
69 // Ignore cmd_parts[1] (add_chunk_id), as we no longer use it with SB 2.3
71 int full_hash_len
= atoi(cmd_parts
[2].c_str());
73 if (full_hash_len
< 0 || full_hash_len
> length
)
76 // Ignore hash results from lists we don't recognize.
77 if (full_hash
.list_id
< 0) {
78 data
+= full_hash_len
;
79 length
-= full_hash_len
;
83 while (static_cast<size_t>(full_hash_len
) >= sizeof(SBFullHash
)) {
84 memcpy(&full_hash
.hash
, data
, sizeof(SBFullHash
));
85 full_hashes
->push_back(full_hash
);
86 data
+= sizeof(SBFullHash
);
87 length
-= sizeof(SBFullHash
);
88 full_hash_len
-= sizeof(SBFullHash
);
95 void SafeBrowsingProtocolParser::FormatGetHash(
96 const std::vector
<SBPrefix
>& prefixes
, std::string
* request
) {
99 // Format the request for GetHash.
100 request
->append(base::StringPrintf("%" PRIuS
":%" PRIuS
"\n",
102 sizeof(SBPrefix
) * prefixes
.size()));
103 for (size_t i
= 0; i
< prefixes
.size(); ++i
) {
104 request
->append(reinterpret_cast<const char*>(&prefixes
[i
]),
109 bool SafeBrowsingProtocolParser::ParseUpdate(
110 const char* chunk_data
,
112 int* next_update_sec
,
114 std::vector
<SBChunkDelete
>* deletes
,
115 std::vector
<ChunkUrl
>* chunk_urls
) {
116 DCHECK(next_update_sec
);
120 int length
= chunk_len
;
121 const char* data
= chunk_data
;
124 std::string list_name
;
127 std::string cmd_line
;
128 if (!GetLine(data
, length
, &cmd_line
))
129 return false; // Error: bad list format!
131 std::vector
<std::string
> cmd_parts
;
132 base::SplitString(cmd_line
, ':', &cmd_parts
);
133 if (cmd_parts
.empty())
135 const std::string
& command
= cmd_parts
[0];
136 if (cmd_parts
.size() != 2 && command
[0] != 'u')
139 const int consumed
= static_cast<int>(cmd_line
.size()) + 1;
143 return false; // Parsing error.
145 // Differentiate on the first character of the command (which is usually
146 // only one character, with the exception of the 'ad' and 'sd' commands).
147 switch (command
[0]) {
150 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
151 // have also parsed the list name before getting here, or the add-del
152 // or sub-del will have no context.
153 if (command
.size() != 2 || command
[1] != 'd' || list_name
.empty())
155 SBChunkDelete chunk_delete
;
156 chunk_delete
.is_sub_del
= command
[0] == 's';
157 StringToRanges(cmd_parts
[1], &chunk_delete
.chunk_del
);
158 chunk_delete
.list_name
= list_name
;
159 deletes
->push_back(chunk_delete
);
164 // The line providing the name of the list (i.e. 'goog-phish-shavar').
165 list_name
= cmd_parts
[1];
169 // The line providing the next earliest time (in seconds) to re-query.
170 *next_update_sec
= atoi(cmd_parts
[1].c_str());
175 chunk_url
.url
= cmd_line
.substr(2); // Skip the initial "u:".
176 chunk_url
.list_name
= list_name
;
177 chunk_urls
->push_back(chunk_url
);
182 if (cmd_parts
[1] != "pleasereset")
188 // According to the spec, we ignore commands we don't understand.
196 bool SafeBrowsingProtocolParser::ParseChunk(const std::string
& list_name
,
199 SBChunkList
* chunks
) {
200 int remaining
= length
;
201 const char* chunk_data
= data
;
203 while (remaining
> 0) {
204 std::string cmd_line
;
205 if (!GetLine(chunk_data
, remaining
, &cmd_line
))
206 return false; // Error: bad chunk format!
208 const int line_len
= static_cast<int>(cmd_line
.length()) + 1;
209 chunk_data
+= line_len
;
210 remaining
-= line_len
;
211 std::vector
<std::string
> cmd_parts
;
212 base::SplitString(cmd_line
, ':', &cmd_parts
);
213 if (cmd_parts
.size() != 4) {
217 // Process the chunk data.
218 const int chunk_number
= atoi(cmd_parts
[1].c_str());
219 const int hash_len
= atoi(cmd_parts
[2].c_str());
220 if (hash_len
!= sizeof(SBPrefix
) && hash_len
!= sizeof(SBFullHash
)) {
221 VLOG(1) << "ParseChunk got unknown hashlen " << hash_len
;
225 const int chunk_len
= atoi(cmd_parts
[3].c_str());
227 if (chunk_len
< 0 || chunk_len
> remaining
)
228 return false; // parse error.
230 chunks
->push_back(SBChunk());
231 chunks
->back().chunk_number
= chunk_number
;
233 if (cmd_parts
[0] == "a") {
234 chunks
->back().is_add
= true;
235 if (!ParseAddChunk(list_name
, chunk_data
, chunk_len
, hash_len
,
236 &chunks
->back().hosts
))
237 return false; // Parse error.
238 } else if (cmd_parts
[0] == "s") {
239 chunks
->back().is_add
= false;
240 if (!ParseSubChunk(list_name
, chunk_data
, chunk_len
, hash_len
,
241 &chunks
->back().hosts
))
242 return false; // Parse error.
248 chunk_data
+= chunk_len
;
249 remaining
-= chunk_len
;
250 DCHECK_LE(0, remaining
);
253 DCHECK(remaining
== 0);
258 bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string
& list_name
,
262 std::deque
<SBChunkHost
>* hosts
) {
263 const char* chunk_data
= data
;
264 int remaining
= data_len
;
266 SBEntry::Type type
= hash_len
== sizeof(SBPrefix
) ?
267 SBEntry::ADD_PREFIX
: SBEntry::ADD_FULL_HASH
;
269 if (list_name
== safe_browsing_util::kDownloadWhiteList
||
270 list_name
== safe_browsing_util::kExtensionBlacklist
||
271 list_name
== safe_browsing_util::kIPBlacklist
) {
272 // These lists only contain prefixes, no HOSTKEY and COUNT.
273 DCHECK_EQ(0, remaining
% hash_len
);
274 prefix_count
= remaining
/ hash_len
;
275 SBChunkHost chunk_host
;
277 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
278 hosts
->push_back(chunk_host
);
279 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
,
281 DVLOG(2) << "Unable to read chunk data for list: " << list_name
;
284 DCHECK_GE(remaining
, 0);
287 const int min_size
= sizeof(SBPrefix
) + 1;
288 while (remaining
>= min_size
) {
289 if (!ReadHostAndPrefixCount(&chunk_data
, &remaining
,
290 &host
, &prefix_count
)) {
293 DCHECK_GE(remaining
, 0);
294 SBChunkHost chunk_host
;
295 chunk_host
.host
= host
;
296 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
297 hosts
->push_back(chunk_host
);
298 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
,
301 DCHECK_GE(remaining
, 0);
304 return remaining
== 0;
307 bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string
& list_name
,
311 std::deque
<SBChunkHost
>* hosts
) {
312 int remaining
= data_len
;
313 const char* chunk_data
= data
;
315 SBEntry::Type type
= hash_len
== sizeof(SBPrefix
) ?
316 SBEntry::SUB_PREFIX
: SBEntry::SUB_FULL_HASH
;
318 if (list_name
== safe_browsing_util::kDownloadWhiteList
||
319 list_name
== safe_browsing_util::kExtensionBlacklist
||
320 list_name
== safe_browsing_util::kIPBlacklist
) {
321 SBChunkHost chunk_host
;
322 // Set host to 0 and it won't be used.
324 // lists only contain (add_chunk_number, prefix) pairs, no HOSTKEY
325 // and COUNT. |add_chunk_number| is int32.
326 prefix_count
= remaining
/ (sizeof(int32
) + hash_len
);
327 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
328 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
, prefix_count
))
330 DCHECK_GE(remaining
, 0);
331 hosts
->push_back(chunk_host
);
334 const int min_size
= 2 * sizeof(SBPrefix
) + 1;
335 while (remaining
>= min_size
) {
336 if (!ReadHostAndPrefixCount(&chunk_data
, &remaining
,
337 &host
, &prefix_count
)) {
340 DCHECK_GE(remaining
, 0);
341 SBChunkHost chunk_host
;
342 chunk_host
.host
= host
;
343 chunk_host
.entry
= SBEntry::Create(type
, prefix_count
);
344 hosts
->push_back(chunk_host
);
345 if (prefix_count
== 0) {
346 // There is only an add chunk number (no prefixes).
348 if (!ReadChunkId(&chunk_data
, &remaining
, &chunk_id
))
350 DCHECK_GE(remaining
, 0);
351 chunk_host
.entry
->set_chunk_id(chunk_id
);
354 if (!ReadPrefixes(&chunk_data
, &remaining
, chunk_host
.entry
,
357 DCHECK_GE(remaining
, 0);
360 return remaining
== 0;
363 bool SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
364 const char** data
, int* remaining
, SBPrefix
* host
, int* count
) {
365 if (static_cast<size_t>(*remaining
) < sizeof(SBPrefix
) + 1)
367 // Next 4 bytes are the host prefix.
368 memcpy(host
, *data
, sizeof(SBPrefix
));
369 *data
+= sizeof(SBPrefix
);
370 *remaining
-= sizeof(SBPrefix
);
372 // Next 1 byte is the prefix count (could be zero, but never negative).
373 *count
= static_cast<unsigned char>(**data
);
376 DCHECK_GE(*remaining
, 0);
380 bool SafeBrowsingProtocolParser::ReadChunkId(
381 const char** data
, int* remaining
, int* chunk_id
) {
382 // Protocol says four bytes, not sizeof(int). Make sure those
383 // values are the same.
384 DCHECK_EQ(sizeof(*chunk_id
), 4u);
385 if (static_cast<size_t>(*remaining
) < sizeof(*chunk_id
))
387 memcpy(chunk_id
, *data
, sizeof(*chunk_id
));
388 *data
+= sizeof(*chunk_id
);
389 *remaining
-= sizeof(*chunk_id
);
390 *chunk_id
= base::HostToNet32(*chunk_id
);
391 DCHECK_GE(*remaining
, 0);
395 bool SafeBrowsingProtocolParser::ReadPrefixes(
396 const char** data
, int* remaining
, SBEntry
* entry
, int count
) {
397 int hash_len
= entry
->HashLen();
398 for (int i
= 0; i
< count
; ++i
) {
399 if (entry
->IsSub()) {
401 if (!ReadChunkId(data
, remaining
, &chunk_id
))
403 DCHECK_GE(*remaining
, 0);
404 entry
->SetChunkIdAtPrefix(i
, chunk_id
);
407 if (*remaining
< hash_len
)
409 if (entry
->IsPrefix()) {
411 DCHECK_EQ(hash_len
, (int)sizeof(prefix
));
412 memcpy(&prefix
, *data
, sizeof(prefix
));
413 entry
->SetPrefixAt(i
, prefix
);
416 DCHECK_EQ(hash_len
, (int)sizeof(hash
));
417 memcpy(&hash
, *data
, sizeof(hash
));
418 entry
->SetFullHashAt(i
, hash
);
421 *remaining
-= hash_len
;
422 DCHECK_GE(*remaining
, 0);