1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
7 // TODOv3(shess): Review these changes carefully.
11 #include "base/format_macros.h"
12 #include "base/logging.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/stringprintf.h"
16 #include "base/sys_byteorder.h"
17 #include "base/time/time.h"
18 #include "build/build_config.h"
19 #include "chrome/browser/safe_browsing/protocol_parser.h"
20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
// BufferReader walks a caller-provided (data, length) byte range from front to
// back: the accessors report what remains, Advance() consumes bytes, and the
// Ref*/Get* helpers report success through their bool return value.
// NOTE(review): several lines of this class are not visible in this view; the
// comments below describe only what the visible statements show.
24 // Helper class for scanning a buffer.
// Constructs a reader over |length| bytes starting at |data| (presumably kept
// in the data_/length_ members that GetLine() reads -- TODO confirm).
27 BufferReader(const char* data
, size_t length
)
32 // Return info about remaining buffer data.
33 size_t length() const {
36 const char* data() const {
43 // Remove |l| characters from the buffer.
// |l| must not exceed length(); the DCHECK_LE below is the only visible guard.
44 void Advance(size_t l
) {
45 DCHECK_LE(l
, length());
50 // Get a reference to data in the buffer.
51 // TODO(shess): I'm not sure I like this. Fill out a StringPiece instead?
// |pptr| receives the borrowed pointer and |l| is the number of bytes wanted.
// The visible Advance(length()) call consumes everything that is left
// ("poison"); presumably this is the failure path, so that later reads fail
// as well -- TODO confirm against the full source.
52 bool RefData(const void** pptr
, size_t l
) {
54 Advance(length()); // poison
63 // Copy data out of the buffer.
// Borrows |l| bytes through RefData() and memcpy()s them into |ptr|.
64 bool GetData(void* ptr
, size_t l
) {
66 if (!RefData(&buf_ptr
, l
))
69 memcpy(ptr
, buf_ptr
, l
);
73 // Read a 32-bit integer in network byte order into a local uint32.
// Reads sizeof(*i) bytes with GetData(), then converts the value in place
// with base::NetToHost32().
74 bool GetNet32(uint32
* i
) {
75 if (!GetData(i
, sizeof(*i
)))
78 *i
= base::NetToHost32(*i
);
82 // Returns false if there is no data, otherwise fills |*line| with a reference
83 // to the next line of data in the buffer.
// The scan stops at the first '\n' or at length_, so a final line without a
// trailing newline is still returned (it runs to the end of the input).
84 bool GetLine(base::StringPiece
* line
) {
88 // Find the end of the line, or the end of the input.
90 while (eol
< length_
&& data_
[eol
] != '\n') {
93 line
->set(data_
, eol
);
96 // Skip the newline if present.
97 if (length_
&& data_
[0] == '\n')
103 // Read out |c| colon-separated pieces from the next line. The resulting
104 // pieces point into the original data buffer.
// At most |c| - 1 splits are made; the final piece is whatever remains of the
// line and may therefore itself contain ':' characters.
105 bool GetPieces(size_t c
, std::vector
<base::StringPiece
>* pieces
) {
106 base::StringPiece line
;
110 // Find the parts separated by ':'.
111 while (pieces
->size() + 1 < c
) {
112 size_t colon_ofs
= line
.find(':');
// NOTE(review): the handling of a line with too few ':' separators is on
// lines that are not visible in this view.
113 if (colon_ofs
== base::StringPiece::npos
) {
118 pieces
->push_back(line
.substr(0, colon_ofs
));
119 line
.remove_prefix(colon_ofs
+ 1);
122 // The last piece runs to the end of the line.
123 pieces
->push_back(line
);
131 DISALLOW_COPY_AND_ASSIGN(BufferReader
);
136 namespace safe_browsing
{
// Parses a gethash response laid out according to the grammar below.
//   chunk_data / chunk_len  - the raw response bytes, scanned with BufferReader.
//   cache_lifetime          - receives the CACHELIFETIME value as a TimeDelta.
//   full_hashes             - cleared first, then appended with one
//                             SBFullHashResult per full hash read.
// The final visible statement returns reader.empty(), i.e. success requires
// that the whole input was consumed.
// NOTE(review): the early-exit statements after each failed check are on
// lines not visible in this view; presumably they return false.
138 // BODY = CACHELIFETIME LF HASHENTRY* EOF
139 // CACHELIFETIME = DIGIT+
140 // HASHENTRY = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
141 // HASHDATA (METADATALEN LF METADATA)*
142 // HASHSIZE = DIGIT+ # Length of each full hash
143 // NUMRESPONSES = DIGIT+ # Number of full hashes in HASHDATA
144 // HASHDATA = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
145 // METADATALEN = DIGIT+
146 // METADATA = <METADATALEN number of unsigned bytes>
147 bool ParseGetHash(const char* chunk_data
,
149 base::TimeDelta
* cache_lifetime
,
150 std::vector
<SBFullHashResult
>* full_hashes
) {
151 full_hashes
->clear();
152 BufferReader
reader(chunk_data
, chunk_len
);
154 // Parse out cache lifetime.
156 base::StringPiece line
;
157 if (!reader
.GetLine(&line
))
160 int64_t cache_lifetime_seconds
;
161 if (!base::StringToInt64(line
, &cache_lifetime_seconds
))
164 // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
165 // either. Maybe there should be a threshold involved.
166 if (cache_lifetime_seconds
< 0)
169 *cache_lifetime
= base::TimeDelta::FromSeconds(cache_lifetime_seconds
);
// Each iteration handles one HASHENTRY: a header line followed by the hash
// bytes and, when flagged, per-hash metadata.
172 while (!reader
.empty()) {
173 std::vector
<base::StringPiece
> cmd_parts
;
// Only three pieces are requested, so an optional ":m" suffix stays attached
// to the third piece (the hash count) and is separated further down.
174 if (!reader
.GetPieces(3, &cmd_parts
))
177 SBFullHashResult full_hash
;
178 full_hash
.list_id
= safe_browsing_util::GetListId(cmd_parts
[0]);
181 if (!base::StringToSizeT(cmd_parts
[1], &hash_len
))
184 // TODO(shess): Is this possible? If not, why is the length present?
185 if (hash_len
!= sizeof(SBFullHash
))
188 // Metadata is indicated by an optional ":m" at the end of the line.
189 bool has_metadata
= false;
190 base::StringPiece hash_count_string
= cmd_parts
[2];
191 size_t optional_colon
= hash_count_string
.find(':', 0);
192 if (optional_colon
!= base::StringPiece::npos
) {
// Anything from the first ':' onward must be exactly ":m"; the two-character
// suffix is then stripped so only the digits remain.
193 if (hash_count_string
.substr(optional_colon
) != ":m")
196 hash_count_string
.remove_suffix(2);
200 if (!base::StringToSizeT(hash_count_string
, &hash_count
))
// NOTE(review): the product below is computed in size_t without an overflow
// check; a very large hash_count could wrap and slip past this bound.  The
// per-hash GetData() calls further down still return a status that is
// checked -- TODO confirm whether an explicit overflow check is wanted.
203 if (hash_len
* hash_count
> reader
.length())
206 // Ignore hash results from lists we don't recognize.
207 if (full_hash
.list_id
< 0) {
208 reader
.Advance(hash_len
* hash_count
);
// Every hash in the entry is pushed with the same list_id; only the hash
// field of |full_hash| is overwritten per iteration.
212 for (size_t i
= 0; i
< hash_count
; ++i
) {
213 if (!reader
.GetData(&full_hash
.hash
, hash_len
))
215 full_hashes
->push_back(full_hash
);
218 // Discard the metadata for now.
// One (METADATALEN LF METADATA) record is consumed per hash; the bytes are
// referenced through RefData() but not stored anywhere visible here.
220 for (size_t i
= 0; i
< hash_count
; ++i
) {
221 base::StringPiece line
;
222 if (!reader
.GetLine(&line
))
225 size_t meta_data_len
;
226 if (!base::StringToSizeT(line
, &meta_data_len
))
229 const void* meta_data
;
230 if (!reader
.RefData(&meta_data
, meta_data_len
))
236 return reader
.empty();
// Builds a gethash request body for |prefixes| following the grammar below and
// returns it as a std::string.
// NOTE(review): the declaration of |request|, the ":" separator append and the
// return statement are on lines not visible in this view.
239 // BODY = HEADER LF PREFIXES EOF
240 // HEADER = PREFIXSIZE ":" LENGTH
241 // PREFIXSIZE = DIGIT+ # Size of each prefix in bytes
242 // LENGTH = DIGIT+ # Size of PREFIXES in bytes
243 std::string
FormatGetHash(const std::vector
<SBPrefix
>& prefixes
) {
// PREFIXSIZE: the size of one SBPrefix in bytes, as decimal text.
245 request
.append(base::Uint64ToString(sizeof(SBPrefix
)));
// LENGTH: the total size in bytes of all prefixes, as decimal text.
247 request
.append(base::Uint64ToString(sizeof(SBPrefix
) * prefixes
.size()));
248 request
.append("\n");
250 // SBPrefix values are read without concern for byte order, so write back the
// same way: each prefix is appended below through a const char* view of its
// in-memory representation.
252 for (size_t i
= 0; i
< prefixes
.size(); ++i
) {
253 request
.append(reinterpret_cast<const char*>(&prefixes
[i
]),
// Parses an update response made of "command:argument" lines.
//   chunk_data / chunk_len - the raw response bytes, scanned with BufferReader.
//   next_update_sec        - must be non-null (DCHECKed); receives the re-query
//                            delay parsed from the corresponding line.
//   deletes                - appended with one SBChunkDelete per 'ad'/'sd' line.
//   chunk_urls             - appended with one ChunkUrl per URL line.
// The most recently seen list name is attached to the deletes and chunk URLs
// that follow it.
// NOTE(review): the case labels, early exits and any other parameters are on
// lines not visible in this view.
260 bool ParseUpdate(const char* chunk_data
,
262 size_t* next_update_sec
,
264 std::vector
<SBChunkDelete
>* deletes
,
265 std::vector
<ChunkUrl
>* chunk_urls
) {
266 DCHECK(next_update_sec
);
270 BufferReader
reader(chunk_data
, chunk_len
);
273 std::string list_name
;
275 while (!reader
.empty()) {
276 std::vector
<base::StringPiece
> pieces
;
// Split once at the first ':' -- pieces[0] is the command and pieces[1] is the
// rest of the line (which may contain further ':' characters).
277 if (!reader
.GetPieces(2, &pieces
))
280 base::StringPiece
& command
= pieces
[0];
282 // Differentiate on the first character of the command (which is usually
283 // only one character, with the exception of the 'ad' and 'sd' commands).
// NOTE(review): a line that starts with ':' would yield an empty |command|;
// no emptiness check is visible before command[0] is read -- TODO confirm.
284 switch (command
[0]) {
287 // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
288 // have also parsed the list name before getting here, or the add-del
289 // or sub-del will have no context.
290 if (list_name
.empty() || (command
!= "ad" && command
!= "sd"))
292 SBChunkDelete chunk_delete
;
293 chunk_delete
.is_sub_del
= command
[0] == 's';
// NOTE(review): any result returned by StringToRanges() is not inspected here.
294 StringToRanges(pieces
[1].as_string(), &chunk_delete
.chunk_del
);
295 chunk_delete
.list_name
= list_name
;
296 deletes
->push_back(chunk_delete
);
301 // The line providing the name of the list (e.g. 'goog-phish-shavar').
302 list_name
= pieces
[1].as_string();
306 // The line providing the next earliest time (in seconds) to re-query.
307 if (!base::StringToSizeT(pieces
[1], next_update_sec
))
313 chunk_url
.url
= pieces
[1].as_string(); // pieces[1] is the text after the initial "u:".
314 chunk_url
.list_name
= list_name
;
315 chunk_urls
->push_back(chunk_url
);
// The only argument compared against on this visible line is "pleasereset".
320 if (pieces
[1] != "pleasereset")
326 // According to the spec, we ignore commands we don't understand.
327 // TODO(shess): Does this apply to r:unknown or n:not-integer?
// Parses a sequence of length-prefixed ChunkData messages (grammar below) and
// appends one heap-allocated SBChunkData per message to |chunks|.
//   data / length - the raw bytes, scanned with BufferReader.
//   chunks        - receives ownership of each parsed chunk via release().
// NOTE(review): the early-exit statements and the final return are on lines
// not visible in this view.
335 // BODY = (UINT32 CHUNKDATA)+
336 // UINT32 = Unsigned 32-bit integer in network byte order
337 // CHUNKDATA = Encoded ChunkData protocol message
338 bool ParseChunk(const char* data
,
340 ScopedVector
<SBChunkData
>* chunks
) {
341 BufferReader
reader(data
, length
);
343 while (!reader
.empty()) {
// The length prefix must be readable, non-zero, and no larger than the bytes
// that remain after it.
345 if (!reader
.GetNet32(&l
) || l
== 0 || l
> reader
.length())
348 const void* p
= NULL
;
349 if (!reader
.RefData(&p
, l
))
// The chunk is held by scoped_ptr until ParseFrom() has been checked, then
// handed over to |chunks|.
352 scoped_ptr
<SBChunkData
> chunk(new SBChunkData());
353 if (!chunk
->ParseFrom(reinterpret_cast<const unsigned char*>(p
), l
))
356 chunks
->push_back(chunk
.release());
// Once the loop exits, all input is expected to have been consumed.
359 DCHECK(reader
.empty());
// Formats one list's chunk ranges as a single newline-terminated line
// following the grammar below and returns it.
// Output shape, as built by the visible statements:
//   <name> ";" ["a:" <adds>] [":" when both adds and subs are present]
//   ["s:" <subs>] "\n"
// so a list with neither adds nor subs yields just "<name>;\n".
363 // LIST = LISTNAME ";" LISTINFO (":" LISTINFO)*
364 // LISTINFO = CHUNKTYPE ":" CHUNKLIST
365 // CHUNKTYPE = "a" | "s"
366 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
368 // RANGE = NUMBER "-" NUMBER
369 std::string
FormatList(const SBListChunkRanges
& list
) {
370 std::string formatted_results
= list
.name
;
371 formatted_results
.append(";");
// list.adds and list.subs are appended verbatim; they are not re-formatted
// here.
373 if (!list
.adds
.empty())
374 formatted_results
.append("a:").append(list
.adds
);
// The ':' separator is emitted only when both sections are present.
375 if (!list
.adds
.empty() && !list
.subs
.empty())
376 formatted_results
.append(":");
377 if (!list
.subs
.empty())
378 formatted_results
.append("s:").append(list
.subs
);
379 formatted_results
.append("\n");
381 return formatted_results
;
384 } // namespace safe_browsing