chrome/browser/safe_browsing/protocol_parser.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4 //
   5 // Parse the data returned from the SafeBrowsing v2.1 protocol response.
   6
   7 // TODOv3(shess): Review these changes carefully.
   8
   9 #include <stdlib.h>
  10
  11 #include "base/format_macros.h"
  12 #include "base/logging.h"
  13 #include "base/strings/string_number_conversions.h"
  14 #include "base/strings/string_split.h"
  15 #include "base/strings/stringprintf.h"
  16 #include "base/sys_byteorder.h"
  17 #include "base/time/time.h"
  18 #include "build/build_config.h"
  19 #include "chrome/browser/safe_browsing/protocol_parser.h"
  20 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
  21
  22 namespace {
  23
  24 // Helper class for scanning a buffer.
  25 class BufferReader {
  26  public:
  27   BufferReader(const char* data, size_t length)
  28       : data_(data),
  29         length_(length) {
  30   }
  31
  32   // Return info about remaining buffer data.
  33   size_t length() const {
  34     return length_;
  35   }
  36   const char* data() const {
  37     return data_;
  38   }
  39   bool empty() const {
  40     return length_ == 0;
  41   }
  42
  43   // Remove |l| characters from the buffer.
  44   void Advance(size_t l) {
  45     DCHECK_LE(l, length());
  46     data_ += l;
  47     length_ -= l;
  48   }
  49
  50   // Get a reference to data in the buffer.
  51   // TODO(shess): I'm not sure I like this.  Fill out a StringPiece instead?
  52   bool RefData(const void** pptr, size_t l) {
  53     if (length() < l) {
  54       Advance(length());  // poison
  55       return false;
  56     }
  57
  58     *pptr = data();
  59     Advance(l);
  60     return true;
  61   }
  62
  63   // Copy data out of the buffer.
  64   bool GetData(void* ptr, size_t l) {
  65     const void* buf_ptr;
  66     if (!RefData(&buf_ptr, l))
  67       return false;
  68
  69     memcpy(ptr, buf_ptr, l);
  70     return true;
  71   }
  72
  73   // Read a 32-bit integer in network byte order into a local uint32.
  74   bool GetNet32(uint32* i) {
  75     if (!GetData(i, sizeof(*i)))
  76       return false;
  77
  78     *i = base::NetToHost32(*i);
  79     return true;
  80   }
  81
  82   // Returns false if there is no data, otherwise fills |*line| with a reference
  83   // to the next line of data in the buffer.
  84   bool GetLine(base::StringPiece* line) {
  85     if (!length_)
  86       return false;
  87
  88     // Find the end of the line, or the end of the input.
  89     size_t eol = 0;
  90     while (eol < length_ && data_[eol] != '\n') {
  91       ++eol;
  92     }
  93     line->set(data_, eol);
  94     Advance(eol);
  95
  96     // Skip the newline if present.
  97     if (length_ && data_[0] == '\n')
  98       Advance(1);
  99
 100     return true;
 101   }
 102
 103   // Read out |c| colon-separated pieces from the next line.  The resulting
 104   // pieces point into the original data buffer.
 105   bool GetPieces(size_t c, std::vector<base::StringPiece>* pieces) {
 106     base::StringPiece line;
 107     if (!GetLine(&line))
 108       return false;
 109
 110     // Find the parts separated by ':'.
 111     while (pieces->size() + 1 < c) {
 112       size_t colon_ofs = line.find(':');
 113       if (colon_ofs == base::StringPiece::npos) {
 114         Advance(length_);
 115         return false;
 116       }
 117
 118       pieces->push_back(line.substr(0, colon_ofs));
 119       line.remove_prefix(colon_ofs + 1);
 120     }
 121
 122     // The last piece runs to the end of the line.
 123     pieces->push_back(line);
 124     return true;
 125   }
 126
 127  private:
 128   const char* data_;
 129   size_t length_;
 130
 131   DISALLOW_COPY_AND_ASSIGN(BufferReader);
 132 };
 133
 134 bool ParseGetHashMetadata(size_t hash_count,
 135                           BufferReader* reader,
 136                           std::vector<SBFullHashResult>* full_hashes) {
 137   for (size_t i = 0; i < hash_count; ++i) {
 138     base::StringPiece line;
 139     if (!reader->GetLine(&line))
 140       return false;
 141
 142     size_t meta_data_len;
 143     if (!base::StringToSizeT(line, &meta_data_len))
 144       return false;
 145
 146     const void* meta_data;
 147     if (!reader->RefData(&meta_data, meta_data_len))
 148       return false;
 149
 150     if (full_hashes) {
 151       (*full_hashes)[full_hashes->size() - hash_count + i].metadata.assign(
 152           reinterpret_cast<const char*>(meta_data), meta_data_len);
 153     }
 154   }
 155   return true;
 156 }
 157
 158 }  // namespace
 159
 160 namespace safe_browsing {
 161
 162 // BODY          = CACHELIFETIME LF HASHENTRY* EOF
 163 // CACHELIFETIME = DIGIT+
 164 // HASHENTRY     = LISTNAME ":" HASHSIZE ":" NUMRESPONSES [":m"] LF
 165 //                 HASHDATA (METADATALEN LF METADATA)*
 166 // HASHSIZE      = DIGIT+                  # Length of each full hash
 167 // NUMRESPONSES  = DIGIT+                  # Number of full hashes in HASHDATA
 168 // HASHDATA      = <HASHSIZE*NUMRESPONSES number of unsigned bytes>
 169 // METADATALEN   = DIGIT+
 170 // METADATA      = <METADATALEN number of unsigned bytes>
 171 bool ParseGetHash(const char* chunk_data,
 172                   size_t chunk_len,
 173                   base::TimeDelta* cache_lifetime,
 174                   std::vector<SBFullHashResult>* full_hashes) {
 175   full_hashes->clear();
 176   BufferReader reader(chunk_data, chunk_len);
 177
 178   // Parse out cache lifetime.
 179   {
 180     base::StringPiece line;
 181     if (!reader.GetLine(&line))
 182       return false;
 183
 184     int64_t cache_lifetime_seconds;
 185     if (!base::StringToInt64(line, &cache_lifetime_seconds))
 186       return false;
 187
 188     // TODO(shess): Zero also doesn't make sense, but isn't clearly forbidden,
 189     // either.  Maybe there should be a threshold involved.
 190     if (cache_lifetime_seconds < 0)
 191       return false;
 192
 193     *cache_lifetime = base::TimeDelta::FromSeconds(cache_lifetime_seconds);
 194   }
 195
 196   while (!reader.empty()) {
 197     std::vector<base::StringPiece> cmd_parts;
 198     if (!reader.GetPieces(3, &cmd_parts))
 199       return false;
 200
 201     SBFullHashResult full_hash;
 202     full_hash.list_id = safe_browsing_util::GetListId(cmd_parts[0]);
 203
 204     size_t hash_len;
 205     if (!base::StringToSizeT(cmd_parts[1], &hash_len))
 206       return false;
 207
 208     // TODO(shess): Is this possible?  If not, why the length present?
 209     if (hash_len != sizeof(SBFullHash))
 210       return false;
 211
 212     // Metadata is indicated by an optional ":m" at the end of the line.
 213     bool has_metadata = false;
 214     base::StringPiece hash_count_string = cmd_parts[2];
 215     size_t optional_colon = hash_count_string.find(':', 0);
 216     if (optional_colon != base::StringPiece::npos) {
 217       if (hash_count_string.substr(optional_colon) != ":m")
 218         return false;
 219       has_metadata = true;
 220       hash_count_string.remove_suffix(2);
 221     }
 222
 223     size_t hash_count;
 224     if (!base::StringToSizeT(hash_count_string, &hash_count))
 225       return false;
 226
 227     if (hash_len * hash_count > reader.length())
 228       return false;
 229
 230     // Ignore hash results from lists we don't recognize.
 231     if (full_hash.list_id < 0) {
 232       reader.Advance(hash_len * hash_count);
 233       if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, NULL))
 234         return false;
 235       continue;
 236     }
 237
 238     for (size_t i = 0; i < hash_count; ++i) {
 239       if (!reader.GetData(&full_hash.hash, hash_len))
 240         return false;
 241       full_hashes->push_back(full_hash);
 242     }
 243
 244     if (has_metadata && !ParseGetHashMetadata(hash_count, &reader, full_hashes))
 245       return false;
 246   }
 247
 248   return reader.empty();
 249 }
 250
 251 // BODY       = HEADER LF PREFIXES EOF
 252 // HEADER     = PREFIXSIZE ":" LENGTH
 253 // PREFIXSIZE = DIGIT+         # Size of each prefix in bytes
 254 // LENGTH     = DIGIT+         # Size of PREFIXES in bytes
 255 std::string FormatGetHash(const std::vector<SBPrefix>& prefixes) {
 256   std::string request;
 257   request.append(base::Uint64ToString(sizeof(SBPrefix)));
 258   request.append(":");
 259   request.append(base::Uint64ToString(sizeof(SBPrefix) * prefixes.size()));
 260   request.append("\n");
 261
 262   // SBPrefix values are read without concern for byte order, so write back the
 263   // same way.
 264   for (size_t i = 0; i < prefixes.size(); ++i) {
 265     request.append(reinterpret_cast<const char*>(&prefixes[i]),
 266                    sizeof(SBPrefix));
 267   }
 268
 269   return request;
 270 }
 271
 272 bool ParseUpdate(const char* chunk_data,
 273                  size_t chunk_len,
 274                  size_t* next_update_sec,
 275                  bool* reset,
 276                  std::vector<SBChunkDelete>* deletes,
 277                  std::vector<ChunkUrl>* chunk_urls) {
 278   DCHECK(next_update_sec);
 279   DCHECK(deletes);
 280   DCHECK(chunk_urls);
 281
 282   BufferReader reader(chunk_data, chunk_len);
 283
 284   // Populated below.
 285   std::string list_name;
 286
 287   while (!reader.empty()) {
 288     std::vector<base::StringPiece> pieces;
 289     if (!reader.GetPieces(2, &pieces))
 290       return false;
 291
 292     base::StringPiece& command = pieces[0];
 293
 294     // Differentiate on the first character of the command (which is usually
 295     // only one character, with the exception of the 'ad' and 'sd' commands).
 296     switch (command[0]) {
 297       case 'a':
 298       case 's': {
 299         // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
 300         // have also parsed the list name before getting here, or the add-del
 301         // or sub-del will have no context.
 302         if (list_name.empty() || (command != "ad" && command != "sd"))
 303           return false;
 304         SBChunkDelete chunk_delete;
 305         chunk_delete.is_sub_del = command[0] == 's';
 306         StringToRanges(pieces[1].as_string(), &chunk_delete.chunk_del);
 307         chunk_delete.list_name = list_name;
 308         deletes->push_back(chunk_delete);
 309         break;
 310       }
 311
 312       case 'i':
 313         // The line providing the name of the list (i.e. 'goog-phish-shavar').
 314         list_name = pieces[1].as_string();
 315         break;
 316
 317       case 'n':
 318         // The line providing the next earliest time (in seconds) to re-query.
 319         if (!base::StringToSizeT(pieces[1], next_update_sec))
 320           return false;
 321         break;
 322
 323       case 'u': {
 324         ChunkUrl chunk_url;
 325         chunk_url.url = pieces[1].as_string();  // Skip the initial "u:".
 326         chunk_url.list_name = list_name;
 327         chunk_urls->push_back(chunk_url);
 328         break;
 329       }
 330
 331       case 'r':
 332         if (pieces[1] != "pleasereset")
 333           return false;
 334         *reset = true;
 335         break;
 336
 337       default:
 338         // According to the spec, we ignore commands we don't understand.
 339         // TODO(shess): Does this apply to r:unknown or n:not-integer?
 340         break;
 341     }
 342   }
 343
 344   return true;
 345 }
 346
 347 // BODY      = (UINT32 CHUNKDATA)+
 348 // UINT32    = Unsigned 32-bit integer in network byte order
 349 // CHUNKDATA = Encoded ChunkData protocol message
 350 bool ParseChunk(const char* data,
 351                 size_t length,
 352                 ScopedVector<SBChunkData>* chunks) {
 353   BufferReader reader(data, length);
 354
 355   while (!reader.empty()) {
 356     uint32 l = 0;
 357     if (!reader.GetNet32(&l) || l == 0 || l > reader.length())
 358       return false;
 359
 360     const void* p = NULL;
 361     if (!reader.RefData(&p, l))
 362       return false;
 363
 364     scoped_ptr<SBChunkData> chunk(new SBChunkData());
 365     if (!chunk->ParseFrom(reinterpret_cast<const unsigned char*>(p), l))
 366       return false;
 367
 368     chunks->push_back(chunk.release());
 369   }
 370
 371   DCHECK(reader.empty());
 372   return true;
 373 }
 374
 375 // LIST      = LISTNAME ";" LISTINFO (":" LISTINFO)*
 376 // LISTINFO  = CHUNKTYPE ":" CHUNKLIST
 377 // CHUNKTYPE = "a" | "s"
 378 // CHUNKLIST = (RANGE | NUMBER) ["," CHUNKLIST]
 379 // NUMBER    = DIGIT+
 380 // RANGE     = NUMBER "-" NUMBER
 381 std::string FormatList(const SBListChunkRanges& list) {
 382   std::string formatted_results = list.name;
 383   formatted_results.append(";");
 384
 385   if (!list.adds.empty())
 386     formatted_results.append("a:").append(list.adds);
 387   if (!list.adds.empty() && !list.subs.empty())
 388     formatted_results.append(":");
 389   if (!list.subs.empty())
 390     formatted_results.append("s:").append(list.subs);
 391   formatted_results.append("\n");
 392
 393   return formatted_results;
 394 }
 395
 396 }  // namespace safe_browsing