Disable view source for Developer Tools.
[chromium-blink-merge.git] / chrome / browser / safe_browsing / safe_browsing_util.h
blobf9a12d75910df2a2138ef56f4ff67decc9147f54
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Utilities for the SafeBrowsing code.
7 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
8 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
10 #include <cstring>
11 #include <deque>
12 #include <set>
13 #include <string>
14 #include <vector>
16 #include "base/basictypes.h"
17 #include "chrome/browser/safe_browsing/chunk_range.h"
19 class GURL;
21 class SBEntry;
23 // A truncated hash's type. Used below as the type of |SBChunkHost::host|
// and of the |prefix| member of |SBFullHash|.
24 typedef int32 SBPrefix;
26 // Container for holding a chunk URL and the list it belongs to.
27 struct ChunkUrl {
28 std::string url;  // The chunk URL.
29 std::string list_name;  // Name of the list |url| belongs to.
32 // A full hash. Because this is a union, |full_hash| and |prefix| share the
// same storage: |prefix| overlays the leading sizeof(SBPrefix) bytes of
// |full_hash|.
33 union SBFullHash {
34 char full_hash[32];  // The complete 32-byte hash.
35 SBPrefix prefix;  // SBPrefix-sized view of the start of |full_hash|.
// Equality for full hashes: true only when memcmp() reports that all
// sizeof(SBFullHash) bytes of |lhash| and |rhash| are identical.
38 inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) {
39 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0;
// Ordering for full hashes: |lhash| sorts before |rhash| when memcmp() over
// all sizeof(SBFullHash) bytes returns a negative value, i.e. a byte-wise
// lexicographic comparison.
42 inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) {
43 return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0;
46 // Container for information about a specific host in an add/sub chunk.
47 struct SBChunkHost {
48 SBPrefix host;  // Truncated hash for the host.
// NOTE(review): raw pointer; ownership is not visible in this header.
// Presumably released through SBEntry::Destroy() -- confirm against callers.
49 SBEntry* entry;
52 // Container for an add/sub chunk. The constructor and destructor are only
// declared here; their definitions are outside this header.
53 struct SBChunk {
54 SBChunk();
55 ~SBChunk();
57 int chunk_number;
58 int list_id;
59 bool is_add;  // Presumably true for an add chunk, false for a sub chunk.
60 std::deque<SBChunkHost> hosts;  // Per-host records carried by this chunk.
63 // Container for a set of chunks. Interim wrapper to replace use of
64 // |std::deque<SBChunk>| with something having safer memory semantics.
65 // Backed by a |std::vector<SBChunk>|; copying and assignment are disallowed.
66 // TODO(shess): |SBEntry| is currently a very roundabout way to hold
67 // things pending storage. It could be replaced with the structures
68 // used in SafeBrowsingStore, then lots of bridging code could
69 // disappear.
70 class SBChunkList {
71 public:
72 SBChunkList();
73 ~SBChunkList();
75 // Implement that subset of the |std::deque<>| interface which
76 // callers expect. All read-only accessors are const-qualified so they can
// be used through a |const SBChunkList&|.
77 bool empty() const { return chunks_.empty(); }
78 size_t size() const { return chunks_.size(); }
80 void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
81 SBChunk& back() { return chunks_.back(); }
82 SBChunk& front() { return chunks_.front(); }
83 const SBChunk& front() const { return chunks_.front(); }
85 typedef std::vector<SBChunk>::const_iterator const_iterator;
86 const_iterator begin() const { return chunks_.begin(); }
87 const_iterator end() const { return chunks_.end(); }
89 typedef std::vector<SBChunk>::iterator iterator;
90 iterator begin() { return chunks_.begin(); }
91 iterator end() { return chunks_.end(); }
93 SBChunk& operator[](size_t n) { return chunks_[n]; }
94 const SBChunk& operator[](size_t n) const { return chunks_[n]; }
96 // Calls |SBEntry::Destroy()| before clearing |chunks_|.
97 void clear();
99 private:
100 std::vector<SBChunk> chunks_;  // Underlying storage for the chunks.
102 DISALLOW_COPY_AND_ASSIGN(SBChunkList);
105 // Used when we get a gethash response.
106 struct SBFullHashResult {
107 SBFullHash hash;  // The full hash.
108 std::string list_name;  // List name paired with |hash|.
109 int add_chunk_id;  // Presumably the add chunk |hash| came from -- confirm.
112 // Contains information about a list in the database. The chunk ranges are
// held as strings.
113 struct SBListChunkRanges {
// Defined outside this header; |n| presumably initializes |name| -- confirm.
114 explicit SBListChunkRanges(const std::string& n);
116 std::string name; // The list name.
117 std::string adds; // The ranges for add chunks.
118 std::string subs; // The ranges for sub chunks.
121 // Container for deleting chunks from the database. The constructor and
// destructor are only declared here; their definitions are outside this header.
122 struct SBChunkDelete {
123 SBChunkDelete();
124 ~SBChunkDelete();
126 std::string list_name;  // Presumably the list the deletion applies to.
127 bool is_sub_del;  // Presumably true when sub (not add) chunks are deleted.
128 std::vector<ChunkRange> chunk_del;  // Chunk ranges to delete.
131 // Different types of threats that SafeBrowsing protects against.
// No enumerator has an explicit initializer, so values are assigned
// consecutively from 0 in declaration order.
132 enum SBThreatType {
133 // No threat at all.
134 SB_THREAT_TYPE_SAFE,
136 // The URL is being used for phishing.
137 SB_THREAT_TYPE_URL_PHISHING,
139 // The URL hosts malware.
140 SB_THREAT_TYPE_URL_MALWARE,
142 // The download URL is malware.
143 SB_THREAT_TYPE_BINARY_MALWARE_URL,
145 // The hash of the download contents is malware.
146 SB_THREAT_TYPE_BINARY_MALWARE_HASH,
148 // Url detected by the client-side phishing model. Note that unlike the
149 // above values, this does not correspond to a downloaded list.
150 SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL,
152 // The Chrome extension or app (given by its ID) is malware.
153 SB_THREAT_TYPE_EXTENSION,
155 // Url detected by the client-side malware IP list. This IP list is part
156 // of the client side detection model.
157 SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL,
160 // SBEntry ---------------------------------------------------------------------
162 // Holds information about the prefixes for a hostkey. Prefixes can either be
163 // 4 bytes (truncated hash) or 32 bytes (full hash).
164 // For adds:
165 // [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
166 // For subs:
167 // [list id ][chunk id (only used if prefix count is 0)][prefix count (0..n)]
168 // [add chunk][prefix][add chunk][prefix]
169 class SBEntry {
170 public:
171 enum Type {
172 ADD_PREFIX, // 4 byte add entry.
173 SUB_PREFIX, // 4 byte sub entry.
174 ADD_FULL_HASH, // 32 byte add entry.
175 SUB_FULL_HASH, // 32 byte sub entry.
178 // Creates a SBEntry with the necessary size for the given number of prefixes.
179 // Caller owns the object and needs to free it by calling Destroy.
180 static SBEntry* Create(Type type, int prefix_count);
182 // Frees the entry's memory.
183 void Destroy();
// Simple accessors for the fixed-size header fields held in |data_|.
185 void set_list_id(int list_id) { data_.list_id = list_id; }
186 int list_id() const { return data_.list_id; }
187 void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
188 int chunk_id() const { return data_.chunk_id; }
189 int prefix_count() const { return data_.prefix_count; }
191 // Returns true if this is a prefix as opposed to a full hash.
192 bool IsPrefix() const {
193 return type() == ADD_PREFIX || type() == SUB_PREFIX;
196 // Returns true if this is an add entry.
197 bool IsAdd() const {
198 return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
201 // Returns true if this is a sub entry.
202 bool IsSub() const {
203 return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
206 // Helper to return the size in bytes of one hash of this entry's kind:
// sizeof(SBPrefix) for prefix entries, sizeof(SBFullHash) otherwise.
207 int HashLen() const {
208 return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
211 // For add entries, returns the add chunk id. For sub entries, returns the
212 // add_chunk id for the prefix at the given index.
213 int ChunkIdAtPrefix(int index) const;
215 // Used for sub chunks to set the chunk id at a given index.
216 void SetChunkIdAtPrefix(int index, int chunk_id);
218 // Return the prefix/full hash at the given index. Caller is expected to
219 // call the right function based on the hash length.
220 const SBPrefix& PrefixAt(int index) const;
221 const SBFullHash& FullHashAt(int index) const;
223 // Sets the prefix/full hash at the given index. Caller is expected to
224 // call the right function based on the hash length.
225 void SetPrefixAt(int index, const SBPrefix& prefix);
226 void SetFullHashAt(int index, const SBFullHash& full_hash);
228 private:
229 // Container for a sub prefix.
230 struct SBSubPrefix {
231 int add_chunk;
232 SBPrefix prefix;
235 // Container for a sub full hash.
236 struct SBSubFullHash {
237 int add_chunk;
238 SBFullHash prefix;  // Despite the name, this holds a full hash.
241 // Keep the fixed data together in one struct so that we can get its size
242 // easily. If any of this is modified, the database will have to be cleared.
243 struct Data {
244 int list_id;
245 // For adds, this is the add chunk number.
246 // For subs: if prefix_count is 0 then this is the add chunk that this sub
247 // refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes
248 // or sub_full_hashes is used for each corresponding prefix.
249 int chunk_id;
250 Type type;
251 int prefix_count;  // Number of prefix / full-hash records after the header.
// Private constructor and destructor: the public interface for creating and
// releasing entries is Create() / Destroy().
254 SBEntry();
255 ~SBEntry();
257 // Helper to return the size of each prefix entry (i.e. for subs this
258 // includes an add chunk id).
259 static int PrefixSize(Type type);
261 // Helper to return how much memory a given Entry would require.
262 static int Size(Type type, int prefix_count);
264 // Returns how many bytes this entry is.
265 int Size() const;
267 Type type() const { return data_.type; }
269 void set_prefix_count(int count) { data_.prefix_count = count; }
270 void set_type(Type type) { data_.type = type; }
272 // The prefixes union must follow the fixed data so that they're contiguous
273 // in memory.
// NOTE(review): each array is declared with a single element; presumably
// Create() allocates extra trailing space so that indices up to
// prefix_count() - 1 are valid -- confirm in the implementation file.
274 Data data_;
275 union {
276 SBPrefix add_prefixes_[1];
277 SBSubPrefix sub_prefixes_[1];
278 SBFullHash add_full_hashes_[1];
279 SBSubFullHash sub_full_hashes_[1];
284 // Utility functions -----------------------------------------------------------
286 namespace safe_browsing_util {
288 // SafeBrowsing list names.
289 extern const char kMalwareList[];
290 extern const char kPhishingList[];
291 // Binary Download list names.
292 extern const char kBinUrlList[];
293 extern const char kBinHashList[];
294 // SafeBrowsing client-side detection whitelist list name.
295 extern const char kCsdWhiteList[];
296 // SafeBrowsing download whitelist list name.
297 extern const char kDownloadWhiteList[];
298 // SafeBrowsing extension list name.
299 extern const char kExtensionBlacklist[];
300 // SafeBrowsing side-effect free whitelist name.
301 extern const char kSideEffectFreeWhitelist[];
302 // SafeBrowsing csd malware IP blacklist name.
303 extern const char kIPBlacklist[];
305 // This array must contain all Safe Browsing lists.
// NOTE(review): nine list names are declared above but the array is declared
// with 10 elements. If the definition supplies only nine initializers the
// last slot is a null pointer -- confirm the bound against the definition.
306 extern const char* kAllLists[10];
// Numeric ids for the lists. Even values are used; the odd value after each
// is left unassigned, as explained in the comments below.
308 enum ListType {
309 INVALID = -1,
310 MALWARE = 0,
311 PHISH = 1,
312 BINURL = 2,
313 BINHASH = 3,
314 CSDWHITELIST = 4,
315 // SafeBrowsing lists are stored in pairs. Keep ListType 5
316 // available for a potential second list that we would store in the
317 // csd-whitelist store file.
318 DOWNLOADWHITELIST = 6,
319 // See above comment. Leave 7 available.
320 EXTENSIONBLACKLIST = 8,
321 // See above comment. Leave 9 available.
322 SIDEEFFECTFREEWHITELIST = 10,
323 // See above comment. Leave 11 available.
324 IPBLACKLIST = 12,
325 // See above comment. Leave 13 available.
328 // Maps a list name to ListType.
329 ListType GetListId(const std::string& name);
331 // Maps a ListType id to its list name, written to |list|. Returns false on
// failure.
332 bool GetListName(ListType list_id, std::string* list);
334 // Canonicalizes url as per Google Safe Browsing Specification.
335 // See section 6.1 in
336 // http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
// Results are written through the three output string pointers.
337 void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
338 std::string* canonicalized_path,
339 std::string* canonicalized_query);
341 // Given a URL, returns all the hosts we need to check. They are returned
342 // in order of size (i.e. b.c is first, then a.b.c).
343 void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
345 // Given a URL, returns all the paths we need to check.
346 void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
348 // Given a URL, returns all the patterns we need to check.
349 void GeneratePatternsToCheck(const GURL& url, std::vector<std::string>* urls);
// NOTE(review): undocumented in the original. Presumably returns the index of
// |hash| within |full_hashes|, or -1 if absent, mirroring GetUrlHashIndex()
// below -- confirm in the implementation file.
351 int GetHashIndex(const SBFullHash& hash,
352 const std::vector<SBFullHashResult>& full_hashes);
354 // Given a URL, compare all the possible host + path full hashes to the set of
355 // provided full hashes. Returns the index of the match if one is found, or -1
356 // otherwise.
357 int GetUrlHashIndex(const GURL& url,
358 const std::vector<SBFullHashResult>& full_hashes);
// Predicates on a list name. NOTE(review): presumably each tests |list_name|
// against the corresponding list-name constant declared above -- confirm.
360 bool IsPhishingList(const std::string& list_name);
361 bool IsMalwareList(const std::string& list_name);
362 bool IsBadbinurlList(const std::string& list_name);
363 bool IsBadbinhashList(const std::string& list_name);
364 bool IsExtensionList(const std::string& list_name);
// NOTE(review): undocumented in the original. Judging by the parameter names,
// builds the URL used to report |url_to_report| via |report_page| -- confirm.
366 GURL GeneratePhishingReportUrl(const std::string& report_page,
367 const std::string& url_to_report,
368 bool is_client_side_detection);
// Conversions between SBFullHash and std::string. NOTE(review): the expected
// string length (presumably sizeof(SBFullHash) raw bytes) is not visible
// here -- confirm.
370 SBFullHash StringToSBFullHash(const std::string& hash_in);
371 std::string SBFullHashToString(const SBFullHash& hash_out);
373 } // namespace safe_browsing_util
375 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_