1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Provides global database of differential decompression dictionaries for the
6 // SDCH filter (processes sdch encoded content).
8 // Exactly one instance of SdchManager is built, and all references are made
9 // into that collection.
11 // The SdchManager maintains a collection of memory resident dictionaries. It
12 // can find a dictionary (based on a server specification of a hash), store a
13 // dictionary, and make judgements about what URLs can use, set, etc. a dictionary.
16 // These dictionaries are acquired over the net, and include a header
17 // (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
18 // module) to decompress data.
20 #ifndef NET_BASE_SDCH_MANAGER_H_
21 #define NET_BASE_SDCH_MANAGER_H_
27 #include "base/gtest_prod_util.h"
28 #include "base/memory/ref_counted.h"
29 #include "base/memory/scoped_ptr.h"
30 #include "base/threading/non_thread_safe.h"
31 #include "base/time/time.h"
32 #include "net/base/net_export.h"
37 //------------------------------------------------------------------------------
38 // Create a public interface to help us load SDCH dictionaries.
39 // The SdchManager class allows registration to support this interface.
40 // A browser may register a fetcher that is used by the dictionary managers to
41 // get data from a specified URL. This allows us to use very high level browser
42 // functionality in this base (when the functionality can be provided).
43 class NET_EXPORT SdchFetcher
{
// Virtual destructor: SdchManager keeps its fetcher in a
// scoped_ptr<SdchFetcher>, so concrete fetchers are destroyed through this
// base type.
46 virtual ~SdchFetcher() {}
48 // The Schedule() method is called when there is a need to get a dictionary
49 // from a server. The callee is responsible for getting that dictionary_text,
50 // and then calling back to AddSdchDictionary() to the SdchManager instance.
// |dictionary_url| identifies the dictionary to be fetched.
51 virtual void Schedule(const GURL
& dictionary_url
) = 0;
53 // The Cancel() method is called to cancel all pending dictionary fetches.
54 // This is used for implementation of ClearData() below.
55 virtual void Cancel() = 0;
// NOTE(review): macro defined elsewhere; by its name it disables copy
// construction and assignment of fetchers -- confirm against its definition.
58 DISALLOW_COPY_AND_ASSIGN(SdchFetcher
);
61 //------------------------------------------------------------------------------
63 class NET_EXPORT SdchManager
: public NON_EXPORTED_BASE(base::NonThreadSafe
) {
65 // A list of errors that appeared and were either resolved, or used to turn
70 // Content-encoding correction problems.
71 ADDED_CONTENT_ENCODING
= 1,
72 FIXED_CONTENT_ENCODING
= 2,
73 FIXED_CONTENT_ENCODINGS
= 3,
75 // Content decoding errors.
76 DECODE_HEADER_ERROR
= 4,
77 DECODE_BODY_ERROR
= 5,
79 // More content-encoding correction problems.
80 OPTIONAL_GUNZIP_ENCODING_ADDED
= 6,
82 // Content encoding correction when we're not even tagged as HTML!?!
83 BINARY_ADDED_CONTENT_ENCODING
= 7,
84 BINARY_FIXED_CONTENT_ENCODING
= 8,
85 BINARY_FIXED_CONTENT_ENCODINGS
= 9,
87 // Dictionary selection for use problems.
88 DICTIONARY_FOUND_HAS_WRONG_DOMAIN
= 10,
89 DICTIONARY_FOUND_HAS_WRONG_PORT_LIST
= 11,
90 DICTIONARY_FOUND_HAS_WRONG_PATH
= 12,
91 DICTIONARY_FOUND_HAS_WRONG_SCHEME
= 13,
92 DICTIONARY_HASH_NOT_FOUND
= 14,
93 DICTIONARY_HASH_MALFORMED
= 15,
95 // Dictionary saving problems.
96 DICTIONARY_HAS_NO_HEADER
= 20,
97 DICTIONARY_HEADER_LINE_MISSING_COLON
= 21,
98 DICTIONARY_MISSING_DOMAIN_SPECIFIER
= 22,
99 DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
= 23,
100 DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
= 24,
101 DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
= 25,
102 DICTIONARY_HAS_NO_TEXT
= 26,
103 DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
= 27,
105 // Dictionary loading problems.
106 DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST
= 30,
107 DICTIONARY_SELECTED_FOR_SSL
= 31,
108 DICTIONARY_ALREADY_LOADED
= 32,
109 DICTIONARY_SELECTED_FROM_NON_HTTP
= 33,
110 DICTIONARY_IS_TOO_LARGE
= 34,
111 DICTIONARY_COUNT_EXCEEDED
= 35,
112 DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD
= 36,
113 DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD
= 37,
116 ATTEMPT_TO_DECODE_NON_HTTP_DATA
= 40,
119 // Content-Encoding problems detected, with no action taken.
120 MULTIENCODING_FOR_NON_SDCH_REQUEST
= 50,
121 SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST
= 51,
123 // Dictionary manager issues.
124 DOMAIN_BLACKLIST_INCLUDES_TARGET
= 61,
126 // Problematic decode recovery methods.
127 META_REFRESH_RECOVERY
= 70, // Dictionary not found.
128 // defunct = 71, // Almost the same as META_REFRESH_UNSUPPORTED.
129 // defunct = 72, // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
130 // defunct = 73, // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
131 META_REFRESH_UNSUPPORTED
= 74, // Unrecoverable error.
132 CACHED_META_REFRESH_UNSUPPORTED
= 75, // As above, but pulled from cache.
133 PASSING_THROUGH_NON_SDCH
= 76, // Tagged sdch but missing dictionary-hash.
134 INCOMPLETE_SDCH_CONTENT
= 77, // Last window was not completely decoded.
135 PASS_THROUGH_404_CODE
= 78, // URL not found message passing through.
137 // This next report is very common, and not really an error scenario, but
138 // it exercises the error recovery logic.
139 PASS_THROUGH_OLD_CACHED
= 79, // Back button got pre-SDCH cached content.
141 // Common decoded recovery methods.
142 META_REFRESH_CACHED_RECOVERY
= 80, // Probably startup tab loading.
143 DISCARD_TENTATIVE_SDCH
= 81, // Server decided not to use sdch.
145 // Non SDCH problems, only accounted for to make stat counting complete
146 // (i.e., be able to be sure all dictionary advertisements are accounted for).
149 UNFLUSHED_CONTENT
= 90, // Possible error in filter chaining.
150 // defunct = 91, // MISSING_TIME_STATS (Should never happen.)
151 CACHE_DECODED
= 92, // No timing stats recorded.
152 // defunct = 93, // OVER_10_MINUTES (No timing stats recorded.)
153 UNINITIALIZED
= 94, // Filter never even got initialized.
154 PRIOR_TO_DICTIONARY
= 95, // We hadn't even parsed a dictionary selector.
155 DECODE_ERROR
= 96, // Something went wrong during decode.
157 // Problem during the latency test.
158 LATENCY_TEST_DISALLOWED
= 100, // SDCH now failing, but it worked before!
160 MAX_PROBLEM_CODE
// Used to bound histogram.
163 // Use the following static limits to block DOS attacks until we implement
164 // a cached dictionary eviction strategy.
// Both constants are only declared here (no initializer); their values come
// from an out-of-line definition.
165 static const size_t kMaxDictionarySize
;
// NOTE(review): presumably the cap on how many dictionaries may be held at
// once (cf. DICTIONARY_COUNT_EXCEEDED) -- confirm against the definition.
166 static const size_t kMaxDictionaryCount
;
168 // There is one instance of |Dictionary| for each memory-cached SDCH dictionary.
170 class NET_EXPORT_PRIVATE Dictionary
: public base::RefCounted
<Dictionary
> {
172 // Sdch filters can get our text to use in decoding compressed data.
173 const std::string
& text() const { return text_
; }
176 friend class base::RefCounted
<Dictionary
>;
177 friend class SdchManager
; // Only manager can construct an instance.
178 FRIEND_TEST_ALL_PREFIXES(SdchManagerTest
, PathMatch
);
180 // Construct a vc-diff usable dictionary from the dictionary_text starting
181 // at the given offset. The supplied client_hash should be used to
182 // advertise the dictionary's availability relative to the supplied URL.
// NOTE(review): the offset and URL mentioned above do not appear in the
// parameter list visible here -- confirm against the full declaration.
// |domain|, |path|, |expiration| and |ports| share their names with the
// const domain_, path_, expiration_ and ports_ members of this class;
// presumably they initialize them (verify in the constructor definition).
183 Dictionary(const std::string
& dictionary_text
,
185 const std::string
& client_hash
,
187 const std::string
& domain
,
188 const std::string
& path
,
189 const base::Time
& expiration
,
190 const std::set
<int>& ports
);
193 const GURL
& url() const { return url_
; }
194 const std::string
& client_hash() const { return client_hash_
; }
196 // Security method to check if we can advertise this dictionary for use
197 // if the |target_url| returns SDCH compressed data.
198 bool CanAdvertise(const GURL
& target_url
);
200 // Security methods to check if we can establish a new dictionary with the
201 // given data, that arrived in response to get of dictionary_url.
202 static bool CanSet(const std::string
& domain
, const std::string
& path
,
203 const std::set
<int>& ports
, const GURL
& dictionary_url
);
205 // Security method to check if we can use a dictionary to decompress a
206 // target that arrived with a reference to this dictionary.
207 bool CanUse(const GURL
& referring_url
);
209 // Compare paths to see if they "match" for dictionary use.
210 static bool PathMatch(const std::string
& path
,
211 const std::string
& restriction
);
213 // Compare domains to see if they "match" for dictionary use.
// Static: takes the |url| to test and the |restriction| string to test it
// against, and reports the outcome as a bool.
// NOTE(review): presumably |restriction| is a dictionary's domain specifier
// and true means "match" -- confirm in the implementation.
214 static bool DomainMatch(const GURL
& url
, const std::string
& restriction
);
217 // The actual text of the dictionary.
220 // Part of the hash of text_ that the client uses to advertise the fact that
221 // it has a specific dictionary pre-cached.
222 std::string client_hash_
;
224 // The GURL that arrived with the text_ in a URL request to specify where
225 // this dictionary may be used.
228 // Metadata "headers" that preceded the dictionary text contained the following:
229 // Each dictionary payload consists of several headers, followed by the text
230 // of the dictionary. The following are the known headers.
// All four members are const, so they are fixed when the Dictionary is
// constructed and never change afterwards.
231 const std::string domain_
;
232 const std::string path_
;
233 const base::Time expiration_
; // Implied by max-age.
234 const std::set
<int> ports_
;
236 DISALLOW_COPY_AND_ASSIGN(Dictionary
);
242 // Clear data (for browser data removal).
245 // Record stats on various errors.
246 static void SdchErrorRecovery(ProblemCodes problem
);
248 // Register a fetcher that this class can use to obtain dictionaries.
249 void set_sdch_fetcher(scoped_ptr
<SdchFetcher
> fetcher
);
251 // Enables or disables SDCH compression.
252 static void EnableSdchSupport(bool enabled
);
// Returns the current value of the static g_sdch_enabled_ flag.
254 static bool sdch_enabled() { return g_sdch_enabled_
; }
256 // Enables or disables SDCH compression over secure connection.
257 static void EnableSecureSchemeSupport(bool enabled
);
// Returns the current value of the static g_secure_scheme_supported_ flag.
259 static bool secure_scheme_supported() { return g_secure_scheme_supported_
; }
261 // Briefly prevent further advertising of SDCH on this domain (if SDCH is
262 // enabled). After enough calls to IsInSupportedDomain() the blacklisting
263 // will be removed. Additional blacklists take exponentially more calls
264 // to IsInSupportedDomain() before the blacklisting is undone.
265 // Used when filter errors are found from a given domain, but it is plausible
266 // that the cause is temporary (such as application startup, where cached
267 // entries are used, but a dictionary is not yet loaded).
268 void BlacklistDomain(const GURL
& url
, ProblemCodes blacklist_reason
);
270 // Used when SEVERE filter errors are found from a given domain, to prevent
271 // further use of SDCH on that domain.
272 void BlacklistDomainForever(const GURL
& url
, ProblemCodes blacklist_reason
);
274 // Unit test only, this function resets enabling of sdch, and clears the
// blacklist.
276 void ClearBlacklistings();
278 // Unit test only, this function resets the blacklisting count for a domain.
279 void ClearDomainBlacklisting(const std::string
& domain
);
281 // Unit test only: indicate how many more times a domain will be blacklisted.
282 int BlackListDomainCount(const std::string
& domain
);
284 // Unit test only: Indicate what current blacklist increment is for a domain.
285 int BlacklistDomainExponential(const std::string
& domain
);
287 // Check to see if SDCH is enabled (globally), and the given URL is in a
288 // supported domain (i.e., not blacklisted, and either the specific supported
289 // domain, or all domains were assumed supported). If it is blacklisted, reduce
290 // by 1 the number of times it will be reported as blacklisted.
// Non-const for that reason: a call on a blacklisted domain consumes one of
// its remaining blacklisted reports.
291 bool IsInSupportedDomain(const GURL
& url
);
293 // Schedule the URL fetching to load a dictionary. This will always return
294 // before the dictionary is actually loaded and added.
295 // After the implied task completes, the dictionary will have been
// loaded and added.
// NOTE(review): presumably |request_url| is the request that referenced the
// dictionary and |dictionary_url| is where it is fetched from -- confirm in
// the implementation.
297 void FetchDictionary(const GURL
& request_url
, const GURL
& dictionary_url
);
299 // Security test function used before initiating a FetchDictionary.
300 // Return true if fetch is legal.
301 bool CanFetchDictionary(const GURL
& referring_url
,
302 const GURL
& dictionary_url
) const;
304 // Add an SDCH dictionary to our list of available dictionaries. This addition
305 // will fail (return false) if addition is illegal (data in the dictionary is
306 // not acceptable from the dictionary_url; dictionary already added, etc.).
307 bool AddSdchDictionary(const std::string
& dictionary_text
,
308 const GURL
& dictionary_url
);
310 // Find the vcdiff dictionary (the body of the sdch dictionary that appears
311 // after the meta-data headers like Domain:...) with the given |server_hash|
312 // to use to decompress data that arrived as SDCH encoded content. Check to
313 // be sure the returned |dictionary| can be used for decoding content supplied
314 // in response to a request for |referring_url|.
315 // Return null in |dictionary| if there is no matching legal dictionary.
// The result is delivered only through the |dictionary| out-parameter; the
// function itself returns void.
316 void GetVcdiffDictionary(const std::string
& server_hash
,
317 const GURL
& referring_url
,
318 scoped_refptr
<Dictionary
>* dictionary
);
320 // Get list of available (pre-cached) dictionaries that we have already loaded
321 // into memory. The list is a comma separated list of (client) hashes per
323 void GetAvailDictionaryList(const GURL
& target_url
, std::string
* list
);
325 // Construct the pair of hashes for client and server to identify an SDCH
326 // dictionary. This is only made public to facilitate unit testing, but is
328 static void GenerateHash(const std::string
& dictionary_text
,
329 std::string
* client_hash
, std::string
* server_hash
);
331 // For Latency testing only, we need to know if we've succeeded in doing a
332 // round trip before starting our comparative tests. If ever we encounter
333 // problems with SDCH, we opt-out of the test unless/until we perform a
334 // complete SDCH decoding.
335 bool AllowLatencyExperiment(const GURL
& url
) const;
// Sets whether the latency experiment is allowed for |url|; |enable| selects
// the new state.
// NOTE(review): presumably this adds or removes the host of |url| in
// allow_latency_experiment_ -- confirm in the implementation.
337 void SetAllowLatencyExperiment(const GURL
& url
, bool enable
);
339 int GetFetchesCountForTesting() const {
340 return fetches_count_for_testing_
;
344 struct BlacklistInfo
{
347 exponential_count(0),
348 reason(MIN_PROBLEM_CODE
) {}
350 int count
; // # of times to refuse SDCH advertisement.
351 int exponential_count
; // Current exponential backoff ratchet.
352 ProblemCodes reason
; // Why domain was blacklisted.
// Map from a string key to its BlacklistInfo; the type of
// blacklisted_domains_. NOTE(review): the key is presumably the domain name
// -- confirm in the implementation.
355 typedef std::map
<std::string
, BlacklistInfo
> DomainBlacklistInfo
;
// Set of strings; the type of allow_latency_experiment_, which holds
// hostnames.
356 typedef std::set
<std::string
> ExperimentSet
;
358 // A map of dictionaries info indexed by the hash that the server provides.
359 typedef std::map
<std::string
, scoped_refptr
<Dictionary
> > DictionaryMap
;
361 // Support SDCH compression, by advertising in headers.
362 static bool g_sdch_enabled_
;
364 // Support SDCH compression for HTTPS requests and responses. When supported,
365 // HTTPS applicable dictionaries MUST have been acquired securely via HTTPS.
366 static bool g_secure_scheme_supported_
;
368 // A simple implementation of a RFC 3548 "URL safe" base64 encoder.
369 static void UrlSafeBase64Encode(const std::string
& input
,
370 std::string
* output
);
// The dictionaries held by this manager, stored in a DictionaryMap (indexed
// by the hash that the server provides).
371 DictionaryMap dictionaries_
;
373 // An instance that can fetch a dictionary given a URL.
374 scoped_ptr
<SdchFetcher
> fetcher_
;
376 // List domains where decode failures have required disabling sdch.
377 DomainBlacklistInfo blacklisted_domains_
;
379 // List of hostnames for which a latency experiment is allowed (because a
380 // round trip test has recently passed).
381 ExperimentSet allow_latency_experiment_
;
// Counter exposed to tests through GetFetchesCountForTesting().
// NOTE(review): presumably incremented once per scheduled dictionary fetch
// -- confirm in the implementation.
383 int fetches_count_for_testing_
;
385 DISALLOW_COPY_AND_ASSIGN(SdchManager
);
390 #endif // NET_BASE_SDCH_MANAGER_H_