Roll libvpx 861f35:1fff3e
[chromium-blink-merge.git] / net / filter / sdch_filter.cc
blob2540ee4cd61699ece4e394a369f575ba4ad6f2d6
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/filter/sdch_filter.h"
7 #include <ctype.h>
8 #include <limits.h>
10 #include <algorithm>
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "base/values.h"
15 #include "net/base/sdch_manager.h"
16 #include "net/base/sdch_net_log_params.h"
17 #include "net/base/sdch_problem_codes.h"
18 #include "net/url_request/url_request_context.h"
20 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
22 namespace net {
24 namespace {
// Disambiguates the kinds of responses that trigger a meta-refresh, a
// failure, or a fallback to pass-through.
//
// The values are reported through UMA_HISTOGRAM_ENUMERATION with
// RESPONSE_MAX as the boundary, so RESPONSE_MAX must stay last.
// NOTE(review): existing numeric values should presumably stay stable so
// that previously recorded histogram buckets keep their meaning.
enum ResponseCorruptionDetectionCause {
  // No corruption cause has been determined.
  RESPONSE_NONE = 0,

  // 404 Http Response Code
  RESPONSE_404 = 1,

  // Not a 200 Http Response Code
  RESPONSE_NOT_200 = 2,

  // Cached before dictionary retrieved.
  RESPONSE_OLD_UNENCODED = 3,

  // Speculative but incorrect SDCH filtering was added.
  RESPONSE_TENTATIVE_SDCH = 4,

  // Missing correct dict for decoding.
  RESPONSE_NO_DICTIONARY = 5,

  // Not an SDCH response but should be.
  RESPONSE_CORRUPT_SDCH = 6,

  // No dictionary was advertised with the request, the server claims
  // to have encoded with SDCH anyway, but it isn't an SDCH response.
  RESPONSE_ENCODING_LIE = 7,

  // Boundary value for histograms; not a real cause.
  RESPONSE_MAX,
};
56 const char* ResponseCorruptionDetectionCauseToString(
57 ResponseCorruptionDetectionCause cause) {
58 const char* cause_string = "<unknown>";
59 switch (cause) {
60 case RESPONSE_NONE:
61 cause_string = "NONE";
62 break;
63 case RESPONSE_404:
64 cause_string = "404";
65 break;
66 case RESPONSE_NOT_200:
67 cause_string = "NOT_200";
68 break;
69 case RESPONSE_OLD_UNENCODED:
70 cause_string = "OLD_UNENCODED";
71 break;
72 case RESPONSE_TENTATIVE_SDCH:
73 cause_string = "TENTATIVE_SDCH";
74 break;
75 case RESPONSE_NO_DICTIONARY:
76 cause_string = "NO_DICTIONARY";
77 break;
78 case RESPONSE_CORRUPT_SDCH:
79 cause_string = "CORRUPT_SDCH";
80 break;
81 case RESPONSE_ENCODING_LIE:
82 cause_string = "ENCODING_LIE";
83 break;
84 case RESPONSE_MAX:
85 cause_string = "<Error: max enum value>";
86 break;
88 return cause_string;
91 base::Value* NetLogSdchResponseCorruptionDetectionCallback(
92 ResponseCorruptionDetectionCause cause,
93 bool cached,
94 NetLog::LogLevel log_level) {
95 base::DictionaryValue* dict = new base::DictionaryValue();
96 dict->SetString("cause", ResponseCorruptionDetectionCauseToString(cause));
97 dict->SetBoolean("cached", cached);
98 return dict;
101 } // namespace
103 SdchFilter::SdchFilter(FilterType type, const FilterContext& filter_context)
104 : Filter(type),
105 filter_context_(filter_context),
106 decoding_status_(DECODING_UNINITIALIZED),
107 dictionary_hash_(),
108 dictionary_hash_is_plausible_(false),
109 dictionary_(NULL),
110 url_request_context_(filter_context.GetURLRequestContext()),
111 dest_buffer_excess_(),
112 dest_buffer_excess_index_(0),
113 source_bytes_(0),
114 output_bytes_(0),
115 possible_pass_through_(false) {
116 bool success = filter_context.GetMimeType(&mime_type_);
117 DCHECK(success);
118 success = filter_context.GetURL(&url_);
119 DCHECK(success);
120 DCHECK(url_request_context_->sdch_manager());
123 SdchFilter::~SdchFilter() {
124 // All code here is for gathering stats, and can be removed when SDCH is
125 // considered stable.
127 static int filter_use_count = 0;
128 ++filter_use_count;
129 if (META_REFRESH_RECOVERY == decoding_status_) {
130 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
133 if (vcdiff_streaming_decoder_.get()) {
134 if (!vcdiff_streaming_decoder_->FinishDecoding()) {
135 decoding_status_ = DECODING_ERROR;
136 LogSdchProblem(SDCH_INCOMPLETE_SDCH_CONTENT);
137 // Make it possible for the user to hit reload, and get non-sdch content.
138 // Note this will "wear off" quickly enough, and is just meant to assure
139 // in some rare case that the user is not stuck.
140 url_request_context_->sdch_manager()->BlacklistDomain(
141 url_, SDCH_INCOMPLETE_SDCH_CONTENT);
142 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
143 static_cast<int>(filter_context_.GetByteReadCount()));
144 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
145 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
149 if (!dest_buffer_excess_.empty()) {
150 // Filter chaining error, or premature teardown.
151 LogSdchProblem(SDCH_UNFLUSHED_CONTENT);
152 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
153 static_cast<int>(filter_context_.GetByteReadCount()));
154 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
155 dest_buffer_excess_.size());
156 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
157 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
160 if (filter_context_.IsCachedContent()) {
161 // Not a real error, but it is useful to have this tally.
162 // TODO(jar): Remove this stat after SDCH stability is validated.
163 LogSdchProblem(SDCH_CACHE_DECODED);
164 return; // We don't need timing stats, and we aready got ratios.
167 switch (decoding_status_) {
168 case DECODING_IN_PROGRESS: {
169 if (output_bytes_) {
170 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
171 static_cast<int>(
172 (filter_context_.GetByteReadCount() * 100) / output_bytes_));
173 UMA_HISTOGRAM_COUNTS("Sdch3.NetworkBytesSavedByCompression",
174 output_bytes_ - source_bytes_);
176 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
177 output_bytes_);
178 filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);
180 // Allow latency experiments to proceed.
181 url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
182 url_, true);
184 // Notify successful dictionary usage.
185 url_request_context_->sdch_manager()->OnDictionaryUsed(
186 dictionary_->server_hash());
188 return;
190 case PASS_THROUGH: {
191 filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
192 return;
194 case DECODING_UNINITIALIZED: {
195 LogSdchProblem(SDCH_UNINITIALIZED);
196 return;
198 case WAITING_FOR_DICTIONARY_SELECTION: {
199 LogSdchProblem(SDCH_PRIOR_TO_DICTIONARY);
200 return;
202 case DECODING_ERROR: {
203 LogSdchProblem(SDCH_DECODE_ERROR);
204 return;
206 case META_REFRESH_RECOVERY: {
207 // Already accounted for when set.
208 return;
210 } // end of switch.
213 bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
214 if (decoding_status_ != DECODING_UNINITIALIZED)
215 return false;
217 // Handle case where sdch filter is guessed, but not required.
218 if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
219 possible_pass_through_ = true;
221 // Initialize decoder only after we have a dictionary in hand.
222 decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
223 return true;
// HTML emitted in place of undecodable content: a zero-delay meta-refresh
// that makes the page reload. Builds without NDEBUG additionally show a
// visible notice.
//
// Declared as a const array rather than a mutable `const char*` so the
// symbol cannot be reseated.
// NOTE(review): "foreground-color" in the debug markup is not a standard
// CSS property (the standard one is "color"); left unchanged here to keep
// the emitted bytes identical.
#ifndef NDEBUG
static const char kDecompressionErrorHtml[] =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
    "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    "font-size:10pt;foreground-color:black;background-color:white\">"
    "An error occurred. This page will be reloaded shortly. "
    "Or press the \"reload\" button now to reload it immediately."
    "</div>";
#else
static const char kDecompressionErrorHtml[] =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
240 Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
241 int* dest_len) {
242 int available_space = *dest_len;
243 *dest_len = 0; // Nothing output yet.
245 if (!dest_buffer || available_space <= 0)
246 return FILTER_ERROR;
248 if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
249 FilterStatus status = InitializeDictionary();
250 if (FILTER_NEED_MORE_DATA == status)
251 return FILTER_NEED_MORE_DATA;
252 if (FILTER_ERROR == status) {
253 DCHECK_EQ(DECODING_ERROR, decoding_status_);
254 DCHECK_EQ(0u, dest_buffer_excess_index_);
255 DCHECK(dest_buffer_excess_.empty());
256 // This is where we try very hard to do error recovery, and make this
257 // protocol robust in the face of proxies that do many different things.
258 // If we decide that things are looking very bad (too hard to recover),
259 // we may even issue a "meta-refresh" to reload the page without an SDCH
260 // advertisement (so that we are sure we're not hurting anything).
262 // Watch out for an error page inserted by the proxy as part of a 40x
263 // error response. When we see such content molestation, we certainly
264 // need to fall into the meta-refresh case.
265 ResponseCorruptionDetectionCause cause = RESPONSE_NONE;
266 if (filter_context_.GetResponseCode() == 404) {
267 // We could be more generous, but for now, only a "NOT FOUND" code will
268 // cause a pass through. All other bad codes will fall into a
269 // meta-refresh.
270 LogSdchProblem(SDCH_PASS_THROUGH_404_CODE);
271 cause = RESPONSE_404;
272 decoding_status_ = PASS_THROUGH;
273 } else if (filter_context_.GetResponseCode() != 200) {
274 // We need to meta-refresh, with SDCH disabled.
275 cause = RESPONSE_NOT_200;
276 } else if (filter_context_.IsCachedContent()
277 && !dictionary_hash_is_plausible_) {
278 // We must have hit the back button, and gotten content that was fetched
279 // before we *really* advertised SDCH and a dictionary.
280 LogSdchProblem(SDCH_PASS_THROUGH_OLD_CACHED);
281 decoding_status_ = PASS_THROUGH;
282 cause = RESPONSE_OLD_UNENCODED;
283 } else if (possible_pass_through_) {
284 // This is the potentially most graceful response. There really was no
285 // error. We were just overly cautious when we added a TENTATIVE_SDCH.
286 // We added the sdch coding tag, and it should not have been added.
287 // This can happen in server experiments, where the server decides
288 // not to use sdch, even though there is a dictionary. To be
289 // conservative, we locally added the tentative sdch (fearing that a
290 // proxy stripped it!) and we must now recant (pass through).
292 // However.... just to be sure we don't get burned by proxies that
293 // re-compress with gzip or other system, we can sniff to see if this
294 // is compressed data etc. For now, we do nothing, which gets us into
295 // the meta-refresh result.
296 // TODO(jar): Improve robustness by sniffing for valid text that we can
297 // actual use re: decoding_status_ = PASS_THROUGH;
298 cause = RESPONSE_TENTATIVE_SDCH;
299 } else if (dictionary_hash_is_plausible_) {
300 // We need a meta-refresh since we don't have the dictionary.
301 // The common cause is a restart of the browser, where we try to render
302 // cached content that was saved when we had a dictionary.
303 cause = RESPONSE_NO_DICTIONARY;
304 } else if (filter_context_.SdchDictionariesAdvertised()) {
305 // This is a very corrupt SDCH request response. We can't decode it.
306 // We'll use a meta-refresh, and get content without asking for SDCH.
307 // This will also progressively disable SDCH for this domain.
308 cause = RESPONSE_CORRUPT_SDCH;
309 } else {
310 // One of the first 9 bytes precluded consideration as a hash.
311 // This can't be an SDCH payload, even though the server said it was.
312 // This is a major error, as the server or proxy tagged this SDCH even
313 // though it is not!
314 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
315 // Worse yet, meta-refresh could lead to an infinite refresh loop.
316 LogSdchProblem(SDCH_PASSING_THROUGH_NON_SDCH);
317 decoding_status_ = PASS_THROUGH;
318 // ... but further back-off on advertising SDCH support.
319 url_request_context_->sdch_manager()->BlacklistDomain(
320 url_, SDCH_PASSING_THROUGH_NON_SDCH);
321 cause = RESPONSE_ENCODING_LIE;
323 DCHECK_NE(RESPONSE_NONE, cause);
325 // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION
326 // caches the histogram name based on the call site.
327 if (filter_context_.IsCachedContent()) {
328 UMA_HISTOGRAM_ENUMERATION(
329 "Sdch3.ResponseCorruptionDetection.Cached", cause, RESPONSE_MAX);
330 } else {
331 UMA_HISTOGRAM_ENUMERATION(
332 "Sdch3.ResponseCorruptionDetection.Uncached", cause, RESPONSE_MAX);
334 filter_context_.GetNetLog().AddEvent(
335 NetLog::TYPE_SDCH_RESPONSE_CORRUPTION_DETECTION,
336 base::Bind(&NetLogSdchResponseCorruptionDetectionCallback, cause,
337 filter_context_.IsCachedContent()));
339 if (decoding_status_ == PASS_THROUGH) {
340 dest_buffer_excess_ = dictionary_hash_; // Send what we scanned.
341 } else {
342 // This is where we try to do the expensive meta-refresh.
343 if (std::string::npos == mime_type_.find("text/html")) {
344 // Since we can't do a meta-refresh (along with an exponential
345 // backoff), we'll just make sure this NEVER happens again.
346 SdchProblemCode problem = (filter_context_.IsCachedContent()
347 ? SDCH_CACHED_META_REFRESH_UNSUPPORTED
348 : SDCH_META_REFRESH_UNSUPPORTED);
349 url_request_context_->sdch_manager()->BlacklistDomainForever(
350 url_, problem);
351 LogSdchProblem(problem);
352 return FILTER_ERROR;
354 // HTML content means we can issue a meta-refresh, and get the content
355 // again, perhaps without SDCH (to be safe).
356 if (filter_context_.IsCachedContent()) {
357 // Cached content is probably a startup tab, so we'll just get fresh
358 // content and try again, without disabling sdch.
359 LogSdchProblem(SDCH_META_REFRESH_CACHED_RECOVERY);
360 } else {
361 // Since it wasn't in the cache, we definately need at least some
362 // period of blacklisting to get the correct content.
363 url_request_context_->sdch_manager()->BlacklistDomain(
364 url_, SDCH_META_REFRESH_RECOVERY);
365 LogSdchProblem(SDCH_META_REFRESH_RECOVERY);
367 decoding_status_ = META_REFRESH_RECOVERY;
368 // Issue a meta redirect with SDCH disabled.
369 dest_buffer_excess_ = kDecompressionErrorHtml;
371 } else {
372 DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
376 int amount = OutputBufferExcess(dest_buffer, available_space);
377 *dest_len += amount;
378 dest_buffer += amount;
379 available_space -= amount;
380 DCHECK_GE(available_space, 0);
382 if (available_space <= 0)
383 return FILTER_OK;
384 DCHECK(dest_buffer_excess_.empty());
385 DCHECK_EQ(0u, dest_buffer_excess_index_);
387 if (decoding_status_ != DECODING_IN_PROGRESS) {
388 if (META_REFRESH_RECOVERY == decoding_status_) {
389 // Absorb all input data. We've already output page reload HTML.
390 next_stream_data_ = NULL;
391 stream_data_len_ = 0;
392 return FILTER_NEED_MORE_DATA;
394 if (PASS_THROUGH == decoding_status_) {
395 // We must pass in available_space, but it will be changed to bytes_used.
396 FilterStatus result = CopyOut(dest_buffer, &available_space);
397 // Accumulate the returned count of bytes_used (a.k.a., available_space).
398 *dest_len += available_space;
399 return result;
401 DCHECK(false);
402 decoding_status_ = DECODING_ERROR;
403 return FILTER_ERROR;
406 if (!next_stream_data_ || stream_data_len_ <= 0)
407 return FILTER_NEED_MORE_DATA;
409 // A note on accounting: DecodeChunk() appends to its output buffer, so any
410 // preexisting data in |dest_buffer_excess_| could skew the value of
411 // |output_bytes_|. However, OutputBufferExcess guarantees that it will
412 // consume all of |dest_buffer_excess_| when called above unless the
413 // destination buffer runs out of space, and if the destination buffer runs
414 // out of space, this code returns FILTER_OK early above. Therefore, if
415 // execution reaches this point, |dest_buffer_excess_| is empty, which is
416 // DCHECKed above.
417 bool ret = vcdiff_streaming_decoder_->DecodeChunk(
418 next_stream_data_, stream_data_len_, &dest_buffer_excess_);
419 // Assume all data was used in decoding.
420 next_stream_data_ = NULL;
421 source_bytes_ += stream_data_len_;
422 stream_data_len_ = 0;
423 output_bytes_ += dest_buffer_excess_.size();
424 if (!ret) {
425 vcdiff_streaming_decoder_.reset(NULL); // Don't call it again.
426 decoding_status_ = DECODING_ERROR;
427 LogSdchProblem(SDCH_DECODE_BODY_ERROR);
428 return FILTER_ERROR;
431 amount = OutputBufferExcess(dest_buffer, available_space);
432 *dest_len += amount;
433 dest_buffer += amount;
434 available_space -= amount;
435 if (0 == available_space && !dest_buffer_excess_.empty())
436 return FILTER_OK;
437 return FILTER_NEED_MORE_DATA;
440 Filter::FilterStatus SdchFilter::InitializeDictionary() {
441 const size_t kServerIdLength = 9; // Dictionary hash plus null from server.
442 size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
443 DCHECK_GT(bytes_needed, 0u);
444 if (!next_stream_data_)
445 return FILTER_NEED_MORE_DATA;
446 if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
447 dictionary_hash_.append(next_stream_data_, stream_data_len_);
448 next_stream_data_ = NULL;
449 stream_data_len_ = 0;
450 return FILTER_NEED_MORE_DATA;
452 dictionary_hash_.append(next_stream_data_, bytes_needed);
453 DCHECK(kServerIdLength == dictionary_hash_.size());
454 stream_data_len_ -= bytes_needed;
455 DCHECK_LE(0, stream_data_len_);
456 if (stream_data_len_ > 0)
457 next_stream_data_ += bytes_needed;
458 else
459 next_stream_data_ = NULL;
461 DCHECK(!dictionary_);
462 dictionary_hash_is_plausible_ = true; // Assume plausible, but check.
464 SdchProblemCode rv = SDCH_OK;
465 if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
466 std::string server_hash(dictionary_hash_, 0, kServerIdLength - 1);
467 SdchManager::DictionarySet* handle =
468 filter_context_.SdchDictionariesAdvertised();
469 if (handle)
470 dictionary_ = handle->GetDictionary(server_hash);
471 if (!dictionary_) {
472 // This is a hack. Naively, the dictionaries available for
473 // decoding should be only the ones advertised. However, there are
474 // cases, specifically resources encoded with old dictionaries living
475 // in the cache, that mean the full set of dictionaries should be made
476 // available for decoding. It's not known how often this happens;
477 // if it happens rarely enough, this code can be removed.
479 // TODO(rdsmith): Long-term, a better solution is necessary, since
480 // an entry in the cache being encoded with the dictionary doesn't
481 // guarantee that the dictionary is present. That solution probably
482 // involves storing unencoded resources in the cache, but might
483 // involve evicting encoded resources on dictionary removal.
484 // See http://crbug.com/383405.
485 unexpected_dictionary_handle_ =
486 url_request_context_->sdch_manager()->GetDictionarySetByHash(
487 url_, server_hash, &rv);
488 if (unexpected_dictionary_handle_) {
489 dictionary_ = unexpected_dictionary_handle_->GetDictionary(server_hash);
490 // Override SDCH_OK rv; this is still worth logging.
491 rv = (filter_context_.IsCachedContent() ?
492 SDCH_UNADVERTISED_DICTIONARY_USED_CACHED :
493 SDCH_UNADVERTISED_DICTIONARY_USED);
494 } else {
495 // Since dictionary was not found, check to see if hash was
496 // even plausible.
497 DCHECK(dictionary_hash_.size() == kServerIdLength);
498 rv = SDCH_DICTIONARY_HASH_NOT_FOUND;
499 for (size_t i = 0; i < kServerIdLength - 1; ++i) {
500 char base64_char = dictionary_hash_[i];
501 if (!isalnum(base64_char) &&
502 '-' != base64_char && '_' != base64_char) {
503 dictionary_hash_is_plausible_ = false;
504 rv = SDCH_DICTIONARY_HASH_MALFORMED;
505 break;
510 } else {
511 dictionary_hash_is_plausible_ = false;
512 rv = SDCH_DICTIONARY_HASH_MALFORMED;
515 if (rv != SDCH_OK)
516 LogSdchProblem(rv);
518 if (!dictionary_) {
519 decoding_status_ = DECODING_ERROR;
520 return FILTER_ERROR;
523 vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
524 vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
525 vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
526 dictionary_->text().size());
527 decoding_status_ = DECODING_IN_PROGRESS;
528 return FILTER_OK;
531 int SdchFilter::OutputBufferExcess(char* const dest_buffer,
532 size_t available_space) {
533 if (dest_buffer_excess_.empty())
534 return 0;
535 DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
536 size_t amount = std::min(available_space,
537 dest_buffer_excess_.size() - dest_buffer_excess_index_);
538 memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
539 amount);
540 dest_buffer_excess_index_ += amount;
541 if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
542 DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
543 dest_buffer_excess_.clear();
544 dest_buffer_excess_index_ = 0;
546 return amount;
549 void SdchFilter::LogSdchProblem(SdchProblemCode problem) {
550 SdchManager::SdchErrorRecovery(problem);
551 filter_context_.GetNetLog().AddEvent(
552 NetLog::TYPE_SDCH_DECODING_ERROR,
553 base::Bind(&NetLogSdchResourceProblemCallback, problem));
556 } // namespace net