1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/filter/sdch_filter.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "base/values.h"
15 #include "net/base/sdch_manager.h"
16 #include "net/base/sdch_net_log_params.h"
17 #include "net/base/sdch_problem_codes.h"
18 #include "net/url_request/url_request_context.h"
20 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
// Number of leading response bytes that SdchFilter::InitializeDictionary()
// collects into |dictionary_hash_| before it selects a dictionary. The last
// of these bytes (index kServerIdLength - 1) is expected to be a NUL.
26 const size_t kServerIdLength
= 9; // Dictionary hash plus null from server.
28 // Disambiguate various types of responses that trigger a meta-refresh,
29 // failure, or fallback to pass-through.
// The values are recorded through UMA_HISTOGRAM_ENUMERATION and the NetLog in
// SdchFilter::ReadFilteredData().
// NOTE(review): the enumerators that belong to the first two comments below,
// and the end of the enum, fall on lines that are not visible in this view.
// RESPONSE_NONE, RESPONSE_404, RESPONSE_NOT_200 and RESPONSE_MAX are all
// referenced elsewhere in this file; confirm their values in the full file.
30 enum ResponseCorruptionDetectionCause
{
33 // 404 Http Response Code
36 // Not a 200 Http Response Code
39 // Cached before dictionary retrieved.
40 RESPONSE_OLD_UNENCODED
= 3,
42 // Speculative but incorrect SDCH filtering was added.
43 RESPONSE_TENTATIVE_SDCH
= 4,
45 // Missing correct dict for decoding.
46 RESPONSE_NO_DICTIONARY
= 5,
48 // Not an SDCH response but should be.
49 RESPONSE_CORRUPT_SDCH
= 6,
51 // No dictionary was advertised with the request, the server claims
52 // to have encoded with SDCH anyway, but it isn't an SDCH response.
53 RESPONSE_ENCODING_LIE
= 7,
// Maps |cause| to a short constant string naming it. The result is used as
// the value of the cause entry written by
// NetLogSdchResponseCorruptionDetectionCallback().
// |cause_string| starts out as the unknown marker and each visible case
// overwrites it with the name of the matching enumerator.
// NOTE(review): the switch header, some case labels, the break statements and
// the return statement fall on lines that are not visible in this view.
58 const char* ResponseCorruptionDetectionCauseToString(
59 ResponseCorruptionDetectionCause cause
) {
60 const char* cause_string
= "<unknown>";
63 cause_string
= "NONE";
68 case RESPONSE_NOT_200
:
69 cause_string
= "NOT_200";
71 case RESPONSE_OLD_UNENCODED
:
72 cause_string
= "OLD_UNENCODED";
74 case RESPONSE_TENTATIVE_SDCH
:
75 cause_string
= "TENTATIVE_SDCH";
77 case RESPONSE_NO_DICTIONARY
:
78 cause_string
= "NO_DICTIONARY";
80 case RESPONSE_CORRUPT_SDCH
:
81 cause_string
= "CORRUPT_SDCH";
83 case RESPONSE_ENCODING_LIE
:
84 cause_string
= "ENCODING_LIE";
// NOTE(review): presumably the case for the max sentinel value; its label is
// not visible in this view.
87 cause_string
= "<Error: max enum value>";
// NetLog parameter callback for TYPE_SDCH_RESPONSE_CORRUPTION_DETECTION
// events. Builds a dictionary with two entries: a string naming |cause| and a
// boolean taken from |cached|.
// SdchFilter::ReadFilteredData() binds |cause| and
// filter_context_.IsCachedContent() as the leading arguments.
// NOTE(review): the declaration of |cached| and the return statement fall on
// lines that are not visible in this view. The dictionary is allocated with
// new; presumably ownership passes to the caller through the returned
// pointer - confirm against the full file.
93 base::Value
* NetLogSdchResponseCorruptionDetectionCallback(
94 ResponseCorruptionDetectionCause cause
,
96 NetLogCaptureMode capture_mode
) {
97 base::DictionaryValue
* dict
= new base::DictionaryValue();
98 dict
->SetString("cause", ResponseCorruptionDetectionCauseToString(cause
));
99 dict
->SetBoolean("cached", cached
);
// Constructs a filter bound to |filter_context|. The filter starts in
// DECODING_UNINITIALIZED with an empty excess buffer, no plausible dictionary
// hash and pass-through disabled. It caches the URLRequestContext obtained
// from |filter_context|.
// NOTE(review): the base-class initializer and some member initializers fall
// on lines that are not visible in this view.
105 SdchFilter::SdchFilter(FilterType type
, const FilterContext
& filter_context
)
107 filter_context_(filter_context
),
108 decoding_status_(DECODING_UNINITIALIZED
),
110 dictionary_hash_is_plausible_(false),
111 url_request_context_(filter_context
.GetURLRequestContext()),
112 dest_buffer_excess_(),
113 dest_buffer_excess_index_(0),
116 possible_pass_through_(false) {
// Cache the MIME type and URL of the response. |mime_type_| is later checked
// for text/html and |url_| is used for domain blacklisting.
// NOTE(review): whatever consumes |success| is not visible in this view.
117 bool success
= filter_context
.GetMimeType(&mime_type_
);
119 success
= filter_context
.GetURL(&url_
);
// The SdchManager is dereferenced without null checks throughout this class.
121 DCHECK(url_request_context_
->sdch_manager());
// Destructor. Finishes the vcdiff decoder if one exists, reports output that
// was never flushed, and records histograms and SDCH problem codes that depend
// on the final value of |decoding_status_|.
// NOTE(review): closing braces, break statements and a few histogram
// arguments fall on lines that are not visible in this view.
124 SdchFilter::~SdchFilter() {
125 // All code here is for gathering stats, and can be removed when SDCH is
126 // considered stable.
128 // References to filter_context_ and vcdiff_streaming_decoder_ (which
129 // contains a reference to the dictionary text) are safe because
130 // ~URLRequestHttpJob calls URLRequestJob::DestroyFilters, destroying
131 // this object before the filter context in URLRequestHttpJob and its
132 // members go out of scope.
// Function-local static, so there is a single counter for all SdchFilter
// instances. NOTE(review): the statement that updates it is not visible here.
134 static int filter_use_count
= 0;
136 if (META_REFRESH_RECOVERY
== decoding_status_
) {
137 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count
);
// The decoder is created in InitializeDictionary(). A false result from
// FinishDecoding() is reported as incomplete SDCH content.
140 if (vcdiff_streaming_decoder_
.get()) {
141 if (!vcdiff_streaming_decoder_
->FinishDecoding()) {
142 decoding_status_
= DECODING_ERROR
;
143 LogSdchProblem(SDCH_INCOMPLETE_SDCH_CONTENT
);
144 // Make it possible for the user to hit reload, and get non-sdch content.
145 // Note this will "wear off" quickly enough, and is just meant to assure
146 // in some rare case that the user is not stuck.
147 url_request_context_
->sdch_manager()->BlacklistDomain(
148 url_
, SDCH_INCOMPLETE_SDCH_CONTENT
);
149 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
150 static_cast<int>(filter_context_
.GetByteReadCount()));
151 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_
);
152 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_
);
// Bytes still queued in |dest_buffer_excess_| were never handed to the reader.
156 if (!dest_buffer_excess_
.empty()) {
157 // Filter chaining error, or premature teardown.
158 LogSdchProblem(SDCH_UNFLUSHED_CONTENT
);
159 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
160 static_cast<int>(filter_context_
.GetByteReadCount()));
161 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
162 dest_buffer_excess_
.size());
163 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_
);
164 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_
);
// Cached responses only get the tally below; the per-status stats are skipped.
167 if (filter_context_
.IsCachedContent()) {
168 // Not a real error, but it is useful to have this tally.
169 // TODO(jar): Remove this stat after SDCH stability is validated.
170 LogSdchProblem(SDCH_CACHE_DECODED
);
171 return; // We don't need timing stats, and we already got ratios.
// Per-status bookkeeping for responses that were not served from the cache.
174 switch (decoding_status_
) {
175 case DECODING_IN_PROGRESS
: {
// NOTE(review): the ratio divides by |output_bytes_|. Confirm that a guard
// against a zero value exists on a line that is not visible in this view.
177 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
179 (filter_context_
.GetByteReadCount() * 100) / output_bytes_
));
180 UMA_HISTOGRAM_COUNTS("Sdch3.NetworkBytesSavedByCompression",
181 output_bytes_
- source_bytes_
);
183 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
185 filter_context_
.RecordPacketStats(FilterContext::SDCH_DECODE
);
187 // Allow latency experiments to proceed.
188 url_request_context_
->sdch_manager()->SetAllowLatencyExperiment(
191 // Notify successful dictionary usage.
// The argument is the hash without its trailing NUL byte.
192 url_request_context_
->sdch_manager()->OnDictionaryUsed(
193 std::string(dictionary_hash_
, 0, kServerIdLength
- 1));
// NOTE(review): presumably the PASS_THROUGH case; its label is not visible.
198 filter_context_
.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH
);
201 case DECODING_UNINITIALIZED
: {
202 LogSdchProblem(SDCH_UNINITIALIZED
);
205 case WAITING_FOR_DICTIONARY_SELECTION
: {
206 LogSdchProblem(SDCH_PRIOR_TO_DICTIONARY
);
209 case DECODING_ERROR
: {
210 LogSdchProblem(SDCH_DECODE_ERROR
);
213 case META_REFRESH_RECOVERY
: {
214 // Already accounted for when set.
// Prepares the filter for use. It only proceeds from DECODING_UNINITIALIZED;
// on the visible path it optionally enables pass-through and then moves to
// WAITING_FOR_DICTIONARY_SELECTION.
// |filter_type|: FILTER_TYPE_SDCH_POSSIBLE marks the SDCH encoding as a guess,
// which sets |possible_pass_through_|.
// NOTE(review): the return statements (including the one taken when the
// status check fails) fall on lines that are not visible in this view.
220 bool SdchFilter::InitDecoding(Filter::FilterType filter_type
) {
221 if (decoding_status_
!= DECODING_UNINITIALIZED
)
224 // Handle case where sdch filter is guessed, but not required.
225 if (FILTER_TYPE_SDCH_POSSIBLE
== filter_type
)
226 possible_pass_through_
= true;
228 // Initialize decoder only after we have a dictionary in hand.
229 decoding_status_
= WAITING_FOR_DICTIONARY_SELECTION
;
// HTML emitted in place of the response body when ReadFilteredData() falls
// back to META_REFRESH_RECOVERY. The META refresh with CONTENT 0 asks the
// browser to reload the page immediately.
// Two alternative definitions follow: a verbose one that also shows a visible
// notice, and a minimal one that contains only the refresh tag.
// NOTE(review): the preprocessor conditional that selects between them, and
// the end of the first definition, fall on lines not visible in this view.
234 static const char* kDecompressionErrorHtml
=
235 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
236 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
237 "border-color:black;border-style:solid;text-align:left;font-family:arial;"
238 "font-size:10pt;foreground-color:black;background-color:white\">"
239 "An error occurred. This page will be reloaded shortly. "
240 "Or press the \"reload\" button now to reload it immediately."
243 static const char* kDecompressionErrorHtml
=
244 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
// Writes filtered bytes (decoded, passed through, or recovery HTML) into
// |dest_buffer|. On entry |*dest_len| is read as the capacity of
// |dest_buffer|; it is reset to 0 before any output is produced.
// Steps visible in this view:
//   1. While WAITING_FOR_DICTIONARY_SELECTION, call InitializeDictionary().
//      On FILTER_ERROR, classify the failure into |cause|, record it in UMA
//      and the NetLog, then either queue the already scanned bytes for
//      pass-through or queue the meta-refresh page in |dest_buffer_excess_|.
//   2. Drain |dest_buffer_excess_| into |dest_buffer|.
//   3. In states other than DECODING_IN_PROGRESS: absorb all input after a
//      meta-refresh, or copy input through with CopyOut().
//   4. Otherwise hand the pending stream data to the vcdiff decoder and drain
//      the newly decoded bytes.
// NOTE(review): the |dest_len| parameter declaration, several return
// statements, else branches and closing braces fall on lines that are not
// visible in this view; confirm the exact control flow in the full file.
247 Filter::FilterStatus
SdchFilter::ReadFilteredData(char* dest_buffer
,
249 int available_space
= *dest_len
;
250 *dest_len
= 0; // Nothing output yet.
252 if (!dest_buffer
|| available_space
<= 0)
// Step 1: dictionary selection and, on failure, choice of recovery strategy.
255 if (WAITING_FOR_DICTIONARY_SELECTION
== decoding_status_
) {
256 FilterStatus status
= InitializeDictionary();
257 if (FILTER_NEED_MORE_DATA
== status
)
258 return FILTER_NEED_MORE_DATA
;
259 if (FILTER_ERROR
== status
) {
260 DCHECK_EQ(DECODING_ERROR
, decoding_status_
);
261 DCHECK_EQ(0u, dest_buffer_excess_index_
);
262 DCHECK(dest_buffer_excess_
.empty());
263 // This is where we try very hard to do error recovery, and make this
264 // protocol robust in the face of proxies that do many different things.
265 // If we decide that things are looking very bad (too hard to recover),
266 // we may even issue a "meta-refresh" to reload the page without an SDCH
267 // advertisement (so that we are sure we're not hurting anything).
269 // Watch out for an error page inserted by the proxy as part of a 40x
270 // error response. When we see such content molestation, we certainly
271 // need to fall into the meta-refresh case.
// The branches below are tested in order and the first match decides |cause|.
// Branches that set PASS_THROUGH avoid the meta-refresh handling further down.
272 ResponseCorruptionDetectionCause cause
= RESPONSE_NONE
;
273 if (filter_context_
.GetResponseCode() == 404) {
274 // We could be more generous, but for now, only a "NOT FOUND" code will
275 // cause a pass through. All other bad codes will fall into a
// (Review note: the sentence above is cut off in this view. The next branch
// handles every other non-200 code and leaves the status unchanged.)
277 LogSdchProblem(SDCH_PASS_THROUGH_404_CODE
);
278 cause
= RESPONSE_404
;
279 decoding_status_
= PASS_THROUGH
;
280 } else if (filter_context_
.GetResponseCode() != 200) {
281 // We need to meta-refresh, with SDCH disabled.
282 cause
= RESPONSE_NOT_200
;
283 } else if (filter_context_
.IsCachedContent()
284 && !dictionary_hash_is_plausible_
) {
285 // We must have hit the back button, and gotten content that was fetched
286 // before we *really* advertised SDCH and a dictionary.
287 LogSdchProblem(SDCH_PASS_THROUGH_OLD_CACHED
);
288 decoding_status_
= PASS_THROUGH
;
289 cause
= RESPONSE_OLD_UNENCODED
;
290 } else if (possible_pass_through_
) {
291 // This is the potentially most graceful response. There really was no
292 // error. We were just overly cautious when we added a TENTATIVE_SDCH.
293 // We added the sdch coding tag, and it should not have been added.
294 // This can happen in server experiments, where the server decides
295 // not to use sdch, even though there is a dictionary. To be
296 // conservative, we locally added the tentative sdch (fearing that a
297 // proxy stripped it!) and we must now recant (pass through).
299 // However.... just to be sure we don't get burned by proxies that
300 // re-compress with gzip or other system, we can sniff to see if this
301 // is compressed data etc. For now, we do nothing, which gets us into
302 // the meta-refresh result.
303 // TODO(jar): Improve robustness by sniffing for valid text that we can
304 // actually use re: decoding_status_ = PASS_THROUGH;
305 cause
= RESPONSE_TENTATIVE_SDCH
;
306 } else if (dictionary_hash_is_plausible_
) {
307 // We need a meta-refresh since we don't have the dictionary.
308 // The common cause is a restart of the browser, where we try to render
309 // cached content that was saved when we had a dictionary.
310 cause
= RESPONSE_NO_DICTIONARY
;
311 } else if (filter_context_
.SdchDictionariesAdvertised()) {
312 // This is a very corrupt SDCH request response. We can't decode it.
313 // We'll use a meta-refresh, and get content without asking for SDCH.
314 // This will also progressively disable SDCH for this domain.
315 cause
= RESPONSE_CORRUPT_SDCH
;
// NOTE(review): presumably the final else branch (no dictionary advertised);
// the else line itself is not visible in this view.
317 // One of the first 9 bytes precluded consideration as a hash.
318 // This can't be an SDCH payload, even though the server said it was.
319 // This is a major error, as the server or proxy tagged this SDCH even
321 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
322 // Worse yet, meta-refresh could lead to an infinite refresh loop.
323 LogSdchProblem(SDCH_PASSING_THROUGH_NON_SDCH
);
324 decoding_status_
= PASS_THROUGH
;
325 // ... but further back-off on advertising SDCH support.
326 url_request_context_
->sdch_manager()->BlacklistDomain(
327 url_
, SDCH_PASSING_THROUGH_NON_SDCH
);
328 cause
= RESPONSE_ENCODING_LIE
;
// Every branch above must have assigned a cause.
330 DCHECK_NE(RESPONSE_NONE
, cause
);
332 // Use if statement rather than ?: because UMA_HISTOGRAM_ENUMERATION
333 // caches the histogram name based on the call site.
334 if (filter_context_
.IsCachedContent()) {
335 UMA_HISTOGRAM_ENUMERATION(
336 "Sdch3.ResponseCorruptionDetection.Cached", cause
, RESPONSE_MAX
);
338 UMA_HISTOGRAM_ENUMERATION(
339 "Sdch3.ResponseCorruptionDetection.Uncached", cause
, RESPONSE_MAX
);
// Mirror the classification into the NetLog together with the cached flag.
341 filter_context_
.GetNetLog().AddEvent(
342 NetLog::TYPE_SDCH_RESPONSE_CORRUPTION_DETECTION
,
343 base::Bind(&NetLogSdchResponseCorruptionDetectionCallback
, cause
,
344 filter_context_
.IsCachedContent()));
// Pass-through: the bytes consumed while scanning for the hash are queued so
// that they are emitted first.
346 if (decoding_status_
== PASS_THROUGH
) {
347 dest_buffer_excess_
= dictionary_hash_
; // Send what we scanned.
349 // This is where we try to do the expensive meta-refresh.
350 if (std::string::npos
== mime_type_
.find("text/html")) {
351 // Since we can't do a meta-refresh (along with an exponential
352 // backoff), we'll just make sure this NEVER happens again.
353 SdchProblemCode problem
= (filter_context_
.IsCachedContent()
354 ? SDCH_CACHED_META_REFRESH_UNSUPPORTED
355 : SDCH_META_REFRESH_UNSUPPORTED
);
356 url_request_context_
->sdch_manager()->BlacklistDomainForever(
358 LogSdchProblem(problem
);
361 // HTML content means we can issue a meta-refresh, and get the content
362 // again, perhaps without SDCH (to be safe).
363 if (filter_context_
.IsCachedContent()) {
364 // Cached content is probably a startup tab, so we'll just get fresh
365 // content and try again, without disabling sdch.
366 LogSdchProblem(SDCH_META_REFRESH_CACHED_RECOVERY
);
368 // Since it wasn't in the cache, we definitely need at least some
369 // period of blacklisting to get the correct content.
370 url_request_context_
->sdch_manager()->BlacklistDomain(
371 url_
, SDCH_META_REFRESH_RECOVERY
);
372 LogSdchProblem(SDCH_META_REFRESH_RECOVERY
);
374 decoding_status_
= META_REFRESH_RECOVERY
;
375 // Issue a meta redirect with SDCH disabled.
376 dest_buffer_excess_
= kDecompressionErrorHtml
;
379 DCHECK_EQ(DECODING_IN_PROGRESS
, decoding_status_
);
// Step 2: emit whatever is already queued before producing anything new.
383 int amount
= OutputBufferExcess(dest_buffer
, available_space
);
385 dest_buffer
+= amount
;
386 available_space
-= amount
;
387 DCHECK_GE(available_space
, 0);
389 if (available_space
<= 0)
391 DCHECK(dest_buffer_excess_
.empty());
392 DCHECK_EQ(0u, dest_buffer_excess_index_
);
// Step 3: states in which the vcdiff decoder is not used.
394 if (decoding_status_
!= DECODING_IN_PROGRESS
) {
395 if (META_REFRESH_RECOVERY
== decoding_status_
) {
396 // Absorb all input data. We've already output page reload HTML.
397 next_stream_data_
= NULL
;
398 stream_data_len_
= 0;
399 return FILTER_NEED_MORE_DATA
;
401 if (PASS_THROUGH
== decoding_status_
) {
402 // We must pass in available_space, but it will be changed to bytes_used.
403 FilterStatus result
= CopyOut(dest_buffer
, &available_space
);
404 // Accumulate the returned count of bytes_used (a.k.a., available_space).
405 *dest_len
+= available_space
;
409 decoding_status_
= DECODING_ERROR
;
// Step 4: decode. Nothing can be done without pending input.
413 if (!next_stream_data_
|| stream_data_len_
<= 0)
414 return FILTER_NEED_MORE_DATA
;
416 // A note on accounting: DecodeChunk() appends to its output buffer, so any
417 // preexisting data in |dest_buffer_excess_| could skew the value of
418 // |output_bytes_|. However, OutputBufferExcess guarantees that it will
419 // consume all of |dest_buffer_excess_| when called above unless the
420 // destination buffer runs out of space, and if the destination buffer runs
421 // out of space, this code returns FILTER_OK early above. Therefore, if
422 // execution reaches this point, |dest_buffer_excess_| is empty, which is
// (Review note: the end of the sentence above is not visible in this view.)
424 bool ret
= vcdiff_streaming_decoder_
->DecodeChunk(
425 next_stream_data_
, stream_data_len_
, &dest_buffer_excess_
);
426 // Assume all data was used in decoding.
427 next_stream_data_
= NULL
;
428 source_bytes_
+= stream_data_len_
;
429 stream_data_len_
= 0;
430 output_bytes_
+= dest_buffer_excess_
.size();
// NOTE(review): presumably the failure path for a false |ret|; the test on
// |ret| is not visible in this view.
432 vcdiff_streaming_decoder_
.reset(NULL
); // Don't call it again.
433 decoding_status_
= DECODING_ERROR
;
434 LogSdchProblem(SDCH_DECODE_BODY_ERROR
);
// Drain the freshly decoded bytes into the space that is left.
438 amount
= OutputBufferExcess(dest_buffer
, available_space
);
440 dest_buffer
+= amount
;
441 available_space
-= amount
;
442 if (0 == available_space
&& !dest_buffer_excess_
.empty())
444 return FILTER_NEED_MORE_DATA
;
// Collects the first kServerIdLength bytes of the stream into
// |dictionary_hash_|, looks up the dictionary text for that hash and, when
// text is available, creates and starts the vcdiff streaming decoder and sets
// DECODING_IN_PROGRESS. Returns FILTER_NEED_MORE_DATA while the hash is still
// incomplete. When no dictionary text is found, |decoding_status_| becomes
// DECODING_ERROR. The outcome of the lookup is tracked in |rv| and
// |dictionary_hash_is_plausible_|.
// NOTE(review): several lines, including else branches, break statements,
// the logging of |rv| and the final return statements, are not visible in
// this view; confirm the exact control flow in the full file.
447 Filter::FilterStatus
SdchFilter::InitializeDictionary() {
448 size_t bytes_needed
= kServerIdLength
- dictionary_hash_
.size();
449 DCHECK_GT(bytes_needed
, 0u);
// No input buffered yet.
450 if (!next_stream_data_
)
451 return FILTER_NEED_MORE_DATA
;
// Too few bytes to complete the hash: keep what arrived and wait for more.
452 if (static_cast<size_t>(stream_data_len_
) < bytes_needed
) {
453 dictionary_hash_
.append(next_stream_data_
, stream_data_len_
);
454 next_stream_data_
= NULL
;
455 stream_data_len_
= 0;
456 return FILTER_NEED_MORE_DATA
;
// Enough bytes: take exactly the missing part and leave the remainder of the
// input in place for later processing.
458 dictionary_hash_
.append(next_stream_data_
, bytes_needed
);
459 DCHECK(kServerIdLength
== dictionary_hash_
.size());
460 stream_data_len_
-= bytes_needed
;
461 DCHECK_LE(0, stream_data_len_
);
462 if (stream_data_len_
> 0)
463 next_stream_data_
+= bytes_needed
;
465 next_stream_data_
= NULL
;
467 const std::string
* dictionary_text
= nullptr;
468 dictionary_hash_is_plausible_
= true; // Assume plausible, but check.
470 SdchProblemCode rv
= SDCH_OK
;
// A well-formed hash ends with a NUL at index kServerIdLength - 1.
471 if ('\0' == dictionary_hash_
[kServerIdLength
- 1]) {
472 std::string
server_hash(dictionary_hash_
, 0, kServerIdLength
- 1);
// NOTE(review): ReadFilteredData() tests SdchDictionariesAdvertised() as a
// condition, which suggests it can be null. Confirm that the dereference of
// |handle| below is guarded on a line that is not visible in this view.
473 SdchManager::DictionarySet
* handle
=
474 filter_context_
.SdchDictionariesAdvertised();
476 dictionary_text
= handle
->GetDictionaryText(server_hash
);
477 if (!dictionary_text
) {
478 // This is a hack. Naively, the dictionaries available for
479 // decoding should be only the ones advertised. However, there are
480 // cases, specifically resources encoded with old dictionaries living
481 // in the cache, that mean the full set of dictionaries should be made
482 // available for decoding. It's not known how often this happens;
483 // if it happens rarely enough, this code can be removed.
485 // TODO(rdsmith): Long-term, a better solution is necessary, since
486 // an entry in the cache being encoded with the dictionary doesn't
487 // guarantee that the dictionary is present. That solution probably
488 // involves storing unencoded resources in the cache, but might
489 // involve evicting encoded resources on dictionary removal.
490 // See http://crbug.com/383405.
491 unexpected_dictionary_handle_
=
492 url_request_context_
->sdch_manager()->GetDictionarySetByHash(
493 url_
, server_hash
, &rv
);
494 if (unexpected_dictionary_handle_
) {
// NOTE(review): the left-hand side of this lookup (presumably an assignment
// to |dictionary_text|) is not visible in this view.
496 unexpected_dictionary_handle_
->GetDictionaryText(server_hash
);
497 // Override SDCH_OK rv; this is still worth logging.
498 rv
= (filter_context_
.IsCachedContent() ?
499 SDCH_UNADVERTISED_DICTIONARY_USED_CACHED
:
500 SDCH_UNADVERTISED_DICTIONARY_USED
);
502 // Since dictionary was not found, check to see if hash was
504 DCHECK(dictionary_hash_
.size() == kServerIdLength
);
505 rv
= SDCH_DICTIONARY_HASH_NOT_FOUND
;
// Plausibility check: every hash character must be alphanumeric, a dash or
// an underscore. Any other character marks the hash as malformed.
506 for (size_t i
= 0; i
< kServerIdLength
- 1; ++i
) {
507 char base64_char
= dictionary_hash_
[i
];
508 if (!isalnum(base64_char
) &&
509 '-' != base64_char
&& '_' != base64_char
) {
510 dictionary_hash_is_plausible_
= false;
511 rv
= SDCH_DICTIONARY_HASH_MALFORMED
;
// NOTE(review): presumably the branch taken when the NUL terminator is
// missing; the else line itself is not visible in this view.
518 dictionary_hash_is_plausible_
= false;
519 rv
= SDCH_DICTIONARY_HASH_MALFORMED
;
// Without dictionary text the filter is put into the error state.
525 if (!dictionary_text
) {
526 decoding_status_
= DECODING_ERROR
;
// Dictionary found: create the decoder and start it on the dictionary bytes.
530 vcdiff_streaming_decoder_
.reset(new open_vcdiff::VCDiffStreamingDecoder
);
531 vcdiff_streaming_decoder_
->SetAllowVcdTarget(false);
533 // The validity of the dictionary_text pointer is guaranteed for the
534 // lifetime of the SdchFilter by the ownership of the DictionarySet by
535 // the FilterContext/URLRequestHttpJob. All URLRequestJob filters are
536 // torn down in ~URLRequestHttpJob by a call to
537 // URLRequestJob::DestroyFilters.
538 vcdiff_streaming_decoder_
->StartDecoding(dictionary_text
->data(),
539 dictionary_text
->size());
540 decoding_status_
= DECODING_IN_PROGRESS
;
// Copies pending bytes from |dest_buffer_excess_|, starting at
// |dest_buffer_excess_index_|, into |dest_buffer|. At most |available_space|
// bytes are taken. The index is advanced by the amount taken, and once the
// whole buffer has been consumed the buffer is cleared and the index reset.
// ReadFilteredData() treats the int result as the number of bytes copied.
// NOTE(review): the length argument of memcpy and the return statements fall
// on lines that are not visible in this view.
544 int SdchFilter::OutputBufferExcess(char* const dest_buffer
,
545 size_t available_space
) {
// Nothing is pending.
546 if (dest_buffer_excess_
.empty())
548 DCHECK(dest_buffer_excess_
.size() > dest_buffer_excess_index_
);
// Take the smaller of the free space and the number of unread bytes.
549 size_t amount
= std::min(available_space
,
550 dest_buffer_excess_
.size() - dest_buffer_excess_index_
);
551 memcpy(dest_buffer
, dest_buffer_excess_
.data() + dest_buffer_excess_index_
,
553 dest_buffer_excess_index_
+= amount
;
// Fully drained: release the buffer so that later appends start from empty.
554 if (dest_buffer_excess_
.size() <= dest_buffer_excess_index_
) {
555 DCHECK(dest_buffer_excess_
.size() == dest_buffer_excess_index_
);
556 dest_buffer_excess_
.clear();
557 dest_buffer_excess_index_
= 0;
// Reports |problem| in two places: through SdchManager::SdchErrorRecovery()
// and as a TYPE_SDCH_DECODING_ERROR event, with |problem| bound as the
// callback argument, on the NetLog of the filter context.
// NOTE(review): the end of this function is not visible in this view.
562 void SdchFilter::LogSdchProblem(SdchProblemCode problem
) {
563 SdchManager::SdchErrorRecovery(problem
);
564 filter_context_
.GetNetLog().AddEvent(
565 NetLog::TYPE_SDCH_DECODING_ERROR
,
566 base::Bind(&NetLogSdchResourceProblemCallback
, problem
));