1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_filter.h"
11 #include "base/logging.h"
12 #include "base/metrics/histogram.h"
13 #include "net/base/sdch_manager.h"
15 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
// Builds an SDCH filter bound to |filter_context|. The member initializers
// visible here start the filter in DECODING_UNINITIALIZED with an empty
// excess-output buffer (read index 0), no plausible dictionary hash, and
// pass-through disabled. The body then asks the context for the response's
// MIME type and URL and stores them in mime_type_ and url_.
// NOTE(review): the numbers embedded in this listing skip values, so some
// initializers/statements (and the closing brace) are not visible here.
19 SdchFilter::SdchFilter(const FilterContext
& filter_context
)
20 : filter_context_(filter_context
),
21 decoding_status_(DECODING_UNINITIALIZED
),
23 dictionary_hash_is_plausible_(false),
25 dest_buffer_excess_(),
26 dest_buffer_excess_index_(0),
29 possible_pass_through_(false) {
// |success| receives the result of GetMimeType(); mime_type_ is the output.
30 bool success
= filter_context
.GetMimeType(&mime_type_
);
// |success| is overwritten with the result of GetURL(); url_ is the output.
32 success
= filter_context
.GetURL(&url_
);
// Destructor. As the comment below says, the body exists to gather
// statistics: it emits UMA histograms and SdchManager::SdchErrorRecovery()
// tallies describing how this filter ended (incomplete decode, unflushed
// output, cached content, and finally a switch on decoding_status_).
// NOTE(review): the numbers embedded in this listing skip values, so some
// statements, case labels and closing braces are not visible here.
36 SdchFilter::~SdchFilter() {
37 // All code here is for gathering stats, and can be removed when SDCH is
// Function-local static, so the counter is shared by all SdchFilter
// destructor calls. Where it is updated is not visible in this listing.
40 static int filter_use_count
= 0;
// A filter that ended in meta-refresh recovery reports the use count.
42 if (META_REFRESH_RECOVERY
== decoding_status_
) {
43 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count
);
// If a VCDIFF decoder was created, ask it to finish; a false result is
// treated as incomplete SDCH content.
46 if (vcdiff_streaming_decoder_
.get()) {
47 if (!vcdiff_streaming_decoder_
->FinishDecoding()) {
48 decoding_status_
= DECODING_ERROR
;
49 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT
);
50 // Make it possible for the user to hit reload, and get non-sdch content.
51 // Note this will "wear off" quickly enough, and is just meant to assure
52 // in some rare case that the user is not stuck.
53 SdchManager::BlacklistDomain(url_
);
// Record how many bytes were read, consumed and produced before the failure.
54 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
55 static_cast<int>(filter_context_
.GetByteReadCount()));
56 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_
);
57 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_
);
// Output that was produced but never handed to the caller.
61 if (!dest_buffer_excess_
.empty()) {
62 // Filter chaining error, or premature teardown.
63 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT
);
64 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
65 static_cast<int>(filter_context_
.GetByteReadCount()));
66 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
67 dest_buffer_excess_
.size());
68 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_
);
69 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_
);
// Cached responses are tallied and then skip the per-status stats below.
72 if (filter_context_
.IsCachedContent()) {
73 // Not a real error, but it is useful to have this tally.
74 // TODO(jar): Remove this stat after SDCH stability is validated.
75 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED
);
76 return; // We don't need timing stats, and we already got ratios.
// Final accounting, keyed on the state the filter ended in.
79 switch (decoding_status_
) {
80 case DECODING_IN_PROGRESS
: {
// Percentage of network bytes read relative to decoded bytes produced.
// NOTE(review): this divides by output_bytes_; confirm a non-zero guard
// exists on a line that is not visible in this listing.
82 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
84 (filter_context_
.GetByteReadCount() * 100) / output_bytes_
));
85 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
87 filter_context_
.RecordPacketStats(FilterContext::SDCH_DECODE
);
89 // Allow latency experiments to proceed.
90 SdchManager::Global()->SetAllowLatencyExperiment(url_
, true);
// Packet stats for the pass-through outcome. NOTE(review): the case label
// for this statement is not visible here — presumably PASS_THROUGH.
94 filter_context_
.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH
);
// The remaining states each map to one SdchManager error tally.
97 case DECODING_UNINITIALIZED
: {
98 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED
);
101 case WAITING_FOR_DICTIONARY_SELECTION
: {
102 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY
);
105 case DECODING_ERROR
: {
106 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR
);
109 case META_REFRESH_RECOVERY
: {
110 // Already accounted for when set.
// Prepares the filter for decoding.
// |filter_type|: when it equals FILTER_TYPE_SDCH_POSSIBLE, pass-through is
// allowed (possible_pass_through_ is set).
// On the visible path decoding_status_ becomes
// WAITING_FOR_DICTIONARY_SELECTION; the decoder itself is not created here.
// NOTE(review): the return statements and the statement guarded by the first
// check are not visible in this listing, so the returned values are not
// documented here.
116 bool SdchFilter::InitDecoding(Filter::FilterType filter_type
) {
// Checks whether the filter has already left the uninitialized state.
117 if (decoding_status_
!= DECODING_UNINITIALIZED
)
120 // Handle case where sdch filter is guessed, but not required.
121 if (FILTER_TYPE_SDCH_POSSIBLE
== filter_type
)
122 possible_pass_through_
= true;
124 // Initialize decoder only after we have a dictionary in hand.
125 decoding_status_
= WAITING_FOR_DICTIONARY_SELECTION
;
// HTML that ReadFilteredData() places in dest_buffer_excess_ when it switches
// to META_REFRESH_RECOVERY. Two definitions of the same name appear here.
// NOTE(review): presumably they are selected by preprocessor conditionals on
// lines that are not visible in this listing — confirm.
//
// Variant 1: a zero-delay meta refresh plus a visible banner telling the user
// that the page will reload.
130 static const char* kDecompressionErrorHtml
=
131 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
132 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
133 "border-color:black;border-style:solid;text-align:left;font-family:arial;"
134 "font-size:10pt;foreground-color:black;background-color:white\">"
135 "An error occurred. This page will be reloaded shortly. "
136 "Or press the \"reload\" button now to reload it immediately."
// Variant 2: only the zero-delay meta refresh, with no visible text.
139 static const char* kDecompressionErrorHtml
=
140 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
// Produces filtered output into |dest_buffer|.
// On entry *dest_len holds the space available in |dest_buffer|; it is reset
// to 0 before any work is done, and on the pass-through path the number of
// bytes copied out is added to it.
// Rough flow, as far as it is visible here:
//   1. While WAITING_FOR_DICTIONARY_SELECTION, call InitializeDictionary().
//      On FILTER_ERROR choose a recovery: pass the raw bytes through, or
//      emit kDecompressionErrorHtml to force a reload (meta-refresh).
//   2. Drain any pending bytes in dest_buffer_excess_ to the caller.
//   3. Depending on decoding_status_, absorb input, pass input through, or
//      feed input to the VCDIFF decoder and drain its output.
// NOTE(review): the numbers embedded in this listing skip values; the second
// parameter's declaration, several else/return lines and the closing braces
// are not visible, so only the visible return values are documented.
143 Filter::FilterStatus
SdchFilter::ReadFilteredData(char* dest_buffer
,
// Remember the caller's capacity, then report zero bytes written so far.
145 int available_space
= *dest_len
;
146 *dest_len
= 0; // Nothing output yet.
// Guard against a missing buffer or no room (the guarded statement is not
// visible in this listing).
148 if (!dest_buffer
|| available_space
<= 0)
// The dictionary hash at the head of the stream has not been processed yet.
151 if (WAITING_FOR_DICTIONARY_SELECTION
== decoding_status_
) {
152 FilterStatus status
= InitializeDictionary();
153 if (FILTER_NEED_MORE_DATA
== status
)
154 return FILTER_NEED_MORE_DATA
;
// Dictionary selection failed; nothing may have been queued for output yet.
155 if (FILTER_ERROR
== status
) {
156 DCHECK_EQ(DECODING_ERROR
, decoding_status_
);
157 DCHECK_EQ(0u, dest_buffer_excess_index_
);
158 DCHECK(dest_buffer_excess_
.empty());
159 // This is where we try very hard to do error recovery, and make this
160 // protocol robust in the face of proxies that do many different things.
161 // If we decide that things are looking very bad (too hard to recover),
162 // we may even issue a "meta-refresh" to reload the page without an SDCH
163 // advertisement (so that we are sure we're not hurting anything).
165 // Watch out for an error page inserted by the proxy as part of a 40x
166 // error response. When we see such content tampering, we certainly
167 // need to fall into the meta-refresh case.
168 if (filter_context_
.GetResponseCode() == 404) {
169 // We could be more generous, but for now, only a "NOT FOUND" code will
170 // cause a pass through. All other bad codes will fall into a
172 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE
);
173 decoding_status_
= PASS_THROUGH
;
174 } else if (filter_context_
.GetResponseCode() != 200) {
175 // We need to meta-refresh, with SDCH disabled.
176 } else if (filter_context_
.IsCachedContent()
177 && !dictionary_hash_is_plausible_
) {
178 // We must have hit the back button, and gotten content that was fetched
179 // before we *really* advertised SDCH and a dictionary.
180 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED
);
181 decoding_status_
= PASS_THROUGH
;
182 } else if (possible_pass_through_
) {
183 // This is the potentially most graceful response. There really was no
184 // error. We were just overly cautious when we added a TENTATIVE_SDCH.
185 // We added the sdch coding tag, and it should not have been added.
186 // This can happen in server experiments, where the server decides
187 // not to use sdch, even though there is a dictionary. To be
188 // conservative, we locally added the tentative sdch (fearing that a
189 // proxy stripped it!) and we must now recant (pass through).
190 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH
);
191 // However.... just to be sure we don't get burned by proxies that
192 // re-compress with gzip or other system, we can sniff to see if this
193 // is compressed data etc. For now, we do nothing, which gets us into
194 // the meta-refresh result.
195 // TODO(jar): Improve robustness by sniffing for valid text that we can
196 // actually use re: decoding_status_ = PASS_THROUGH;
197 } else if (dictionary_hash_is_plausible_
) {
198 // We need a meta-refresh since we don't have the dictionary.
199 // The common cause is a restart of the browser, where we try to render
200 // cached content that was saved when we had a dictionary.
201 } else if (filter_context_
.IsSdchResponse()) {
202 // This is a very corrupt SDCH request response. We can't decode it.
203 // We'll use a meta-refresh, and get content without asking for SDCH.
204 // This will also progressively disable SDCH for this domain.
// NOTE(review): the comments and statements below presumably belong to a
// final else branch whose opening line is not visible in this listing.
206 // One of the first 9 bytes precluded consideration as a hash.
207 // This can't be an SDCH payload, even though the server said it was.
208 // This is a major error, as the server or proxy tagged this SDCH even
210 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
211 // Worse yet, meta-refresh could lead to an infinite refresh loop.
212 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH
);
213 decoding_status_
= PASS_THROUGH
;
214 // ... but further back-off on advertising SDCH support.
215 SdchManager::BlacklistDomain(url_
);
// Pass-through was chosen above: queue the bytes that were consumed while
// scanning for the dictionary hash so that they reach the caller.
218 if (decoding_status_
== PASS_THROUGH
) {
219 dest_buffer_excess_
= dictionary_hash_
; // Send what we scanned.
221 // This is where we try to do the expensive meta-refresh.
// A MIME type that does not contain "text/html" cannot carry the refresh.
222 if (std::string::npos
== mime_type_
.find("text/html")) {
223 // Since we can't do a meta-refresh (along with an exponential
224 // backoff), we'll just make sure this NEVER happens again.
225 SdchManager::BlacklistDomainForever(url_
);
// Tally the unsupported case separately for cached and non-cached content.
226 if (filter_context_
.IsCachedContent())
227 SdchManager::SdchErrorRecovery(
228 SdchManager::CACHED_META_REFRESH_UNSUPPORTED
);
230 SdchManager::SdchErrorRecovery(
231 SdchManager::META_REFRESH_UNSUPPORTED
);
234 // HTML content means we can issue a meta-refresh, and get the content
235 // again, perhaps without SDCH (to be safe).
236 if (filter_context_
.IsCachedContent()) {
237 // Cached content is probably a startup tab, so we'll just get fresh
238 // content and try again, without disabling sdch.
239 SdchManager::SdchErrorRecovery(
240 SdchManager::META_REFRESH_CACHED_RECOVERY
);
242 // Since it wasn't in the cache, we definitely need at least some
243 // period of blacklisting to get the correct content.
244 SdchManager::BlacklistDomain(url_
);
245 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY
);
247 decoding_status_
= META_REFRESH_RECOVERY
;
248 // Issue a meta redirect with SDCH disabled.
249 dest_buffer_excess_
= kDecompressionErrorHtml
;
// NOTE(review): presumably reached only when InitializeDictionary() did not
// fail; the enclosing else is not visible in this listing.
252 DCHECK_EQ(DECODING_IN_PROGRESS
, decoding_status_
);
// Drain any pending excess output into the caller's buffer first.
256 int amount
= OutputBufferExcess(dest_buffer
, available_space
);
258 dest_buffer
+= amount
;
259 available_space
-= amount
;
260 DCHECK_GE(available_space
, 0);
// The caller's buffer is full (the guarded statement is not visible here).
262 if (available_space
<= 0)
// From here on, all previously queued output has been delivered.
264 DCHECK(dest_buffer_excess_
.empty());
265 DCHECK_EQ(0u, dest_buffer_excess_index_
);
// States other than active decoding are handled without the decoder.
267 if (decoding_status_
!= DECODING_IN_PROGRESS
) {
268 if (META_REFRESH_RECOVERY
== decoding_status_
) {
269 // Absorb all input data. We've already output page reload HTML.
270 next_stream_data_
= NULL
;
271 stream_data_len_
= 0;
272 return FILTER_NEED_MORE_DATA
;
274 if (PASS_THROUGH
== decoding_status_
) {
275 // We must pass in available_space, but it will be changed to bytes_used.
276 FilterStatus result
= CopyOut(dest_buffer
, &available_space
);
277 // Accumulate the returned count of bytes_used (a.k.a., available_space).
278 *dest_len
+= available_space
;
// Any other state ends up marked as a decoding error.
282 decoding_status_
= DECODING_ERROR
;
// Nothing can be decoded without input.
286 if (!next_stream_data_
|| stream_data_len_
<= 0)
287 return FILTER_NEED_MORE_DATA
;
// Hand the whole input chunk to the VCDIFF decoder; its output string is
// dest_buffer_excess_.
289 bool ret
= vcdiff_streaming_decoder_
->DecodeChunk(
290 next_stream_data_
, stream_data_len_
, &dest_buffer_excess_
);
291 // Assume all data was used in decoding.
292 next_stream_data_
= NULL
;
// Keep running totals of bytes consumed and produced for the stats that
// the destructor reports.
293 source_bytes_
+= stream_data_len_
;
294 stream_data_len_
= 0;
295 output_bytes_
+= dest_buffer_excess_
.size();
// Decoder failure path. NOTE(review): the condition guarding these lines is
// not visible in this listing — presumably a check that |ret| is false.
297 vcdiff_streaming_decoder_
.reset(NULL
); // Don't call it again.
298 decoding_status_
= DECODING_ERROR
;
299 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR
);
// Hand as much of the freshly decoded data as fits to the caller.
303 amount
= OutputBufferExcess(dest_buffer
, available_space
);
305 dest_buffer
+= amount
;
306 available_space
-= amount
;
// The caller's buffer is full while decoded data remains queued (the guarded
// statement is not visible in this listing).
307 if (0 == available_space
&& !dest_buffer_excess_
.empty())
309 return FILTER_NEED_MORE_DATA
;
// Reads the server-supplied dictionary id from the head of the input stream
// and sets up decoding.
// The id is kServerIdLength (9) bytes, accumulated into dictionary_hash_
// across calls. Once complete, and if its last byte is NUL, the first 8
// bytes are used to look up a dictionary through SdchManager. With a
// dictionary in hand the VCDIFF decoder is created and started and
// decoding_status_ becomes DECODING_IN_PROGRESS; otherwise an error tally is
// recorded and decoding_status_ becomes DECODING_ERROR.
// Returns FILTER_NEED_MORE_DATA while the id is still incomplete.
// NOTE(review): the numbers embedded in this listing skip values; several
// else/if/return lines and one call's trailing arguments are not visible, so
// the other return values are not documented here.
312 Filter::FilterStatus
SdchFilter::InitializeDictionary() {
313 const size_t kServerIdLength
= 9; // Dictionary hash plus null from server.
// Bytes still missing from the id. NOTE(review): this is an unsigned
// subtraction, so it would wrap rather than go negative if dictionary_hash_
// ever held more than kServerIdLength bytes; the DCHECK only catches zero.
314 size_t bytes_needed
= kServerIdLength
- dictionary_hash_
.size();
315 DCHECK_GT(bytes_needed
, 0u);
// No input has been supplied yet.
316 if (!next_stream_data_
)
317 return FILTER_NEED_MORE_DATA
;
// Not enough input to complete the id: take all of it and wait for more.
318 if (static_cast<size_t>(stream_data_len_
) < bytes_needed
) {
319 dictionary_hash_
.append(next_stream_data_
, stream_data_len_
);
320 next_stream_data_
= NULL
;
321 stream_data_len_
= 0;
322 return FILTER_NEED_MORE_DATA
;
// Enough input: take exactly the missing bytes, completing the id.
324 dictionary_hash_
.append(next_stream_data_
, bytes_needed
);
325 DCHECK(kServerIdLength
== dictionary_hash_
.size());
// Consume those bytes from the input stream.
326 stream_data_len_
-= bytes_needed
;
327 DCHECK_LE(0, stream_data_len_
);
328 if (stream_data_len_
> 0)
329 next_stream_data_
+= bytes_needed
;
// NOTE(review): presumably the else branch (input fully consumed); the else
// line is not visible in this listing.
331 next_stream_data_
= NULL
;
333 DCHECK(!dictionary_
.get());
334 dictionary_hash_is_plausible_
= true; // Assume plausible, but check.
// Look the dictionary up only when the id is NUL-terminated; the key is the
// id without its trailing NUL.
336 SdchManager::Dictionary
* dictionary
= NULL
;
337 if ('\0' == dictionary_hash_
[kServerIdLength
- 1])
338 SdchManager::Global()->GetVcdiffDictionary(std::string(dictionary_hash_
, 0,
339 kServerIdLength
- 1),
// NOTE(review): presumably the else branch for an id whose last byte is not
// NUL; the else line is not visible in this listing.
342 dictionary_hash_is_plausible_
= false;
// NOTE(review): the lines below, up to the DECODING_ERROR assignment, are
// presumably guarded by a "no dictionary found" check that is not visible.
345 DCHECK(dictionary_hash_
.size() == kServerIdLength
);
346 // Since dictionary was not found, check to see if hash was even plausible.
347 for (size_t i
= 0; i
< kServerIdLength
- 1; ++i
) {
// A plausible id character is alphanumeric, '-' or '_'.
// NOTE(review): isalnum() is given a plain char taken from the input stream;
// the C library leaves the behavior undefined for negative values, so a cast
// to unsigned char before the call would be safer — confirm and fix.
348 char base64_char
= dictionary_hash_
[i
];
349 if (!isalnum(base64_char
) && '-' != base64_char
&& '_' != base64_char
) {
350 dictionary_hash_is_plausible_
= false;
// Tally whether the id looked valid but was unknown, or was malformed.
354 if (dictionary_hash_is_plausible_
)
355 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND
);
357 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED
);
358 decoding_status_
= DECODING_ERROR
;
// Dictionary found: keep it, then create the streaming decoder and start it
// with the dictionary text (VCD_TARGET use is disallowed).
361 dictionary_
= dictionary
;
362 vcdiff_streaming_decoder_
.reset(new open_vcdiff::VCDiffStreamingDecoder
);
363 vcdiff_streaming_decoder_
->SetAllowVcdTarget(false);
364 vcdiff_streaming_decoder_
->StartDecoding(dictionary_
->text().data(),
365 dictionary_
->text().size());
366 decoding_status_
= DECODING_IN_PROGRESS
;
370 int SdchFilter::OutputBufferExcess(char* const dest_buffer
,
371 size_t available_space
) {
372 if (dest_buffer_excess_
.empty())
374 DCHECK(dest_buffer_excess_
.size() > dest_buffer_excess_index_
);
375 size_t amount
= std::min(available_space
,
376 dest_buffer_excess_
.size() - dest_buffer_excess_index_
);
377 memcpy(dest_buffer
, dest_buffer_excess_
.data() + dest_buffer_excess_index_
,
379 dest_buffer_excess_index_
+= amount
;
380 if (dest_buffer_excess_
.size() <= dest_buffer_excess_index_
) {
381 DCHECK(dest_buffer_excess_
.size() == dest_buffer_excess_index_
);
382 dest_buffer_excess_
.clear();
383 dest_buffer_excess_index_
= 0;