1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/filter/sdch_filter.h"
12 #include "base/logging.h"
13 #include "base/metrics/histogram.h"
14 #include "net/base/sdch_manager.h"
15 #include "net/url_request/url_request_context.h"
17 #include "sdch/open-vcdiff/src/google/vcdecoder.h"
// Constructs the filter around |filter_context|: keeps a reference to the
// context, starts in DECODING_UNINITIALIZED with no pending excess output,
// caches the context's URLRequestContext, and asks the context for the
// response MIME type and URL (stored in mime_type_ and url_).
21 SdchFilter::SdchFilter(const FilterContext
& filter_context
)
22 : filter_context_(filter_context
),
23 decoding_status_(DECODING_UNINITIALIZED
),
25 dictionary_hash_is_plausible_(false),
27 url_request_context_(filter_context
.GetURLRequestContext()),
28 dest_buffer_excess_(),
29 dest_buffer_excess_index_(0),
32 possible_pass_through_(false) {
// NOTE(review): |success| is reassigned by the GetURL() call below; how each
// result is checked is not visible here -- confirm both are verified.
33 bool success
= filter_context
.GetMimeType(&mime_type_
);
35 success
= filter_context
.GetURL(&url_
);
// Other methods call through sdch_manager() without a null check, so it must
// be present on the request context.
37 DCHECK(url_request_context_
->sdch_manager());
// Tears the filter down and records usage statistics.
// In order, as far as this code shows:
//  - if a vcdiff decoder is still alive, FinishDecoding() is called; a failure
//    marks the filter DECODING_ERROR, is reported as INCOMPLETE_SDCH_CONTENT,
//    and leads to a BlacklistDomain() call;
//  - output still sitting in dest_buffer_excess_ is reported as
//    UNFLUSHED_CONTENT together with size histograms;
//  - cached content is tallied as CACHE_DECODED and returns early;
//  - otherwise the final decoding_status_ selects which statistic is recorded.
40 SdchFilter::~SdchFilter() {
41 // All code here is for gathering stats, and can be removed when SDCH is
// Function-local static: the counter's value persists across destructor calls.
44 static int filter_use_count
= 0;
46 if (META_REFRESH_RECOVERY
== decoding_status_
) {
47 UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count
);
// A decoder that still exists gets a final FinishDecoding() call.
50 if (vcdiff_streaming_decoder_
.get()) {
51 if (!vcdiff_streaming_decoder_
->FinishDecoding()) {
52 decoding_status_
= DECODING_ERROR
;
53 SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT
);
54 // Make it possible for the user to hit reload, and get non-sdch content.
55 // Note this will "wear off" quickly enough, and is just meant to assure
56 // in some rare case that the user is not stuck.
57 url_request_context_
->sdch_manager()->BlacklistDomain(
59 UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
60 static_cast<int>(filter_context_
.GetByteReadCount()));
61 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_
);
62 UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_
);
// Decoded output that was produced but never handed to the caller.
66 if (!dest_buffer_excess_
.empty()) {
67 // Filter chaining error, or premature teardown.
68 SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT
);
69 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
70 static_cast<int>(filter_context_
.GetByteReadCount()));
71 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
72 dest_buffer_excess_
.size());
73 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_
);
74 UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_
);
77 if (filter_context_
.IsCachedContent()) {
78 // Not a real error, but it is useful to have this tally.
79 // TODO(jar): Remove this stat after SDCH stability is validated.
80 SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED
);
81 return; // We don't need timing stats, and we already got ratios.
// Non-cached content: record one statistic chosen by the final state.
84 switch (decoding_status_
) {
85 case DECODING_IN_PROGRESS
: {
// NOTE(review): this divides by output_bytes_; confirm a zero check guards
// the division (none is visible in this view).
87 UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
89 (filter_context_
.GetByteReadCount() * 100) / output_bytes_
));
90 UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
92 filter_context_
.RecordPacketStats(FilterContext::SDCH_DECODE
);
94 // Allow latency experiments to proceed.
95 url_request_context_
->sdch_manager()->SetAllowLatencyExperiment(
100 filter_context_
.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH
);
103 case DECODING_UNINITIALIZED
: {
104 SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED
);
107 case WAITING_FOR_DICTIONARY_SELECTION
: {
108 SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY
);
111 case DECODING_ERROR
: {
112 SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR
);
115 case META_REFRESH_RECOVERY
: {
116 // Already accounted for when set.
// Prepares the filter to start looking for a dictionary.
//   filter_type: FILTER_TYPE_SDCH_POSSIBLE marks SDCH as guessed rather than
//                required, which sets possible_pass_through_.
// Moves decoding_status_ to WAITING_FOR_DICTIONARY_SELECTION; the vcdiff
// decoder itself is not created here.
// NOTE(review): the guard on decoding_status_ != DECODING_UNINITIALIZED
// presumably rejects a repeated call -- confirm its outcome and the bool
// return values, which are not visible here.
122 bool SdchFilter::InitDecoding(Filter::FilterType filter_type
) {
123 if (decoding_status_
!= DECODING_UNINITIALIZED
)
126 // Handle case where sdch filter is guessed, but not required.
127 if (FILTER_TYPE_SDCH_POSSIBLE
== filter_type
)
128 possible_pass_through_
= true;
130 // Initialize decoder only after we have a dictionary in hand.
131 decoding_status_
= WAITING_FOR_DICTIONARY_SELECTION
;
// HTML that ReadFilteredData() queues as output when it falls back to
// meta-refresh recovery. Both variants start with a META "Refresh" tag whose
// CONTENT is "0"; the first variant additionally carries a visible notice
// telling the user the page will be reloaded.
// NOTE(review): two definitions of the same name appear here; presumably a
// build-time conditional picks one -- confirm which is active.
136 static const char* kDecompressionErrorHtml
=
137 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
138 "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
139 "border-color:black;border-style:solid;text-align:left;font-family:arial;"
140 "font-size:10pt;foreground-color:black;background-color:white\">"
141 "An error occurred. This page will be reloaded shortly. "
142 "Or press the \"reload\" button now to reload it immediately."
145 static const char* kDecompressionErrorHtml
=
146 "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
// Produces filtered output into |dest_buffer|.
//   dest_buffer: destination for output bytes.
//   dest_len:    read on entry as the space available in |dest_buffer|, then
//                reset to 0 before any output is produced; in pass-through
//                mode it is incremented by the byte count CopyOut() reports.
// While the state is WAITING_FOR_DICTIONARY_SELECTION this first runs
// InitializeDictionary(). If that reports FILTER_ERROR, the response code,
// cache status, hash plausibility and MIME type decide between passing the
// content through unchanged (PASS_THROUGH) and queueing the meta-refresh HTML
// (META_REFRESH_RECOVERY). In the decoding state, input is fed to the vcdiff
// decoder, whose output is staged in dest_buffer_excess_ and copied out via
// OutputBufferExcess().
// Returns FILTER_NEED_MORE_DATA when more input is required; other return
// values are not visible in this view.
149 Filter::FilterStatus
SdchFilter::ReadFilteredData(char* dest_buffer
,
151 int available_space
= *dest_len
;
152 *dest_len
= 0; // Nothing output yet.
154 if (!dest_buffer
|| available_space
<= 0)
157 if (WAITING_FOR_DICTIONARY_SELECTION
== decoding_status_
) {
158 FilterStatus status
= InitializeDictionary();
159 if (FILTER_NEED_MORE_DATA
== status
)
160 return FILTER_NEED_MORE_DATA
;
161 if (FILTER_ERROR
== status
) {
162 DCHECK_EQ(DECODING_ERROR
, decoding_status_
);
163 DCHECK_EQ(0u, dest_buffer_excess_index_
);
164 DCHECK(dest_buffer_excess_
.empty());
165 // This is where we try very hard to do error recovery, and make this
166 // protocol robust in the face of proxies that do many different things.
167 // If we decide that things are looking very bad (too hard to recover),
168 // we may even issue a "meta-refresh" to reload the page without an SDCH
169 // advertisement (so that we are sure we're not hurting anything).
171 // Watch out for an error page inserted by the proxy as part of a 40x
172 // error response. When we see such content molestation, we certainly
173 // need to fall into the meta-refresh case.
174 if (filter_context_
.GetResponseCode() == 404) {
175 // We could be more generous, but for now, only a "NOT FOUND" code will
176 // cause a pass through. All other bad codes will fall into a
178 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE
);
179 decoding_status_
= PASS_THROUGH
;
180 } else if (filter_context_
.GetResponseCode() != 200) {
181 // We need to meta-refresh, with SDCH disabled.
182 } else if (filter_context_
.IsCachedContent()
183 && !dictionary_hash_is_plausible_
) {
184 // We must have hit the back button, and gotten content that was fetched
185 // before we *really* advertised SDCH and a dictionary.
186 SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED
);
187 decoding_status_
= PASS_THROUGH
;
188 } else if (possible_pass_through_
) {
189 // This is the potentially most graceful response. There really was no
190 // error. We were just overly cautious when we added a TENTATIVE_SDCH.
191 // We added the sdch coding tag, and it should not have been added.
192 // This can happen in server experiments, where the server decides
193 // not to use sdch, even though there is a dictionary. To be
194 // conservative, we locally added the tentative sdch (fearing that a
195 // proxy stripped it!) and we must now recant (pass through).
196 SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH
);
197 // However.... just to be sure we don't get burned by proxies that
198 // re-compress with gzip or other system, we can sniff to see if this
199 // is compressed data etc. For now, we do nothing, which gets us into
200 // the meta-refresh result.
201 // TODO(jar): Improve robustness by sniffing for valid text that we can
202 // actual use re: decoding_status_ = PASS_THROUGH;
203 } else if (dictionary_hash_is_plausible_
) {
204 // We need a meta-refresh since we don't have the dictionary.
205 // The common cause is a restart of the browser, where we try to render
206 // cached content that was saved when we had a dictionary.
207 } else if (filter_context_
.IsSdchResponse()) {
208 // This is a very corrupt SDCH request response. We can't decode it.
209 // We'll use a meta-refresh, and get content without asking for SDCH.
210 // This will also progressively disable SDCH for this domain.
212 // One of the first 9 bytes precluded consideration as a hash.
213 // This can't be an SDCH payload, even though the server said it was.
214 // This is a major error, as the server or proxy tagged this SDCH even
216 // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!!
217 // Worse yet, meta-refresh could lead to an infinite refresh loop.
218 SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH
);
219 decoding_status_
= PASS_THROUGH
;
220 // ... but further back-off on advertising SDCH support.
221 url_request_context_
->sdch_manager()->BlacklistDomain(url_
);
// Either replay the scanned bytes (pass-through) or fall back to meta-refresh.
224 if (decoding_status_
== PASS_THROUGH
) {
225 dest_buffer_excess_
= dictionary_hash_
; // Send what we scanned.
227 // This is where we try to do the expensive meta-refresh.
228 if (std::string::npos
== mime_type_
.find("text/html")) {
229 // Since we can't do a meta-refresh (along with an exponential
230 // backoff), we'll just make sure this NEVER happens again.
231 url_request_context_
->sdch_manager()->BlacklistDomainForever(url_
);
232 if (filter_context_
.IsCachedContent())
233 SdchManager::SdchErrorRecovery(
234 SdchManager::CACHED_META_REFRESH_UNSUPPORTED
);
236 SdchManager::SdchErrorRecovery(
237 SdchManager::META_REFRESH_UNSUPPORTED
);
240 // HTML content means we can issue a meta-refresh, and get the content
241 // again, perhaps without SDCH (to be safe).
242 if (filter_context_
.IsCachedContent()) {
243 // Cached content is probably a startup tab, so we'll just get fresh
244 // content and try again, without disabling sdch.
245 SdchManager::SdchErrorRecovery(
246 SdchManager::META_REFRESH_CACHED_RECOVERY
);
248 // Since it wasn't in the cache, we definitely need at least some
249 // period of blacklisting to get the correct content.
250 url_request_context_
->sdch_manager()->BlacklistDomain(url_
);
251 SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY
);
253 decoding_status_
= META_REFRESH_RECOVERY
;
254 // Issue a meta redirect with SDCH disabled.
255 dest_buffer_excess_
= kDecompressionErrorHtml
;
258 DCHECK_EQ(DECODING_IN_PROGRESS
, decoding_status_
);
// Hand out whatever is already staged in dest_buffer_excess_ before
// producing anything new.
262 int amount
= OutputBufferExcess(dest_buffer
, available_space
);
264 dest_buffer
+= amount
;
265 available_space
-= amount
;
266 DCHECK_GE(available_space
, 0);
268 if (available_space
<= 0)
270 DCHECK(dest_buffer_excess_
.empty());
271 DCHECK_EQ(0u, dest_buffer_excess_index_
);
273 if (decoding_status_
!= DECODING_IN_PROGRESS
) {
274 if (META_REFRESH_RECOVERY
== decoding_status_
) {
275 // Absorb all input data. We've already output page reload HTML.
276 next_stream_data_
= NULL
;
277 stream_data_len_
= 0;
278 return FILTER_NEED_MORE_DATA
;
280 if (PASS_THROUGH
== decoding_status_
) {
281 // We must pass in available_space, but it will be changed to bytes_used.
282 FilterStatus result
= CopyOut(dest_buffer
, &available_space
);
283 // Accumulate the returned count of bytes_used (a.k.a., available_space).
284 *dest_len
+= available_space
;
288 decoding_status_
= DECODING_ERROR
;
292 if (!next_stream_data_
|| stream_data_len_
<= 0)
293 return FILTER_NEED_MORE_DATA
;
// The decoder writes its output into dest_buffer_excess_; it is copied to
// the caller's buffer further down.
295 bool ret
= vcdiff_streaming_decoder_
->DecodeChunk(
296 next_stream_data_
, stream_data_len_
, &dest_buffer_excess_
);
297 // Assume all data was used in decoding.
298 next_stream_data_
= NULL
;
299 source_bytes_
+= stream_data_len_
;
300 stream_data_len_
= 0;
301 output_bytes_
+= dest_buffer_excess_
.size();
303 vcdiff_streaming_decoder_
.reset(NULL
); // Don't call it again.
304 decoding_status_
= DECODING_ERROR
;
305 SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR
);
309 amount
= OutputBufferExcess(dest_buffer
, available_space
);
311 dest_buffer
+= amount
;
312 available_space
-= amount
;
313 if (0 == available_space
&& !dest_buffer_excess_
.empty())
315 return FILTER_NEED_MORE_DATA
;
// Collects the server-supplied dictionary id from the start of the stream and
// sets up decoding with the matching dictionary.
// The id is kServerIdLength (9) bytes and is accumulated in dictionary_hash_
// across calls. Until all 9 bytes have arrived the input is consumed and
// FILTER_NEED_MORE_DATA is returned. Once complete, the sdch_manager is asked
// for the dictionary; when that path fails, the hash is classified as
// plausible or malformed for error reporting and the state becomes
// DECODING_ERROR. On the success path a VCDiffStreamingDecoder is created,
// started with the dictionary text, and the state becomes DECODING_IN_PROGRESS.
318 Filter::FilterStatus
SdchFilter::InitializeDictionary() {
319 const size_t kServerIdLength
= 9; // Dictionary hash plus null from server.
320 size_t bytes_needed
= kServerIdLength
- dictionary_hash_
.size();
321 DCHECK_GT(bytes_needed
, 0u);
322 if (!next_stream_data_
)
323 return FILTER_NEED_MORE_DATA
;
// Not enough input to complete the id yet: keep what arrived and ask for more.
324 if (static_cast<size_t>(stream_data_len_
) < bytes_needed
) {
325 dictionary_hash_
.append(next_stream_data_
, stream_data_len_
);
326 next_stream_data_
= NULL
;
327 stream_data_len_
= 0;
328 return FILTER_NEED_MORE_DATA
;
330 dictionary_hash_
.append(next_stream_data_
, bytes_needed
);
331 DCHECK(kServerIdLength
== dictionary_hash_
.size());
332 stream_data_len_
-= bytes_needed
;
333 DCHECK_LE(0, stream_data_len_
);
334 if (stream_data_len_
> 0)
335 next_stream_data_
+= bytes_needed
;
337 next_stream_data_
= NULL
;
339 DCHECK(!dictionary_
);
340 dictionary_hash_is_plausible_
= true; // Assume plausible, but check.
// The dictionary lookup is attempted only when the id's last byte is NUL;
// the lookup key is the 8 bytes before it.
342 if ('\0' == dictionary_hash_
[kServerIdLength
- 1]) {
343 SdchManager
* manager(url_request_context_
->sdch_manager());
344 manager
->GetVcdiffDictionary(
345 std::string(dictionary_hash_
, 0, kServerIdLength
- 1),
348 dictionary_hash_is_plausible_
= false;
352 DCHECK(dictionary_hash_
.size() == kServerIdLength
);
353 // Since dictionary was not found, check to see if hash was even plausible.
354 for (size_t i
= 0; i
< kServerIdLength
- 1; ++i
) {
355 char base64_char
= dictionary_hash_
[i
];
// Any character other than an alphanumeric, '-' or '_' makes the hash
// implausible.
356 if (!isalnum(base64_char
) && '-' != base64_char
&& '_' != base64_char
) {
357 dictionary_hash_is_plausible_
= false;
361 if (dictionary_hash_is_plausible_
)
362 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND
);
364 SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED
);
365 decoding_status_
= DECODING_ERROR
;
// Dictionary in hand: create the decoder and start it on the dictionary text.
368 vcdiff_streaming_decoder_
.reset(new open_vcdiff::VCDiffStreamingDecoder
);
369 vcdiff_streaming_decoder_
->SetAllowVcdTarget(false);
370 vcdiff_streaming_decoder_
->StartDecoding(dictionary_
->text().data(),
371 dictionary_
->text().size());
372 decoding_status_
= DECODING_IN_PROGRESS
;
// Copies staged output from dest_buffer_excess_ into |dest_buffer|.
//   dest_buffer:     destination for the copied bytes.
//   available_space: maximum number of bytes that may be written.
// The copy starts at dest_buffer_excess_index_ and is limited to the smaller
// of |available_space| and the bytes still unsent. The index then advances by
// that amount; once everything has been sent the buffer is cleared and the
// index reset to 0.
// NOTE(review): the return statements are not visible here; callers add the
// result to their write pointer, so it is presumably the byte count copied
// (and 0 when the buffer is empty) -- confirm.
376 int SdchFilter::OutputBufferExcess(char* const dest_buffer
,
377 size_t available_space
) {
378 if (dest_buffer_excess_
.empty())
380 DCHECK(dest_buffer_excess_
.size() > dest_buffer_excess_index_
);
381 size_t amount
= std::min(available_space
,
382 dest_buffer_excess_
.size() - dest_buffer_excess_index_
);
383 memcpy(dest_buffer
, dest_buffer_excess_
.data() + dest_buffer_excess_index_
,
385 dest_buffer_excess_index_
+= amount
;
// Fully drained: release the staged data so the next call starts clean.
386 if (dest_buffer_excess_
.size() <= dest_buffer_excess_index_
) {
387 DCHECK(dest_buffer_excess_
.size() == dest_buffer_excess_index_
);
388 dest_buffer_excess_
.clear();
389 dest_buffer_excess_index_
= 0;