// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/filter/sdch_filter.h"

#include <ctype.h>
#include <limits.h>

#include <algorithm>

#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "net/base/sdch_manager.h"
#include "net/url_request/url_request_context.h"

#include "sdch/open-vcdiff/src/google/vcdecoder.h"

namespace net {

SdchFilter::SdchFilter(const FilterContext& filter_context)
    : filter_context_(filter_context),
      decoding_status_(DECODING_UNINITIALIZED),
      dictionary_hash_(),
      dictionary_hash_is_plausible_(false),
      dictionary_(NULL),
      url_request_context_(filter_context.GetURLRequestContext()),
      dest_buffer_excess_(),
      dest_buffer_excess_index_(0),
      source_bytes_(0),
      output_bytes_(0),
      possible_pass_through_(false) {
  bool success = filter_context.GetMimeType(&mime_type_);
  DCHECK(success);
  success = filter_context.GetURL(&url_);
  DCHECK(success);
  DCHECK(url_request_context_->sdch_manager());
}

SdchFilter::~SdchFilter() {
  // All code here is for gathering stats, and can be removed when SDCH is
  // considered stable.

  static int filter_use_count = 0;
  ++filter_use_count;
  if (META_REFRESH_RECOVERY == decoding_status_) {
    UMA_HISTOGRAM_COUNTS("Sdch3.FilterUseBeforeDisabling", filter_use_count);
  }

  if (vcdiff_streaming_decoder_.get()) {
    if (!vcdiff_streaming_decoder_->FinishDecoding()) {
      decoding_status_ = DECODING_ERROR;
      SdchManager::SdchErrorRecovery(SdchManager::INCOMPLETE_SDCH_CONTENT);
      // Make it possible for the user to hit reload, and get non-sdch content.
      // Note this will "wear off" quickly enough, and is just meant to assure
      // in some rare case that the user is not stuck.
      url_request_context_->sdch_manager()->BlacklistDomain(
          url_, SdchManager::INCOMPLETE_SDCH_CONTENT);
      UMA_HISTOGRAM_COUNTS("Sdch3.PartialBytesIn",
          static_cast<int>(filter_context_.GetByteReadCount()));
      UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffIn", source_bytes_);
      UMA_HISTOGRAM_COUNTS("Sdch3.PartialVcdiffOut", output_bytes_);
    }
  }

  if (!dest_buffer_excess_.empty()) {
    // Filter chaining error, or premature teardown.
    SdchManager::SdchErrorRecovery(SdchManager::UNFLUSHED_CONTENT);
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBytesIn",
                         static_cast<int>(filter_context_.GetByteReadCount()));
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedBufferSize",
                         dest_buffer_excess_.size());
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffIn", source_bytes_);
    UMA_HISTOGRAM_COUNTS("Sdch3.UnflushedVcdiffOut", output_bytes_);
  }

  if (filter_context_.IsCachedContent()) {
    // Not a real error, but it is useful to have this tally.
    // TODO(jar): Remove this stat after SDCH stability is validated.
    SdchManager::SdchErrorRecovery(SdchManager::CACHE_DECODED);
    return;  // We don't need timing stats, and we already got ratios.
  }

  switch (decoding_status_) {
    case DECODING_IN_PROGRESS: {
      if (output_bytes_)
        UMA_HISTOGRAM_PERCENTAGE("Sdch3.Network_Decode_Ratio_a",
            static_cast<int>(
                (filter_context_.GetByteReadCount() * 100) / output_bytes_));
      UMA_HISTOGRAM_COUNTS("Sdch3.Network_Decode_Bytes_VcdiffOut_a",
                           output_bytes_);
      filter_context_.RecordPacketStats(FilterContext::SDCH_DECODE);

      // Allow latency experiments to proceed.
      url_request_context_->sdch_manager()->SetAllowLatencyExperiment(
          url_, true);
      return;
    }
    case PASS_THROUGH: {
      filter_context_.RecordPacketStats(FilterContext::SDCH_PASSTHROUGH);
      return;
    }
    case DECODING_UNINITIALIZED: {
      SdchManager::SdchErrorRecovery(SdchManager::UNINITIALIZED);
      return;
    }
    case WAITING_FOR_DICTIONARY_SELECTION: {
      SdchManager::SdchErrorRecovery(SdchManager::PRIOR_TO_DICTIONARY);
      return;
    }
    case DECODING_ERROR: {
      SdchManager::SdchErrorRecovery(SdchManager::DECODE_ERROR);
      return;
    }
    case META_REFRESH_RECOVERY: {
      // Already accounted for when set.
      return;
    }
  }  // end of switch.
}

bool SdchFilter::InitDecoding(Filter::FilterType filter_type) {
  if (decoding_status_ != DECODING_UNINITIALIZED)
    return false;

  // Handle case where sdch filter is guessed, but not required.
  if (FILTER_TYPE_SDCH_POSSIBLE == filter_type)
    possible_pass_through_ = true;
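  // (FILTER_TYPE_SDCH_POSSIBLE marks a tentative guess: SDCH was advertised
  // on the request, but the response did not unambiguously declare the sdch
  // content-coding, so the body may turn out not to be SDCH-encoded at all.
  // See the DISCARD_TENTATIVE_SDCH handling in ReadFilteredData() below.)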

  // Initialize decoder only after we have a dictionary in hand.
  decoding_status_ = WAITING_FOR_DICTIONARY_SELECTION;
  return true;
}

#ifndef NDEBUG
static const char* kDecompressionErrorHtml =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>"
    "<div style=\"position:fixed;top:0;left:0;width:100%;border-width:thin;"
    "border-color:black;border-style:solid;text-align:left;font-family:arial;"
    "font-size:10pt;foreground-color:black;background-color:white\">"
    "An error occurred. This page will be reloaded shortly. "
    "Or press the \"reload\" button now to reload it immediately."
    "</div>";
#else
static const char* kDecompressionErrorHtml =
    "<head><META HTTP-EQUIV=\"Refresh\" CONTENT=\"0\"></head>";
#endif
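
// The META Refresh with CONTENT="0" makes the browser re-request the page
// immediately. Because the recovery paths in ReadFilteredData() first
// blacklist the domain (or, for cached content, simply refetch), that retry
// should go out without an SDCH advertisement and come back as ordinary,
// decodable content.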

Filter::FilterStatus SdchFilter::ReadFilteredData(char* dest_buffer,
                                                  int* dest_len) {
  int available_space = *dest_len;
  *dest_len = 0;  // Nothing output yet.

  if (!dest_buffer || available_space <= 0)
    return FILTER_ERROR;

  if (WAITING_FOR_DICTIONARY_SELECTION == decoding_status_) {
    FilterStatus status = InitializeDictionary();
    if (FILTER_NEED_MORE_DATA == status)
      return FILTER_NEED_MORE_DATA;
    if (FILTER_ERROR == status) {
      DCHECK_EQ(DECODING_ERROR, decoding_status_);
      DCHECK_EQ(0u, dest_buffer_excess_index_);
      DCHECK(dest_buffer_excess_.empty());
      // This is where we try very hard to do error recovery, and make this
      // protocol robust in the face of proxies that do many different things.
      // If we decide that things are looking very bad (too hard to recover),
      // we may even issue a "meta-refresh" to reload the page without an SDCH
      // advertisement (so that we are sure we're not hurting anything).
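      // (Typical intermediaries of that sort, for illustration: a captive
      // portal that swaps in its own login page, or a proxy that strips our
      // SDCH request headers and/or re-encodes the body so it is no longer
      // vcdiff data.)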

      // Watch out for an error page inserted by the proxy as part of a 40x
      // error response. When we see such content tampering, we certainly
      // need to fall into the meta-refresh case.
      if (filter_context_.GetResponseCode() == 404) {
        // We could be more generous, but for now, only a "NOT FOUND" code
        // will cause a pass through. All other bad codes will fall into a
        // meta-refresh.
        SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_404_CODE);
        decoding_status_ = PASS_THROUGH;
      } else if (filter_context_.GetResponseCode() != 200) {
        // We need to meta-refresh, with SDCH disabled.
      } else if (filter_context_.IsCachedContent()
                 && !dictionary_hash_is_plausible_) {
        // We must have hit the back button, and gotten content that was
        // fetched before we *really* advertised SDCH and a dictionary.
        SdchManager::SdchErrorRecovery(SdchManager::PASS_THROUGH_OLD_CACHED);
        decoding_status_ = PASS_THROUGH;
      } else if (possible_pass_through_) {
        // This is the potentially most graceful response. There really was no
        // error. We were just overly cautious when we added a TENTATIVE_SDCH.
        // We added the sdch coding tag, and it should not have been added.
        // This can happen in server experiments, where the server decides
        // not to use sdch, even though there is a dictionary. To be
        // conservative, we locally added the tentative sdch (fearing that a
        // proxy stripped it!) and we must now recant (pass through).
        SdchManager::SdchErrorRecovery(SdchManager::DISCARD_TENTATIVE_SDCH);
        // However, just to be sure we don't get burned by proxies that
        // re-compress with gzip or some other scheme, we could sniff to see
        // whether this is compressed data, etc. For now, we do nothing, which
        // gets us into the meta-refresh result.
        // TODO(jar): Improve robustness by sniffing for valid text that we
        // can actually use re: decoding_status_ = PASS_THROUGH;
      } else if (dictionary_hash_is_plausible_) {
        // We need a meta-refresh since we don't have the dictionary.
        // The common cause is a restart of the browser, where we try to
        // render cached content that was saved when we had a dictionary.
      } else if (filter_context_.IsSdchResponse()) {
        // This is a very corrupt SDCH response. We can't decode it.
        // We'll use a meta-refresh, and get content without asking for SDCH.
        // This will also progressively disable SDCH for this domain.
      } else {
        // One of the first 9 bytes precluded consideration as a hash.
        // This can't be an SDCH payload, even though the server said it was.
        // This is a major error, as the server or proxy tagged this SDCH even
        // though it is not!
        // Meta-refresh won't help, as we didn't advertise an SDCH dictionary!
        // Worse yet, meta-refresh could lead to an infinite refresh loop.
        SdchManager::SdchErrorRecovery(SdchManager::PASSING_THROUGH_NON_SDCH);
        decoding_status_ = PASS_THROUGH;
        // ... but further back off on advertising SDCH support.
        url_request_context_->sdch_manager()->BlacklistDomain(
            url_, SdchManager::PASSING_THROUGH_NON_SDCH);
      }
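
      // In the pass-through cases above, the bytes we consumed while scanning
      // for a dictionary hash are really the start of the (non-SDCH) body, so
      // they are queued for output before the rest is copied through.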
      if (decoding_status_ == PASS_THROUGH) {
        dest_buffer_excess_ = dictionary_hash_;  // Send what we scanned.
      } else {
        // This is where we try to do the expensive meta-refresh.
        if (std::string::npos == mime_type_.find("text/html")) {
          // Since we can't do a meta-refresh (along with an exponential
          // backoff), we'll just make sure this NEVER happens again.
          SdchManager::ProblemCodes problem =
              (filter_context_.IsCachedContent() ?
               SdchManager::CACHED_META_REFRESH_UNSUPPORTED :
               SdchManager::META_REFRESH_UNSUPPORTED);
          url_request_context_->sdch_manager()->BlacklistDomainForever(
              url_, problem);
          SdchManager::SdchErrorRecovery(problem);
          return FILTER_ERROR;
        }
        // HTML content means we can issue a meta-refresh, and get the content
        // again, perhaps without SDCH (to be safe).
        if (filter_context_.IsCachedContent()) {
          // Cached content is probably a startup tab, so we'll just get fresh
          // content and try again, without disabling sdch.
          SdchManager::SdchErrorRecovery(
              SdchManager::META_REFRESH_CACHED_RECOVERY);
        } else {
          // Since it wasn't in the cache, we definitely need at least some
          // period of blacklisting to get the correct content.
          url_request_context_->sdch_manager()->BlacklistDomain(
              url_, SdchManager::META_REFRESH_RECOVERY);
          SdchManager::SdchErrorRecovery(SdchManager::META_REFRESH_RECOVERY);
        }
        decoding_status_ = META_REFRESH_RECOVERY;
        // Issue a meta redirect with SDCH disabled.
        dest_buffer_excess_ = kDecompressionErrorHtml;
      }
    } else {
      DCHECK_EQ(DECODING_IN_PROGRESS, decoding_status_);
    }
  }

  int amount = OutputBufferExcess(dest_buffer, available_space);
  *dest_len += amount;
  dest_buffer += amount;
  available_space -= amount;
  DCHECK_GE(available_space, 0);

  if (available_space <= 0)
    return FILTER_OK;
  DCHECK(dest_buffer_excess_.empty());
  DCHECK_EQ(0u, dest_buffer_excess_index_);

  if (decoding_status_ != DECODING_IN_PROGRESS) {
    if (META_REFRESH_RECOVERY == decoding_status_) {
      // Absorb all input data. We've already output page reload HTML.
      next_stream_data_ = NULL;
      stream_data_len_ = 0;
      return FILTER_NEED_MORE_DATA;
    }
    if (PASS_THROUGH == decoding_status_) {
      // We must pass in available_space, but it will be changed to bytes_used.
      FilterStatus result = CopyOut(dest_buffer, &available_space);
      // Accumulate the returned count of bytes_used (a.k.a., available_space).
      *dest_len += available_space;
      return result;
    }
    DCHECK(false);
    decoding_status_ = DECODING_ERROR;
    return FILTER_ERROR;
  }

  if (!next_stream_data_ || stream_data_len_ <= 0)
    return FILTER_NEED_MORE_DATA;
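
  // vcdiff decoding is streaming: DecodeChunk() appends whatever it can decode
  // from this chunk to dest_buffer_excess_ and returns false if the input is
  // not well-formed vcdiff data.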
  bool ret = vcdiff_streaming_decoder_->DecodeChunk(
      next_stream_data_, stream_data_len_, &dest_buffer_excess_);
  // Assume all data was used in decoding.
  next_stream_data_ = NULL;
  source_bytes_ += stream_data_len_;
  stream_data_len_ = 0;
  output_bytes_ += dest_buffer_excess_.size();
  if (!ret) {
    vcdiff_streaming_decoder_.reset(NULL);  // Don't call it again.
    decoding_status_ = DECODING_ERROR;
    SdchManager::SdchErrorRecovery(SdchManager::DECODE_BODY_ERROR);
    return FILTER_ERROR;
  }

  amount = OutputBufferExcess(dest_buffer, available_space);
  *dest_len += amount;
  dest_buffer += amount;
  available_space -= amount;
  if (0 == available_space && !dest_buffer_excess_.empty())
    return FILTER_OK;
  return FILTER_NEED_MORE_DATA;
}

Filter::FilterStatus SdchFilter::InitializeDictionary() {
  const size_t kServerIdLength = 9;  // Dictionary hash plus null from server.
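  // The server id is expected to be the dictionary's 8-character URL-safe
  // base64 hash followed by a NUL, with the vcdiff-encoded payload after it,
  // e.g. (illustrative hash only): "9aKqv3i5\0" <vcdiff data...>.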
  size_t bytes_needed = kServerIdLength - dictionary_hash_.size();
  DCHECK_GT(bytes_needed, 0u);
  if (!next_stream_data_)
    return FILTER_NEED_MORE_DATA;
  if (static_cast<size_t>(stream_data_len_) < bytes_needed) {
    dictionary_hash_.append(next_stream_data_, stream_data_len_);
    next_stream_data_ = NULL;
    stream_data_len_ = 0;
    return FILTER_NEED_MORE_DATA;
  }
  dictionary_hash_.append(next_stream_data_, bytes_needed);
  DCHECK(kServerIdLength == dictionary_hash_.size());
  stream_data_len_ -= bytes_needed;
  DCHECK_LE(0, stream_data_len_);
  if (stream_data_len_ > 0)
    next_stream_data_ += bytes_needed;
  else
    next_stream_data_ = NULL;

  DCHECK(!dictionary_.get());
  dictionary_hash_is_plausible_ = true;  // Assume plausible, but check.

  if ('\0' == dictionary_hash_[kServerIdLength - 1]) {
    SdchManager* manager(url_request_context_->sdch_manager());
    manager->GetVcdiffDictionary(
        std::string(dictionary_hash_, 0, kServerIdLength - 1),
        url_, &dictionary_);
  } else {
    dictionary_hash_is_plausible_ = false;
  }

  if (!dictionary_.get()) {
    DCHECK(dictionary_hash_.size() == kServerIdLength);
    // Since dictionary was not found, check to see if hash was even plausible.
    for (size_t i = 0; i < kServerIdLength - 1; ++i) {
      char base64_char = dictionary_hash_[i];
      if (!isalnum(base64_char) && '-' != base64_char && '_' != base64_char) {
        dictionary_hash_is_plausible_ = false;
        break;
      }
    }
    if (dictionary_hash_is_plausible_)
      SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_NOT_FOUND);
    else
      SdchManager::SdchErrorRecovery(SdchManager::DICTIONARY_HASH_MALFORMED);
    decoding_status_ = DECODING_ERROR;
    return FILTER_ERROR;
  }
  vcdiff_streaming_decoder_.reset(new open_vcdiff::VCDiffStreamingDecoder);
  vcdiff_streaming_decoder_->SetAllowVcdTarget(false);
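  // Disallowing the VCD_TARGET window format means the delta may copy only
  // from the dictionary, not from already-decoded output, so (per open-vcdiff)
  // the decoder does not have to retain decoded target data in memory.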
  vcdiff_streaming_decoder_->StartDecoding(dictionary_->text().data(),
                                           dictionary_->text().size());
  decoding_status_ = DECODING_IN_PROGRESS;
  return FILTER_OK;
}

int SdchFilter::OutputBufferExcess(char* const dest_buffer,
                                   size_t available_space) {
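  // Drain as much of the buffered (already decoded) output as fits into
  // dest_buffer; dest_buffer_excess_index_ marks where to resume on the next
  // call, and the buffer is cleared once fully consumed.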
  if (dest_buffer_excess_.empty())
    return 0;
  DCHECK(dest_buffer_excess_.size() > dest_buffer_excess_index_);
  size_t amount = std::min(available_space,
      dest_buffer_excess_.size() - dest_buffer_excess_index_);
  memcpy(dest_buffer, dest_buffer_excess_.data() + dest_buffer_excess_index_,
         amount);
  dest_buffer_excess_index_ += amount;
  if (dest_buffer_excess_.size() <= dest_buffer_excess_index_) {
    DCHECK(dest_buffer_excess_.size() == dest_buffer_excess_index_);
    dest_buffer_excess_.clear();
    dest_buffer_excess_index_ = 0;
  }
  return amount;
}

}  // namespace net