WebKit Roll 77251:77261.
[chromium-blink-merge.git] / courgette / ensemble_create.cc
blob98b7ba062aadcf12b7cf538734ef0a260454d75d
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // The main idea in Courgette is to do patching *under a transformation*. The
6 // input is transformed into a new representation, patching occurs in the new
7 // representation, and then the transform is reversed to get the patched data.
8 //
9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
10 // Each of the elements has to go through the same set of steps in lock-step.
12 // This file contains the code to create the patch.
15 #include "courgette/ensemble.h"
17 #include <vector>
18 #include <limits>
20 #include "base/basictypes.h"
21 #include "base/logging.h"
22 #include "base/time.h"
24 #include "courgette/third_party/bsdiff.h"
25 #include "courgette/crc.h"
26 #include "courgette/difference_estimator.h"
27 #include "courgette/image_info.h"
28 #include "courgette/streams.h"
29 #include "courgette/region.h"
30 #include "courgette/simple_delta.h"
32 #include "courgette/win32_x86_patcher.h"
33 #include "courgette/win32_x86_generator.h"
35 namespace courgette {
37 TransformationPatchGenerator::TransformationPatchGenerator(
38 Element* old_element,
39 Element* new_element,
40 TransformationPatcher* patcher)
41 : old_element_(old_element),
42 new_element_(new_element),
43 patcher_(patcher) {
46 TransformationPatchGenerator::~TransformationPatchGenerator() {
47 delete patcher_;
50 // The default implementation of PredictTransformParameters delegates to the
51 // patcher.
52 Status TransformationPatchGenerator::PredictTransformParameters(
53 SinkStreamSet* prediction) {
54 return patcher_->PredictTransformParameters(prediction);
57 // The default implementation of Reform delegates to the patcher.
58 Status TransformationPatchGenerator::Reform(
59 SourceStreamSet* transformed_element,
60 SinkStream* reformed_element) {
61 return patcher_->Reform(transformed_element, reformed_element);
64 // Makes a TransformationPatchGenerator of the appropriate variety for the
65 // Element kind.
66 TransformationPatchGenerator* MakeGenerator(Element* old_element,
67 Element* new_element) {
68 if (new_element->kind() == Element::WIN32_X86_WITH_CODE) {
69 CourgetteWin32X86PatchGenerator* generator =
70 new CourgetteWin32X86PatchGenerator(
71 old_element,
72 new_element,
73 new CourgetteWin32X86Patcher(old_element->region()));
74 return generator;
75 } else {
76 LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
77 return NULL;
81 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element
82 // from 'old' is matched as the closest element to multiple elements from 'new'.
83 // Each time this happens, the old element is transformed and serialized. This
84 // is a problem when the old element is huge compared with the new element
85 // because the mutliple serialized copies can be much bigger than the size of
86 // either ensemble.
88 // The right way to avoid this is to ensure any one element from 'old' is
89 // serialized once, which requires matching code in the patch application.
91 // This is a quick hack to avoid the problem by prohibiting a big difference in
92 // size between matching elements.
93 bool UnsafeDifference(Element* old_element, Element* new_element) {
94 double kMaxBloat = 2.0;
95 size_t kMinWorrysomeDifference = 2 << 20; // 2MB
96 size_t old_size = old_element->region().length();
97 size_t new_size = new_element->region().length();
98 size_t low_size = std::min(old_size, new_size);
99 size_t high_size = std::max(old_size, new_size);
100 if (high_size - low_size < kMinWorrysomeDifference) return false;
101 if (high_size < low_size * kMaxBloat) return false;
102 return true;
105 // FindGenerators finds TransformationPatchGenerators for the elements of
106 // |new_ensemble|. For each element of |new_ensemble| we find the closest
107 // matching element from |old_ensemble| and use that as the basis for
108 // differential compression. The elements have to be the same kind so as to
109 // support transformation into the same kind of 'new representation'.
111 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
112 std::vector<TransformationPatchGenerator*>* generators) {
113 base::Time start_find_time = base::Time::Now();
114 old_ensemble->FindEmbeddedElements();
115 new_ensemble->FindEmbeddedElements();
116 VLOG(1) << "done FindEmbeddedElements "
117 << (base::Time::Now() - start_find_time).InSecondsF();
119 std::vector<Element*> old_elements(old_ensemble->elements());
120 std::vector<Element*> new_elements(new_ensemble->elements());
122 VLOG(1) << "old has " << old_elements.size() << " elements";
123 VLOG(1) << "new has " << new_elements.size() << " elements";
125 DifferenceEstimator difference_estimator;
126 std::vector<DifferenceEstimator::Base*> bases;
128 base::Time start_bases_time = base::Time::Now();
129 for (size_t i = 0; i < old_elements.size(); ++i) {
130 bases.push_back(
131 difference_estimator.MakeBase(old_elements[i]->region()));
133 VLOG(1) << "done make bases "
134 << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
136 for (size_t new_index = 0; new_index < new_elements.size(); ++new_index) {
137 Element* new_element = new_elements[new_index];
138 DifferenceEstimator::Subject* new_subject =
139 difference_estimator.MakeSubject(new_element->region());
141 // Search through old elements to find the best match.
143 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
144 // new_ensemble probably have a very similar structure. We can make the
145 // search faster by making the comparison provided by DifferenceEstimator
146 // more nuanced, returning early if the measured difference is greater than
147 // the current best. This will be most effective if we can arrange that the
148 // first elements we try to match are likely the 'right' ones. We could
149 // prioritize elements that are of a similar size or similar position in the
150 // sequence of elements.
152 Element* best_old_element = NULL;
153 size_t best_difference = std::numeric_limits<size_t>::max();
154 for (size_t old_index = 0; old_index < old_elements.size(); ++old_index) {
155 Element* old_element = old_elements[old_index];
156 // Elements of different kinds are incompatible.
157 if (old_element->kind() != new_element->kind())
158 continue;
160 if (UnsafeDifference(old_element, new_element))
161 continue;
163 base::Time start_compare = base::Time::Now();
164 DifferenceEstimator::Base* old_base = bases[old_index];
165 size_t difference = difference_estimator.Measure(old_base, new_subject);
167 VLOG(1) << "Compare " << old_element->Name()
168 << " to " << new_element->Name()
169 << " --> " << difference
170 << " in " << (base::Time::Now() - start_compare).InSecondsF()
171 << "s";
172 if (difference == 0) {
173 VLOG(1) << "Skip " << new_element->Name()
174 << " - identical to " << old_element->Name();
175 best_difference = 0;
176 best_old_element = NULL;
177 break;
179 if (difference < best_difference) {
180 best_difference = difference;
181 best_old_element = old_element;
185 if (best_old_element) {
186 VLOG(1) << "Matched " << best_old_element->Name()
187 << " to " << new_element->Name()
188 << " --> " << best_difference;
189 TransformationPatchGenerator* generator =
190 MakeGenerator(best_old_element, new_element);
191 if (generator)
192 generators->push_back(generator);
196 VLOG(1) << "done FindGenerators found " << generators->size()
197 << " in " << (base::Time::Now() - start_find_time).InSecondsF()
198 << "s";
200 return C_OK;
203 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
204 for (size_t i = 0; i < generators->size(); ++i) {
205 delete (*generators)[i];
207 generators->clear();
210 ////////////////////////////////////////////////////////////////////////////////
212 Status GenerateEnsemblePatch(SourceStream* base,
213 SourceStream* update,
214 SinkStream* final_patch) {
215 VLOG(1) << "start GenerateEnsemblePatch";
216 base::Time start_time = base::Time::Now();
218 Region old_region(base->Buffer(), base->Remaining());
219 Region new_region(update->Buffer(), update->Remaining());
220 Ensemble old_ensemble(old_region, "old");
221 Ensemble new_ensemble(new_region, "new");
222 std::vector<TransformationPatchGenerator*> generators;
223 Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
224 &generators);
225 if (generators_status != C_OK)
226 return generators_status;
228 SinkStreamSet patch_streams;
230 SinkStream* tranformation_descriptions = patch_streams.stream(0);
231 SinkStream* parameter_correction = patch_streams.stream(1);
232 SinkStream* transformed_elements_correction = patch_streams.stream(2);
233 SinkStream* ensemble_correction = patch_streams.stream(3);
235 size_t number_of_transformations = generators.size();
236 tranformation_descriptions->WriteSizeVarint32(number_of_transformations);
238 for (size_t i = 0; i < number_of_transformations; ++i) {
239 CourgettePatchFile::TransformationMethodId kind = generators[i]->Kind();
240 tranformation_descriptions->WriteVarint32(kind);
243 for (size_t i = 0; i < number_of_transformations; ++i) {
244 Status status =
245 generators[i]->WriteInitialParameters(tranformation_descriptions);
246 if (status != C_OK)
247 return status;
251 // Generate sub-patch for parameters.
253 SinkStreamSet predicted_parameters_sink;
254 SinkStreamSet corrected_parameters_sink;
256 for (size_t i = 0; i < number_of_transformations; ++i) {
257 SinkStreamSet single_predicted_parameters;
258 Status status;
259 status = generators[i]->PredictTransformParameters(
260 &single_predicted_parameters);
261 if (status != C_OK)
262 return status;
263 if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
264 return C_STREAM_ERROR;
266 SinkStreamSet single_corrected_parameters;
267 status = generators[i]->CorrectedTransformParameters(
268 &single_corrected_parameters);
269 if (status != C_OK)
270 return status;
271 if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
272 return C_STREAM_ERROR;
275 SinkStream linearized_predicted_parameters;
276 SinkStream linearized_corrected_parameters;
278 if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
279 return C_STREAM_ERROR;
280 if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
281 return C_STREAM_ERROR;
283 SourceStream predicted_parameters_source;
284 SourceStream corrected_parameters_source;
285 predicted_parameters_source.Init(linearized_predicted_parameters);
286 corrected_parameters_source.Init(linearized_corrected_parameters);
288 Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
289 &corrected_parameters_source,
290 parameter_correction);
291 if (delta1_status != C_OK)
292 return delta1_status;
295 // Generate sub-patch for elements.
297 corrected_parameters_source.Init(linearized_corrected_parameters);
298 SourceStreamSet corrected_parameters_source_set;
299 if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
300 return C_STREAM_ERROR;
302 SinkStreamSet predicted_transformed_elements;
303 SinkStreamSet corrected_transformed_elements;
305 for (size_t i = 0; i < number_of_transformations; ++i) {
306 SourceStreamSet single_parameters;
307 if (!corrected_parameters_source_set.ReadSet(&single_parameters))
308 return C_STREAM_ERROR;
309 SinkStreamSet single_predicted_transformed_element;
310 SinkStreamSet single_corrected_transformed_element;
311 Status status = generators[i]->Transform(
312 &single_parameters,
313 &single_predicted_transformed_element,
314 &single_corrected_transformed_element);
315 if (status != C_OK)
316 return status;
317 if (!single_parameters.Empty())
318 return C_STREAM_NOT_CONSUMED;
319 if (!predicted_transformed_elements.WriteSet(
320 &single_predicted_transformed_element))
321 return C_STREAM_ERROR;
322 if (!corrected_transformed_elements.WriteSet(
323 &single_corrected_transformed_element))
324 return C_STREAM_ERROR;
327 if (!corrected_parameters_source_set.Empty())
328 return C_STREAM_NOT_CONSUMED;
330 SinkStream linearized_predicted_transformed_elements;
331 SinkStream linearized_corrected_transformed_elements;
333 if (!predicted_transformed_elements.CopyTo(
334 &linearized_predicted_transformed_elements))
335 return C_STREAM_ERROR;
336 if (!corrected_transformed_elements.CopyTo(
337 &linearized_corrected_transformed_elements))
338 return C_STREAM_ERROR;
340 SourceStream predicted_transformed_elements_source;
341 SourceStream corrected_transformed_elements_source;
342 predicted_transformed_elements_source
343 .Init(linearized_predicted_transformed_elements);
344 corrected_transformed_elements_source
345 .Init(linearized_corrected_transformed_elements);
347 Status delta2_status =
348 GenerateSimpleDelta(&predicted_transformed_elements_source,
349 &corrected_transformed_elements_source,
350 transformed_elements_correction);
351 if (delta2_status != C_OK)
352 return delta2_status;
355 // Generate sub-patch for whole enchilada.
357 SinkStream predicted_ensemble;
359 predicted_ensemble.Write(base->Buffer(), base->Remaining());
361 SourceStreamSet corrected_transformed_elements_source_set;
362 corrected_transformed_elements_source
363 .Init(linearized_corrected_transformed_elements);
364 if (!corrected_transformed_elements_source_set
365 .Init(&corrected_transformed_elements_source))
366 return C_STREAM_ERROR;
368 for (size_t i = 0; i < number_of_transformations; ++i) {
369 SourceStreamSet single_corrected_transformed_element;
370 if (!corrected_transformed_elements_source_set.ReadSet(
371 &single_corrected_transformed_element))
372 return C_STREAM_ERROR;
373 Status status = generators[i]->Reform(&single_corrected_transformed_element,
374 &predicted_ensemble);
375 if (status != C_OK)
376 return status;
377 if (!single_corrected_transformed_element.Empty())
378 return C_STREAM_NOT_CONSUMED;
381 if (!corrected_transformed_elements_source_set.Empty())
382 return C_STREAM_NOT_CONSUMED;
384 FreeGenerators(&generators);
386 SourceStream predicted_ensemble_source;
387 predicted_ensemble_source.Init(predicted_ensemble);
388 Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
389 update,
390 ensemble_correction);
391 if (delta3_status != C_OK)
392 return delta3_status;
395 // Final output stream has a header followed by a StreamSet.
397 final_patch->WriteVarint32(CourgettePatchFile::kMagic);
398 final_patch->WriteVarint32(CourgettePatchFile::kVersion);
400 final_patch->WriteVarint32(
401 CalculateCrc(old_region.start(), old_region.length()));
402 final_patch->WriteVarint32(
403 CalculateCrc(new_region.start(), new_region.length()));
405 if (!patch_streams.CopyTo(final_patch))
406 return C_STREAM_ERROR;
408 VLOG(1) << "done GenerateEnsemblePatch "
409 << (base::Time::Now() - start_time).InSecondsF() << "s";
411 return C_OK;
414 } // namespace