Update V8 to version 4.7.21.
[chromium-blink-merge.git] / courgette / ensemble_create.cc
bloba0c2e0bc2a037f2fa923d4cc108bde770af7e985
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// The main idea in Courgette is to do patching *under a transformation*.  The
// input is transformed into a new representation, patching occurs in the new
// representation, and then the transform is reversed to get the patched data.
//
// The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
// Each of the elements has to go through the same set of steps in lock-step.

// This file contains the code to create the patch.

15 #include "courgette/ensemble.h"
17 #include <limits>
18 #include <vector>
20 #include "base/basictypes.h"
21 #include "base/logging.h"
22 #include "base/time/time.h"
24 #include "courgette/courgette_config.h"
25 #include "courgette/crc.h"
26 #include "courgette/difference_estimator.h"
27 #include "courgette/region.h"
28 #include "courgette/simple_delta.h"
29 #include "courgette/streams.h"
30 #include "courgette/third_party/bsdiff.h"
32 #include "courgette/patcher_x86_32.h"
33 #include "courgette/patch_generator_x86_32.h"
35 namespace courgette {
37 TransformationPatchGenerator::TransformationPatchGenerator(
38 Element* old_element,
39 Element* new_element,
40 TransformationPatcher* patcher)
41 : old_element_(old_element),
42 new_element_(new_element),
43 patcher_(patcher) {
46 TransformationPatchGenerator::~TransformationPatchGenerator() {
47 delete patcher_;
50 // The default implementation of PredictTransformParameters delegates to the
51 // patcher.
52 Status TransformationPatchGenerator::PredictTransformParameters(
53 SinkStreamSet* prediction) {
54 return patcher_->PredictTransformParameters(prediction);
57 // The default implementation of Reform delegates to the patcher.
58 Status TransformationPatchGenerator::Reform(
59 SourceStreamSet* transformed_element,
60 SinkStream* reformed_element) {
61 return patcher_->Reform(transformed_element, reformed_element);
64 // Makes a TransformationPatchGenerator of the appropriate variety for the
65 // Element kind.
66 TransformationPatchGenerator* MakeGenerator(Element* old_element,
67 Element* new_element) {
68 switch (new_element->kind()) {
69 case EXE_UNKNOWN:
70 break;
71 case EXE_WIN_32_X86: {
72 TransformationPatchGenerator* generator =
73 new PatchGeneratorX86_32(
74 old_element,
75 new_element,
76 new PatcherX86_32(old_element->region()),
77 EXE_WIN_32_X86);
78 return generator;
80 case EXE_ELF_32_X86: {
81 TransformationPatchGenerator* generator =
82 new PatchGeneratorX86_32(
83 old_element,
84 new_element,
85 new PatcherX86_32(old_element->region()),
86 EXE_ELF_32_X86);
87 return generator;
89 case EXE_ELF_32_ARM: {
90 TransformationPatchGenerator* generator =
91 new PatchGeneratorX86_32(
92 old_element,
93 new_element,
94 new PatcherX86_32(old_element->region()),
95 EXE_ELF_32_ARM);
96 return generator;
98 case EXE_WIN_32_X64: {
99 TransformationPatchGenerator* generator =
100 new PatchGeneratorX86_32(
101 old_element,
102 new_element,
103 new PatcherX86_32(old_element->region()),
104 EXE_WIN_32_X64);
105 return generator;
109 LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
110 return NULL;
113 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element
114 // from 'old' is matched as the closest element to multiple elements from 'new'.
115 // Each time this happens, the old element is transformed and serialized. This
116 // is a problem when the old element is huge compared with the new element
117 // because the mutliple serialized copies can be much bigger than the size of
118 // either ensemble.
120 // The right way to avoid this is to ensure any one element from 'old' is
121 // serialized once, which requires matching code in the patch application.
123 // This is a quick hack to avoid the problem by prohibiting a big difference in
124 // size between matching elements.
125 bool UnsafeDifference(Element* old_element, Element* new_element) {
126 double kMaxBloat = 2.0;
127 size_t kMinWorrysomeDifference = 2 << 20; // 2MB
128 size_t old_size = old_element->region().length();
129 size_t new_size = new_element->region().length();
130 size_t low_size = std::min(old_size, new_size);
131 size_t high_size = std::max(old_size, new_size);
132 if (high_size - low_size < kMinWorrysomeDifference) return false;
133 if (high_size < low_size * kMaxBloat) return false;
134 return true;
137 // FindGenerators finds TransformationPatchGenerators for the elements of
138 // |new_ensemble|. For each element of |new_ensemble| we find the closest
139 // matching element from |old_ensemble| and use that as the basis for
140 // differential compression. The elements have to be the same kind so as to
141 // support transformation into the same kind of 'new representation'.
143 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
144 std::vector<TransformationPatchGenerator*>* generators) {
145 base::Time start_find_time = base::Time::Now();
146 old_ensemble->FindEmbeddedElements();
147 new_ensemble->FindEmbeddedElements();
148 VLOG(1) << "done FindEmbeddedElements "
149 << (base::Time::Now() - start_find_time).InSecondsF();
151 std::vector<Element*> old_elements(old_ensemble->elements());
152 std::vector<Element*> new_elements(new_ensemble->elements());
154 VLOG(1) << "old has " << old_elements.size() << " elements";
155 VLOG(1) << "new has " << new_elements.size() << " elements";
157 DifferenceEstimator difference_estimator;
158 std::vector<DifferenceEstimator::Base*> bases;
160 base::Time start_bases_time = base::Time::Now();
161 for (size_t i = 0; i < old_elements.size(); ++i) {
162 bases.push_back(
163 difference_estimator.MakeBase(old_elements[i]->region()));
165 VLOG(1) << "done make bases "
166 << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
168 for (size_t new_index = 0; new_index < new_elements.size(); ++new_index) {
169 Element* new_element = new_elements[new_index];
170 DifferenceEstimator::Subject* new_subject =
171 difference_estimator.MakeSubject(new_element->region());
173 // Search through old elements to find the best match.
175 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
176 // new_ensemble probably have a very similar structure. We can make the
177 // search faster by making the comparison provided by DifferenceEstimator
178 // more nuanced, returning early if the measured difference is greater than
179 // the current best. This will be most effective if we can arrange that the
180 // first elements we try to match are likely the 'right' ones. We could
181 // prioritize elements that are of a similar size or similar position in the
182 // sequence of elements.
184 Element* best_old_element = NULL;
185 size_t best_difference = std::numeric_limits<size_t>::max();
186 for (size_t old_index = 0; old_index < old_elements.size(); ++old_index) {
187 Element* old_element = old_elements[old_index];
188 // Elements of different kinds are incompatible.
189 if (old_element->kind() != new_element->kind())
190 continue;
192 if (UnsafeDifference(old_element, new_element))
193 continue;
195 base::Time start_compare = base::Time::Now();
196 DifferenceEstimator::Base* old_base = bases[old_index];
197 size_t difference = difference_estimator.Measure(old_base, new_subject);
199 VLOG(1) << "Compare " << old_element->Name()
200 << " to " << new_element->Name()
201 << " --> " << difference
202 << " in " << (base::Time::Now() - start_compare).InSecondsF()
203 << "s";
204 if (difference == 0) {
205 VLOG(1) << "Skip " << new_element->Name()
206 << " - identical to " << old_element->Name();
207 best_difference = 0;
208 best_old_element = NULL;
209 break;
211 if (difference < best_difference) {
212 best_difference = difference;
213 best_old_element = old_element;
217 if (best_old_element) {
218 VLOG(1) << "Matched " << best_old_element->Name()
219 << " to " << new_element->Name()
220 << " --> " << best_difference;
221 TransformationPatchGenerator* generator =
222 MakeGenerator(best_old_element, new_element);
223 if (generator)
224 generators->push_back(generator);
228 VLOG(1) << "done FindGenerators found " << generators->size()
229 << " in " << (base::Time::Now() - start_find_time).InSecondsF()
230 << "s";
232 return C_OK;
235 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
236 for (size_t i = 0; i < generators->size(); ++i) {
237 delete (*generators)[i];
239 generators->clear();
242 ////////////////////////////////////////////////////////////////////////////////
244 Status GenerateEnsemblePatch(SourceStream* base,
245 SourceStream* update,
246 SinkStream* final_patch) {
247 VLOG(1) << "start GenerateEnsemblePatch";
248 base::Time start_time = base::Time::Now();
250 Region old_region(base->Buffer(), base->Remaining());
251 Region new_region(update->Buffer(), update->Remaining());
252 Ensemble old_ensemble(old_region, "old");
253 Ensemble new_ensemble(new_region, "new");
254 std::vector<TransformationPatchGenerator*> generators;
255 Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
256 &generators);
257 if (generators_status != C_OK)
258 return generators_status;
260 SinkStreamSet patch_streams;
262 SinkStream* tranformation_descriptions = patch_streams.stream(0);
263 SinkStream* parameter_correction = patch_streams.stream(1);
264 SinkStream* transformed_elements_correction = patch_streams.stream(2);
265 SinkStream* ensemble_correction = patch_streams.stream(3);
267 size_t number_of_transformations = generators.size();
268 if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
269 return C_STREAM_ERROR;
271 for (size_t i = 0; i < number_of_transformations; ++i) {
272 ExecutableType kind = generators[i]->Kind();
273 if (!tranformation_descriptions->WriteVarint32(kind))
274 return C_STREAM_ERROR;
277 for (size_t i = 0; i < number_of_transformations; ++i) {
278 Status status =
279 generators[i]->WriteInitialParameters(tranformation_descriptions);
280 if (status != C_OK)
281 return status;
285 // Generate sub-patch for parameters.
287 SinkStreamSet predicted_parameters_sink;
288 SinkStreamSet corrected_parameters_sink;
290 for (size_t i = 0; i < number_of_transformations; ++i) {
291 SinkStreamSet single_predicted_parameters;
292 Status status;
293 status = generators[i]->PredictTransformParameters(
294 &single_predicted_parameters);
295 if (status != C_OK)
296 return status;
297 if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
298 return C_STREAM_ERROR;
300 SinkStreamSet single_corrected_parameters;
301 status = generators[i]->CorrectedTransformParameters(
302 &single_corrected_parameters);
303 if (status != C_OK)
304 return status;
305 if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
306 return C_STREAM_ERROR;
309 SinkStream linearized_predicted_parameters;
310 SinkStream linearized_corrected_parameters;
312 if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
313 return C_STREAM_ERROR;
314 if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
315 return C_STREAM_ERROR;
317 SourceStream predicted_parameters_source;
318 SourceStream corrected_parameters_source;
319 predicted_parameters_source.Init(linearized_predicted_parameters);
320 corrected_parameters_source.Init(linearized_corrected_parameters);
322 Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
323 &corrected_parameters_source,
324 parameter_correction);
325 if (delta1_status != C_OK)
326 return delta1_status;
329 // Generate sub-patch for elements.
331 corrected_parameters_source.Init(linearized_corrected_parameters);
332 SourceStreamSet corrected_parameters_source_set;
333 if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
334 return C_STREAM_ERROR;
336 SinkStreamSet predicted_transformed_elements;
337 SinkStreamSet corrected_transformed_elements;
339 for (size_t i = 0; i < number_of_transformations; ++i) {
340 SourceStreamSet single_parameters;
341 if (!corrected_parameters_source_set.ReadSet(&single_parameters))
342 return C_STREAM_ERROR;
343 SinkStreamSet single_predicted_transformed_element;
344 SinkStreamSet single_corrected_transformed_element;
345 Status status = generators[i]->Transform(
346 &single_parameters,
347 &single_predicted_transformed_element,
348 &single_corrected_transformed_element);
349 if (status != C_OK)
350 return status;
351 if (!single_parameters.Empty())
352 return C_STREAM_NOT_CONSUMED;
353 if (!predicted_transformed_elements.WriteSet(
354 &single_predicted_transformed_element))
355 return C_STREAM_ERROR;
356 if (!corrected_transformed_elements.WriteSet(
357 &single_corrected_transformed_element))
358 return C_STREAM_ERROR;
361 if (!corrected_parameters_source_set.Empty())
362 return C_STREAM_NOT_CONSUMED;
364 SinkStream linearized_predicted_transformed_elements;
365 SinkStream linearized_corrected_transformed_elements;
367 if (!predicted_transformed_elements.CopyTo(
368 &linearized_predicted_transformed_elements))
369 return C_STREAM_ERROR;
370 if (!corrected_transformed_elements.CopyTo(
371 &linearized_corrected_transformed_elements))
372 return C_STREAM_ERROR;
374 SourceStream predicted_transformed_elements_source;
375 SourceStream corrected_transformed_elements_source;
376 predicted_transformed_elements_source
377 .Init(linearized_predicted_transformed_elements);
378 corrected_transformed_elements_source
379 .Init(linearized_corrected_transformed_elements);
381 Status delta2_status =
382 GenerateSimpleDelta(&predicted_transformed_elements_source,
383 &corrected_transformed_elements_source,
384 transformed_elements_correction);
385 if (delta2_status != C_OK)
386 return delta2_status;
388 // Last use, free storage.
389 linearized_predicted_transformed_elements.Retire();
392 // Generate sub-patch for whole enchilada.
394 SinkStream predicted_ensemble;
396 if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
397 return C_STREAM_ERROR;
399 SourceStreamSet corrected_transformed_elements_source_set;
400 corrected_transformed_elements_source
401 .Init(linearized_corrected_transformed_elements);
402 if (!corrected_transformed_elements_source_set
403 .Init(&corrected_transformed_elements_source))
404 return C_STREAM_ERROR;
406 for (size_t i = 0; i < number_of_transformations; ++i) {
407 SourceStreamSet single_corrected_transformed_element;
408 if (!corrected_transformed_elements_source_set.ReadSet(
409 &single_corrected_transformed_element))
410 return C_STREAM_ERROR;
411 Status status = generators[i]->Reform(&single_corrected_transformed_element,
412 &predicted_ensemble);
413 if (status != C_OK)
414 return status;
415 if (!single_corrected_transformed_element.Empty())
416 return C_STREAM_NOT_CONSUMED;
419 if (!corrected_transformed_elements_source_set.Empty())
420 return C_STREAM_NOT_CONSUMED;
422 // No more references to this stream's buffer.
423 linearized_corrected_transformed_elements.Retire();
425 FreeGenerators(&generators);
427 size_t final_patch_input_size = predicted_ensemble.Length();
428 SourceStream predicted_ensemble_source;
429 predicted_ensemble_source.Init(predicted_ensemble);
430 Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
431 update,
432 ensemble_correction);
433 if (delta3_status != C_OK)
434 return delta3_status;
437 // Final output stream has a header followed by a StreamSet.
439 uint32 ensemble_version =
440 CourgetteConfig::GetInstance()->ensemble_version();
441 if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
442 !final_patch->WriteVarint32(ensemble_version) ||
443 !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
444 old_region.length())) ||
445 !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
446 new_region.length())) ||
447 !final_patch->WriteSizeVarint32(final_patch_input_size) ||
448 !patch_streams.CopyTo(final_patch)) {
449 return C_STREAM_ERROR;
452 VLOG(1) << "done GenerateEnsemblePatch "
453 << (base::Time::Now() - start_time).InSecondsF() << "s";
455 return C_OK;
458 } // namespace courgette