Separate Simple Backend creation from initialization.
[chromium-blink-merge.git] / courgette / ensemble_create.cc
blobc098710fe27aabcf0a3fafa2b77f63679b58ff66
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // The main idea in Courgette is to do patching *under a transformation*. The
6 // input is transformed into a new representation, patching occurs in the new
7 // representation, and then the transform is reversed to get the patched data.
8 //
9 // The idea is applied to pieces (or 'elements') of the whole (or 'ensemble').
10 // Each of the elements has to go through the same set of steps in lock-step.
12 // This file contains the code to create the patch.
15 #include "courgette/ensemble.h"
17 #include <vector>
18 #include <limits>
20 #include "base/basictypes.h"
21 #include "base/logging.h"
22 #include "base/time.h"
24 #include "courgette/third_party/bsdiff.h"
25 #include "courgette/crc.h"
26 #include "courgette/difference_estimator.h"
27 #include "courgette/streams.h"
28 #include "courgette/region.h"
29 #include "courgette/simple_delta.h"
31 #include "courgette/patcher_x86_32.h"
32 #include "courgette/patch_generator_x86_32.h"
34 namespace courgette {
36 TransformationPatchGenerator::TransformationPatchGenerator(
37 Element* old_element,
38 Element* new_element,
39 TransformationPatcher* patcher)
40 : old_element_(old_element),
41 new_element_(new_element),
42 patcher_(patcher) {
45 TransformationPatchGenerator::~TransformationPatchGenerator() {
46 delete patcher_;
49 // The default implementation of PredictTransformParameters delegates to the
50 // patcher.
51 Status TransformationPatchGenerator::PredictTransformParameters(
52 SinkStreamSet* prediction) {
53 return patcher_->PredictTransformParameters(prediction);
56 // The default implementation of Reform delegates to the patcher.
57 Status TransformationPatchGenerator::Reform(
58 SourceStreamSet* transformed_element,
59 SinkStream* reformed_element) {
60 return patcher_->Reform(transformed_element, reformed_element);
63 // Makes a TransformationPatchGenerator of the appropriate variety for the
64 // Element kind.
65 TransformationPatchGenerator* MakeGenerator(Element* old_element,
66 Element* new_element) {
67 switch (new_element->kind()) {
68 case EXE_UNKNOWN:
69 break;
70 case EXE_WIN_32_X86: {
71 TransformationPatchGenerator* generator =
72 new PatchGeneratorX86_32(
73 old_element,
74 new_element,
75 new PatcherX86_32(old_element->region()),
76 EXE_WIN_32_X86);
77 return generator;
79 case EXE_ELF_32_X86: {
80 TransformationPatchGenerator* generator =
81 new PatchGeneratorX86_32(
82 old_element,
83 new_element,
84 new PatcherX86_32(old_element->region()),
85 EXE_ELF_32_X86);
86 return generator;
90 LOG(WARNING) << "Unexpected Element::Kind " << old_element->kind();
91 return NULL;
94 // Checks to see if the proposed comparison is 'unsafe'. Sometimes one element
95 // from 'old' is matched as the closest element to multiple elements from 'new'.
96 // Each time this happens, the old element is transformed and serialized. This
97 // is a problem when the old element is huge compared with the new element
98 // because the mutliple serialized copies can be much bigger than the size of
99 // either ensemble.
101 // The right way to avoid this is to ensure any one element from 'old' is
102 // serialized once, which requires matching code in the patch application.
104 // This is a quick hack to avoid the problem by prohibiting a big difference in
105 // size between matching elements.
106 bool UnsafeDifference(Element* old_element, Element* new_element) {
107 double kMaxBloat = 2.0;
108 size_t kMinWorrysomeDifference = 2 << 20; // 2MB
109 size_t old_size = old_element->region().length();
110 size_t new_size = new_element->region().length();
111 size_t low_size = std::min(old_size, new_size);
112 size_t high_size = std::max(old_size, new_size);
113 if (high_size - low_size < kMinWorrysomeDifference) return false;
114 if (high_size < low_size * kMaxBloat) return false;
115 return true;
118 // FindGenerators finds TransformationPatchGenerators for the elements of
119 // |new_ensemble|. For each element of |new_ensemble| we find the closest
120 // matching element from |old_ensemble| and use that as the basis for
121 // differential compression. The elements have to be the same kind so as to
122 // support transformation into the same kind of 'new representation'.
124 Status FindGenerators(Ensemble* old_ensemble, Ensemble* new_ensemble,
125 std::vector<TransformationPatchGenerator*>* generators) {
126 base::Time start_find_time = base::Time::Now();
127 old_ensemble->FindEmbeddedElements();
128 new_ensemble->FindEmbeddedElements();
129 VLOG(1) << "done FindEmbeddedElements "
130 << (base::Time::Now() - start_find_time).InSecondsF();
132 std::vector<Element*> old_elements(old_ensemble->elements());
133 std::vector<Element*> new_elements(new_ensemble->elements());
135 VLOG(1) << "old has " << old_elements.size() << " elements";
136 VLOG(1) << "new has " << new_elements.size() << " elements";
138 DifferenceEstimator difference_estimator;
139 std::vector<DifferenceEstimator::Base*> bases;
141 base::Time start_bases_time = base::Time::Now();
142 for (size_t i = 0; i < old_elements.size(); ++i) {
143 bases.push_back(
144 difference_estimator.MakeBase(old_elements[i]->region()));
146 VLOG(1) << "done make bases "
147 << (base::Time::Now() - start_bases_time).InSecondsF() << "s";
149 for (size_t new_index = 0; new_index < new_elements.size(); ++new_index) {
150 Element* new_element = new_elements[new_index];
151 DifferenceEstimator::Subject* new_subject =
152 difference_estimator.MakeSubject(new_element->region());
154 // Search through old elements to find the best match.
156 // TODO(sra): This is O(N x M), i.e. O(N^2) since old_ensemble and
157 // new_ensemble probably have a very similar structure. We can make the
158 // search faster by making the comparison provided by DifferenceEstimator
159 // more nuanced, returning early if the measured difference is greater than
160 // the current best. This will be most effective if we can arrange that the
161 // first elements we try to match are likely the 'right' ones. We could
162 // prioritize elements that are of a similar size or similar position in the
163 // sequence of elements.
165 Element* best_old_element = NULL;
166 size_t best_difference = std::numeric_limits<size_t>::max();
167 for (size_t old_index = 0; old_index < old_elements.size(); ++old_index) {
168 Element* old_element = old_elements[old_index];
169 // Elements of different kinds are incompatible.
170 if (old_element->kind() != new_element->kind())
171 continue;
173 if (UnsafeDifference(old_element, new_element))
174 continue;
176 base::Time start_compare = base::Time::Now();
177 DifferenceEstimator::Base* old_base = bases[old_index];
178 size_t difference = difference_estimator.Measure(old_base, new_subject);
180 VLOG(1) << "Compare " << old_element->Name()
181 << " to " << new_element->Name()
182 << " --> " << difference
183 << " in " << (base::Time::Now() - start_compare).InSecondsF()
184 << "s";
185 if (difference == 0) {
186 VLOG(1) << "Skip " << new_element->Name()
187 << " - identical to " << old_element->Name();
188 best_difference = 0;
189 best_old_element = NULL;
190 break;
192 if (difference < best_difference) {
193 best_difference = difference;
194 best_old_element = old_element;
198 if (best_old_element) {
199 VLOG(1) << "Matched " << best_old_element->Name()
200 << " to " << new_element->Name()
201 << " --> " << best_difference;
202 TransformationPatchGenerator* generator =
203 MakeGenerator(best_old_element, new_element);
204 if (generator)
205 generators->push_back(generator);
209 VLOG(1) << "done FindGenerators found " << generators->size()
210 << " in " << (base::Time::Now() - start_find_time).InSecondsF()
211 << "s";
213 return C_OK;
216 void FreeGenerators(std::vector<TransformationPatchGenerator*>* generators) {
217 for (size_t i = 0; i < generators->size(); ++i) {
218 delete (*generators)[i];
220 generators->clear();
223 ////////////////////////////////////////////////////////////////////////////////
225 Status GenerateEnsemblePatch(SourceStream* base,
226 SourceStream* update,
227 SinkStream* final_patch) {
228 VLOG(1) << "start GenerateEnsemblePatch";
229 base::Time start_time = base::Time::Now();
231 Region old_region(base->Buffer(), base->Remaining());
232 Region new_region(update->Buffer(), update->Remaining());
233 Ensemble old_ensemble(old_region, "old");
234 Ensemble new_ensemble(new_region, "new");
235 std::vector<TransformationPatchGenerator*> generators;
236 Status generators_status = FindGenerators(&old_ensemble, &new_ensemble,
237 &generators);
238 if (generators_status != C_OK)
239 return generators_status;
241 SinkStreamSet patch_streams;
243 SinkStream* tranformation_descriptions = patch_streams.stream(0);
244 SinkStream* parameter_correction = patch_streams.stream(1);
245 SinkStream* transformed_elements_correction = patch_streams.stream(2);
246 SinkStream* ensemble_correction = patch_streams.stream(3);
248 size_t number_of_transformations = generators.size();
249 if (!tranformation_descriptions->WriteSizeVarint32(number_of_transformations))
250 return C_STREAM_ERROR;
252 for (size_t i = 0; i < number_of_transformations; ++i) {
253 ExecutableType kind = generators[i]->Kind();
254 if (!tranformation_descriptions->WriteVarint32(kind))
255 return C_STREAM_ERROR;
258 for (size_t i = 0; i < number_of_transformations; ++i) {
259 Status status =
260 generators[i]->WriteInitialParameters(tranformation_descriptions);
261 if (status != C_OK)
262 return status;
266 // Generate sub-patch for parameters.
268 SinkStreamSet predicted_parameters_sink;
269 SinkStreamSet corrected_parameters_sink;
271 for (size_t i = 0; i < number_of_transformations; ++i) {
272 SinkStreamSet single_predicted_parameters;
273 Status status;
274 status = generators[i]->PredictTransformParameters(
275 &single_predicted_parameters);
276 if (status != C_OK)
277 return status;
278 if (!predicted_parameters_sink.WriteSet(&single_predicted_parameters))
279 return C_STREAM_ERROR;
281 SinkStreamSet single_corrected_parameters;
282 status = generators[i]->CorrectedTransformParameters(
283 &single_corrected_parameters);
284 if (status != C_OK)
285 return status;
286 if (!corrected_parameters_sink.WriteSet(&single_corrected_parameters))
287 return C_STREAM_ERROR;
290 SinkStream linearized_predicted_parameters;
291 SinkStream linearized_corrected_parameters;
293 if (!predicted_parameters_sink.CopyTo(&linearized_predicted_parameters))
294 return C_STREAM_ERROR;
295 if (!corrected_parameters_sink.CopyTo(&linearized_corrected_parameters))
296 return C_STREAM_ERROR;
298 SourceStream predicted_parameters_source;
299 SourceStream corrected_parameters_source;
300 predicted_parameters_source.Init(linearized_predicted_parameters);
301 corrected_parameters_source.Init(linearized_corrected_parameters);
303 Status delta1_status = GenerateSimpleDelta(&predicted_parameters_source,
304 &corrected_parameters_source,
305 parameter_correction);
306 if (delta1_status != C_OK)
307 return delta1_status;
310 // Generate sub-patch for elements.
312 corrected_parameters_source.Init(linearized_corrected_parameters);
313 SourceStreamSet corrected_parameters_source_set;
314 if (!corrected_parameters_source_set.Init(&corrected_parameters_source))
315 return C_STREAM_ERROR;
317 SinkStreamSet predicted_transformed_elements;
318 SinkStreamSet corrected_transformed_elements;
320 for (size_t i = 0; i < number_of_transformations; ++i) {
321 SourceStreamSet single_parameters;
322 if (!corrected_parameters_source_set.ReadSet(&single_parameters))
323 return C_STREAM_ERROR;
324 SinkStreamSet single_predicted_transformed_element;
325 SinkStreamSet single_corrected_transformed_element;
326 Status status = generators[i]->Transform(
327 &single_parameters,
328 &single_predicted_transformed_element,
329 &single_corrected_transformed_element);
330 if (status != C_OK)
331 return status;
332 if (!single_parameters.Empty())
333 return C_STREAM_NOT_CONSUMED;
334 if (!predicted_transformed_elements.WriteSet(
335 &single_predicted_transformed_element))
336 return C_STREAM_ERROR;
337 if (!corrected_transformed_elements.WriteSet(
338 &single_corrected_transformed_element))
339 return C_STREAM_ERROR;
342 if (!corrected_parameters_source_set.Empty())
343 return C_STREAM_NOT_CONSUMED;
345 SinkStream linearized_predicted_transformed_elements;
346 SinkStream linearized_corrected_transformed_elements;
348 if (!predicted_transformed_elements.CopyTo(
349 &linearized_predicted_transformed_elements))
350 return C_STREAM_ERROR;
351 if (!corrected_transformed_elements.CopyTo(
352 &linearized_corrected_transformed_elements))
353 return C_STREAM_ERROR;
355 SourceStream predicted_transformed_elements_source;
356 SourceStream corrected_transformed_elements_source;
357 predicted_transformed_elements_source
358 .Init(linearized_predicted_transformed_elements);
359 corrected_transformed_elements_source
360 .Init(linearized_corrected_transformed_elements);
362 Status delta2_status =
363 GenerateSimpleDelta(&predicted_transformed_elements_source,
364 &corrected_transformed_elements_source,
365 transformed_elements_correction);
366 if (delta2_status != C_OK)
367 return delta2_status;
369 // Last use, free storage.
370 linearized_predicted_transformed_elements.Retire();
373 // Generate sub-patch for whole enchilada.
375 SinkStream predicted_ensemble;
377 if (!predicted_ensemble.Write(base->Buffer(), base->Remaining()))
378 return C_STREAM_ERROR;
380 SourceStreamSet corrected_transformed_elements_source_set;
381 corrected_transformed_elements_source
382 .Init(linearized_corrected_transformed_elements);
383 if (!corrected_transformed_elements_source_set
384 .Init(&corrected_transformed_elements_source))
385 return C_STREAM_ERROR;
387 for (size_t i = 0; i < number_of_transformations; ++i) {
388 SourceStreamSet single_corrected_transformed_element;
389 if (!corrected_transformed_elements_source_set.ReadSet(
390 &single_corrected_transformed_element))
391 return C_STREAM_ERROR;
392 Status status = generators[i]->Reform(&single_corrected_transformed_element,
393 &predicted_ensemble);
394 if (status != C_OK)
395 return status;
396 if (!single_corrected_transformed_element.Empty())
397 return C_STREAM_NOT_CONSUMED;
400 if (!corrected_transformed_elements_source_set.Empty())
401 return C_STREAM_NOT_CONSUMED;
403 // No more references to this stream's buffer.
404 linearized_corrected_transformed_elements.Retire();
406 FreeGenerators(&generators);
408 size_t final_patch_input_size = predicted_ensemble.Length();
409 SourceStream predicted_ensemble_source;
410 predicted_ensemble_source.Init(predicted_ensemble);
411 Status delta3_status = GenerateSimpleDelta(&predicted_ensemble_source,
412 update,
413 ensemble_correction);
414 if (delta3_status != C_OK)
415 return delta3_status;
418 // Final output stream has a header followed by a StreamSet.
420 if (!final_patch->WriteVarint32(CourgettePatchFile::kMagic) ||
421 !final_patch->WriteVarint32(CourgettePatchFile::kVersion) ||
422 !final_patch->WriteVarint32(CalculateCrc(old_region.start(),
423 old_region.length())) ||
424 !final_patch->WriteVarint32(CalculateCrc(new_region.start(),
425 new_region.length())) ||
426 !final_patch->WriteSizeVarint32(final_patch_input_size) ||
427 !patch_streams.CopyTo(final_patch)) {
428 return C_STREAM_ERROR;
431 VLOG(1) << "done GenerateEnsemblePatch "
432 << (base::Time::Now() - start_time).InSecondsF() << "s";
434 return C_OK;
437 } // namespace