src/liblzma/common/block_buffer_encoder.c

   1 // SPDX-License-Identifier: 0BSD
   2
   3 ///////////////////////////////////////////////////////////////////////////////
   4 //
   5 /// \file       block_buffer_encoder.c
   6 /// \brief      Single-call .xz Block encoder
   7 //
   8 //  Author:     Lasse Collin
   9 //
  10 ///////////////////////////////////////////////////////////////////////////////
  11
  12 #include "block_buffer_encoder.h"
  13 #include "block_encoder.h"
  14 #include "filter_encoder.h"
  15 #include "lzma2_encoder.h"
  16 #include "check.h"
  17
  18
  19 /// Estimate the maximum size of the Block Header and Check fields for
  20 /// a Block that uses LZMA2 uncompressed chunks. We could use
  21 /// lzma_block_header_size() but this is simpler.
  22 ///
  23 /// Block Header Size + Block Flags + Compressed Size
  24 /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check
  25 /// and round up to the next multiple of four to take Header Padding
  26 /// into account.
  27 #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
  28                 + LZMA_CHECK_SIZE_MAX + 3) & ~3)
  29
  30
  31 static uint64_t
  32 lzma2_bound(uint64_t uncompressed_size)
  33 {
  34         // Prevent integer overflow in overhead calculation.
  35         if (uncompressed_size > COMPRESSED_SIZE_MAX)
  36                 return 0;
  37
  38         // Calculate the exact overhead of the LZMA2 headers: Round
  39         // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
  40         // multiply by the size of per-chunk header, and add one byte for
  41         // the end marker.
  42         const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
  43                                 / LZMA2_CHUNK_MAX)
  44                         * LZMA2_HEADER_UNCOMPRESSED + 1;
  45
  46         // Catch the possible integer overflow.
  47         if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
  48                 return 0;
  49
  50         return uncompressed_size + overhead;
  51 }
  52
  53
  54 extern uint64_t
  55 lzma_block_buffer_bound64(uint64_t uncompressed_size)
  56 {
  57         // If the data doesn't compress, we always use uncompressed
  58         // LZMA2 chunks.
  59         uint64_t lzma2_size = lzma2_bound(uncompressed_size);
  60         if (lzma2_size == 0)
  61                 return 0;
  62
  63         // Take Block Padding into account.
  64         lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
  65
  66         // No risk of integer overflow because lzma2_bound() already takes
  67         // into account the size of the headers in the Block.
  68         return HEADERS_BOUND + lzma2_size;
  69 }
  70
  71
  72 extern LZMA_API(size_t)
  73 lzma_block_buffer_bound(size_t uncompressed_size)
  74 {
  75         uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
  76
  77 #if SIZE_MAX < UINT64_MAX
  78         // Catch the possible integer overflow on 32-bit systems.
  79         if (ret > SIZE_MAX)
  80                 return 0;
  81 #endif
  82
  83         return ret;
  84 }
  85
  86
  87 static lzma_ret
  88 block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
  89                 uint8_t *out, size_t *out_pos, size_t out_size)
  90 {
  91         // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
  92         // all, but LZMA2 always requires a dictionary, so use the minimum
  93         // value to minimize memory usage of the decoder.
  94         lzma_options_lzma lzma2 = {
  95                 .dict_size = LZMA_DICT_SIZE_MIN,
  96         };
  97
  98         lzma_filter filters[2];
  99         filters[0].id = LZMA_FILTER_LZMA2;
 100         filters[0].options = &lzma2;
 101         filters[1].id = LZMA_VLI_UNKNOWN;
 102
 103         // Set the above filter options to *block temporarily so that we can
 104         // encode the Block Header.
 105         lzma_filter *filters_orig = block->filters;
 106         block->filters = filters;
 107
 108         if (lzma_block_header_size(block) != LZMA_OK) {
 109                 block->filters = filters_orig;
 110                 return LZMA_PROG_ERROR;
 111         }
 112
 113         // Check that there's enough output space. The caller has already
 114         // set block->compressed_size to what lzma2_bound() has returned,
 115         // so we can reuse that value. We know that compressed_size is a
 116         // known valid VLI and header_size is a small value so their sum
 117         // will never overflow.
 118         assert(block->compressed_size == lzma2_bound(in_size));
 119         if (out_size - *out_pos
 120                         < block->header_size + block->compressed_size) {
 121                 block->filters = filters_orig;
 122                 return LZMA_BUF_ERROR;
 123         }
 124
 125         if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
 126                 block->filters = filters_orig;
 127                 return LZMA_PROG_ERROR;
 128         }
 129
 130         block->filters = filters_orig;
 131         *out_pos += block->header_size;
 132
 133         // Encode the data using LZMA2 uncompressed chunks.
 134         size_t in_pos = 0;
 135         uint8_t control = 0x01; // Dictionary reset
 136
 137         while (in_pos < in_size) {
 138                 // Control byte: Indicate uncompressed chunk, of which
 139                 // the first resets the dictionary.
 140                 out[(*out_pos)++] = control;
 141                 control = 0x02; // No dictionary reset
 142
 143                 // Size of the uncompressed chunk
 144                 const size_t copy_size
 145                                 = my_min(in_size - in_pos, LZMA2_CHUNK_MAX);
 146                 out[(*out_pos)++] = (copy_size - 1) >> 8;
 147                 out[(*out_pos)++] = (copy_size - 1) & 0xFF;
 148
 149                 // The actual data
 150                 assert(*out_pos + copy_size <= out_size);
 151                 memcpy(out + *out_pos, in + in_pos, copy_size);
 152
 153                 in_pos += copy_size;
 154                 *out_pos += copy_size;
 155         }
 156
 157         // End marker
 158         out[(*out_pos)++] = 0x00;
 159         assert(*out_pos <= out_size);
 160
 161         return LZMA_OK;
 162 }
 163
 164
 165 static lzma_ret
 166 block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
 167                 const uint8_t *in, size_t in_size,
 168                 uint8_t *out, size_t *out_pos, size_t out_size)
 169 {
 170         // Find out the size of the Block Header.
 171         return_if_error(lzma_block_header_size(block));
 172
 173         // Reserve space for the Block Header and skip it for now.
 174         if (out_size - *out_pos <= block->header_size)
 175                 return LZMA_BUF_ERROR;
 176
 177         const size_t out_start = *out_pos;
 178         *out_pos += block->header_size;
 179
 180         // Limit out_size so that we stop encoding if the output would grow
 181         // bigger than what uncompressed Block would be.
 182         if (out_size - *out_pos > block->compressed_size)
 183                 out_size = *out_pos + block->compressed_size;
 184
 185         // TODO: In many common cases this could be optimized to use
 186         // significantly less memory.
 187         lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT;
 188         lzma_ret ret = lzma_raw_encoder_init(
 189                         &raw_encoder, allocator, block->filters);
 190
 191         if (ret == LZMA_OK) {
 192                 size_t in_pos = 0;
 193                 ret = raw_encoder.code(raw_encoder.coder, allocator,
 194                                 in, &in_pos, in_size, out, out_pos, out_size,
 195                                 LZMA_FINISH);
 196         }
 197
 198         // NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
 199         lzma_next_end(&raw_encoder, allocator);
 200
 201         if (ret == LZMA_STREAM_END) {
 202                 // Compression was successful. Write the Block Header.
 203                 block->compressed_size
 204                                 = *out_pos - (out_start + block->header_size);
 205                 ret = lzma_block_header_encode(block, out + out_start);
 206                 if (ret != LZMA_OK)
 207                         ret = LZMA_PROG_ERROR;
 208
 209         } else if (ret == LZMA_OK) {
 210                 // Output buffer became full.
 211                 ret = LZMA_BUF_ERROR;
 212         }
 213
 214         // Reset *out_pos if something went wrong.
 215         if (ret != LZMA_OK)
 216                 *out_pos = out_start;
 217
 218         return ret;
 219 }
 220
 221
 222 static lzma_ret
 223 block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
 224                 const uint8_t *in, size_t in_size,
 225                 uint8_t *out, size_t *out_pos, size_t out_size,
 226                 bool try_to_compress)
 227 {
 228         // Validate the arguments.
 229         if (block == NULL || (in == NULL && in_size != 0) || out == NULL
 230                         || out_pos == NULL || *out_pos > out_size)
 231                 return LZMA_PROG_ERROR;
 232
 233         // The contents of the structure may depend on the version so
 234         // check the version before validating the contents of *block.
 235         if (block->version > 1)
 236                 return LZMA_OPTIONS_ERROR;
 237
 238         if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
 239                         || (try_to_compress && block->filters == NULL))
 240                 return LZMA_PROG_ERROR;
 241
 242         if (!lzma_check_is_supported(block->check))
 243                 return LZMA_UNSUPPORTED_CHECK;
 244
 245         // Size of a Block has to be a multiple of four, so limit the size
 246         // here already. This way we don't need to check it again when adding
 247         // Block Padding.
 248         out_size -= (out_size - *out_pos) & 3;
 249
 250         // Get the size of the Check field.
 251         const size_t check_size = lzma_check_size(block->check);
 252         assert(check_size != UINT32_MAX);
 253
 254         // Reserve space for the Check field.
 255         if (out_size - *out_pos <= check_size)
 256                 return LZMA_BUF_ERROR;
 257
 258         out_size -= check_size;
 259
 260         // Initialize block->uncompressed_size and calculate the worst-case
 261         // value for block->compressed_size.
 262         block->uncompressed_size = in_size;
 263         block->compressed_size = lzma2_bound(in_size);
 264         if (block->compressed_size == 0)
 265                 return LZMA_DATA_ERROR;
 266
 267         // Do the actual compression.
 268         lzma_ret ret = LZMA_BUF_ERROR;
 269         if (try_to_compress)
 270                 ret = block_encode_normal(block, allocator,
 271                                 in, in_size, out, out_pos, out_size);
 272
 273         if (ret != LZMA_OK) {
 274                 // If the error was something else than output buffer
 275                 // becoming full, return the error now.
 276                 if (ret != LZMA_BUF_ERROR)
 277                         return ret;
 278
 279                 // The data was incompressible (at least with the options
 280                 // given to us) or the output buffer was too small. Use the
 281                 // uncompressed chunks of LZMA2 to wrap the data into a valid
 282                 // Block. If we haven't been given enough output space, even
 283                 // this may fail.
 284                 return_if_error(block_encode_uncompressed(block, in, in_size,
 285                                 out, out_pos, out_size));
 286         }
 287
 288         assert(*out_pos <= out_size);
 289
 290         // Block Padding. No buffer overflow here, because we already adjusted
 291         // out_size so that (out_size - out_start) is a multiple of four.
 292         // Thus, if the buffer is full, the loop body can never run.
 293         for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
 294                 assert(*out_pos < out_size);
 295                 out[(*out_pos)++] = 0x00;
 296         }
 297
 298         // If there's no Check field, we are done now.
 299         if (check_size > 0) {
 300                 // Calculate the integrity check. We reserved space for
 301                 // the Check field earlier so we don't need to check for
 302                 // available output space here.
 303                 lzma_check_state check;
 304                 lzma_check_init(&check, block->check);
 305                 lzma_check_update(&check, block->check, in, in_size);
 306                 lzma_check_finish(&check, block->check);
 307
 308                 memcpy(block->raw_check, check.buffer.u8, check_size);
 309                 memcpy(out + *out_pos, check.buffer.u8, check_size);
 310                 *out_pos += check_size;
 311         }
 312
 313         return LZMA_OK;
 314 }
 315
 316
 317 extern LZMA_API(lzma_ret)
 318 lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
 319                 const uint8_t *in, size_t in_size,
 320                 uint8_t *out, size_t *out_pos, size_t out_size)
 321 {
 322         return block_buffer_encode(block, allocator,
 323                         in, in_size, out, out_pos, out_size, true);
 324 }
 325
 326
 327 #ifdef HAVE_SYMBOL_VERSIONS_LINUX
 328 // This is for compatibility with binaries linked against liblzma that
 329 // has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
 330 LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2",
 331         lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block,
 332                 const uint8_t *in, size_t in_size,
 333                 uint8_t *out, size_t *out_pos, size_t out_size)
 334                 lzma_nothrow lzma_attr_warn_unused_result
 335                 __attribute__((__alias__("lzma_block_uncomp_encode_52")));
 336
 337 LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2",
 338         lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block,
 339                 const uint8_t *in, size_t in_size,
 340                 uint8_t *out, size_t *out_pos, size_t out_size)
 341                 lzma_nothrow lzma_attr_warn_unused_result;
 342
 343 #define lzma_block_uncomp_encode lzma_block_uncomp_encode_52
 344 #endif
 345 extern LZMA_API(lzma_ret)
 346 lzma_block_uncomp_encode(lzma_block *block,
 347                 const uint8_t *in, size_t in_size,
 348                 uint8_t *out, size_t *out_pos, size_t out_size)
 349 {
 350         // It won't allocate any memory from heap so no need
 351         // for lzma_allocator.
 352         return block_buffer_encode(block, NULL,
 353                         in, in_size, out, out_pos, out_size, false);
 354 }