src/liblzma/common/index_hash.c

   1 // SPDX-License-Identifier: 0BSD
   2
   3 ///////////////////////////////////////////////////////////////////////////////
   4 //
   5 /// \file       index_hash.c
   6 /// \brief      Validates Index by using a hash function
   7 //
   8 //  Author:     Lasse Collin
   9 //
  10 ///////////////////////////////////////////////////////////////////////////////
  11
  12 #include "common.h"
  13 #include "index.h"
  14 #include "check.h"
  15
  16
  17 typedef struct {
  18         /// Sum of the Block sizes (including Block Padding)
  19         lzma_vli blocks_size;
  20
  21         /// Sum of the Uncompressed Size fields
  22         lzma_vli uncompressed_size;
  23
  24         /// Number of Records
  25         lzma_vli count;
  26
  27         /// Size of the List of Index Records as bytes
  28         lzma_vli index_list_size;
  29
  30         /// Check calculated from Unpadded Sizes and Uncompressed Sizes.
  31         lzma_check_state check;
  32
  33 } lzma_index_hash_info;
  34
  35
  36 struct lzma_index_hash_s {
  37         enum {
  38                 SEQ_BLOCK,
  39                 SEQ_COUNT,
  40                 SEQ_UNPADDED,
  41                 SEQ_UNCOMPRESSED,
  42                 SEQ_PADDING_INIT,
  43                 SEQ_PADDING,
  44                 SEQ_CRC32,
  45         } sequence;
  46
  47         /// Information collected while decoding the actual Blocks.
  48         lzma_index_hash_info blocks;
  49
  50         /// Information collected from the Index field.
  51         lzma_index_hash_info records;
  52
  53         /// Number of Records not fully decoded
  54         lzma_vli remaining;
  55
  56         /// Unpadded Size currently being read from an Index Record.
  57         lzma_vli unpadded_size;
  58
  59         /// Uncompressed Size currently being read from an Index Record.
  60         lzma_vli uncompressed_size;
  61
  62         /// Position in variable-length integers when decoding them from
  63         /// the List of Records.
  64         size_t pos;
  65
  66         /// CRC32 of the Index
  67         uint32_t crc32;
  68 };
  69
  70
  71 extern LZMA_API(lzma_index_hash *)
  72 lzma_index_hash_init(lzma_index_hash *index_hash,
  73                 const lzma_allocator *allocator)
  74 {
  75         if (index_hash == NULL) {
  76                 index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
  77                 if (index_hash == NULL)
  78                         return NULL;
  79         }
  80
  81         index_hash->sequence = SEQ_BLOCK;
  82         index_hash->blocks.blocks_size = 0;
  83         index_hash->blocks.uncompressed_size = 0;
  84         index_hash->blocks.count = 0;
  85         index_hash->blocks.index_list_size = 0;
  86         index_hash->records.blocks_size = 0;
  87         index_hash->records.uncompressed_size = 0;
  88         index_hash->records.count = 0;
  89         index_hash->records.index_list_size = 0;
  90         index_hash->unpadded_size = 0;
  91         index_hash->uncompressed_size = 0;
  92         index_hash->pos = 0;
  93         index_hash->crc32 = 0;
  94
  95         // These cannot fail because LZMA_CHECK_BEST is known to be supported.
  96         (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
  97         (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
  98
  99         return index_hash;
 100 }
 101
 102
 103 extern LZMA_API(void)
 104 lzma_index_hash_end(lzma_index_hash *index_hash,
 105                 const lzma_allocator *allocator)
 106 {
 107         lzma_free(index_hash, allocator);
 108         return;
 109 }
 110
 111
 112 extern LZMA_API(lzma_vli)
 113 lzma_index_hash_size(const lzma_index_hash *index_hash)
 114 {
 115         // Get the size of the Index from ->blocks instead of ->records for
 116         // cases where application wants to know the Index Size before
 117         // decoding the Index.
 118         return index_size(index_hash->blocks.count,
 119                         index_hash->blocks.index_list_size);
 120 }
 121
 122
 123 /// Updates the sizes and the hash without any validation.
 124 static void
 125 hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
 126                 lzma_vli uncompressed_size)
 127 {
 128         info->blocks_size += vli_ceil4(unpadded_size);
 129         info->uncompressed_size += uncompressed_size;
 130         info->index_list_size += lzma_vli_size(unpadded_size)
 131                         + lzma_vli_size(uncompressed_size);
 132         ++info->count;
 133
 134         const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
 135         lzma_check_update(&info->check, LZMA_CHECK_BEST,
 136                         (const uint8_t *)(sizes), sizeof(sizes));
 137
 138         return;
 139 }
 140
 141
 142 extern LZMA_API(lzma_ret)
 143 lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
 144                 lzma_vli uncompressed_size)
 145 {
 146         // Validate the arguments.
 147         if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
 148                         || unpadded_size < UNPADDED_SIZE_MIN
 149                         || unpadded_size > UNPADDED_SIZE_MAX
 150                         || uncompressed_size > LZMA_VLI_MAX)
 151                 return LZMA_PROG_ERROR;
 152
 153         // Update the hash.
 154         hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
 155
 156         // Validate the properties of *info are still in allowed limits.
 157         if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
 158                         || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
 159                         || index_size(index_hash->blocks.count,
 160                                         index_hash->blocks.index_list_size)
 161                                 > LZMA_BACKWARD_SIZE_MAX
 162                         || index_stream_size(index_hash->blocks.blocks_size,
 163                                         index_hash->blocks.count,
 164                                         index_hash->blocks.index_list_size)
 165                                 > LZMA_VLI_MAX)
 166                 return LZMA_DATA_ERROR;
 167
 168         return LZMA_OK;
 169 }
 170
 171
 172 extern LZMA_API(lzma_ret)
 173 lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
 174                 size_t *in_pos, size_t in_size)
 175 {
 176         // Catch zero input buffer here, because in contrast to Index encoder
 177         // and decoder functions, applications call this function directly
 178         // instead of via lzma_code(), which does the buffer checking.
 179         if (*in_pos >= in_size)
 180                 return LZMA_BUF_ERROR;
 181
 182         // NOTE: This function has many similarities to index_encode() and
 183         // index_decode() functions found from index_encoder.c and
 184         // index_decoder.c. See the comments especially in index_encoder.c.
 185         const size_t in_start = *in_pos;
 186         lzma_ret ret = LZMA_OK;
 187
 188         while (*in_pos < in_size)
 189         switch (index_hash->sequence) {
 190         case SEQ_BLOCK:
 191                 // Check the Index Indicator is present.
 192                 if (in[(*in_pos)++] != INDEX_INDICATOR)
 193                         return LZMA_DATA_ERROR;
 194
 195                 index_hash->sequence = SEQ_COUNT;
 196                 break;
 197
 198         case SEQ_COUNT: {
 199                 ret = lzma_vli_decode(&index_hash->remaining,
 200                                 &index_hash->pos, in, in_pos, in_size);
 201                 if (ret != LZMA_STREAM_END)
 202                         goto out;
 203
 204                 // The count must match the count of the Blocks decoded.
 205                 if (index_hash->remaining != index_hash->blocks.count)
 206                         return LZMA_DATA_ERROR;
 207
 208                 ret = LZMA_OK;
 209                 index_hash->pos = 0;
 210
 211                 // Handle the special case when there are no Blocks.
 212                 index_hash->sequence = index_hash->remaining == 0
 213                                 ? SEQ_PADDING_INIT : SEQ_UNPADDED;
 214                 break;
 215         }
 216
 217         case SEQ_UNPADDED:
 218         case SEQ_UNCOMPRESSED: {
 219                 lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
 220                                 ? &index_hash->unpadded_size
 221                                 : &index_hash->uncompressed_size;
 222
 223                 ret = lzma_vli_decode(size, &index_hash->pos,
 224                                 in, in_pos, in_size);
 225                 if (ret != LZMA_STREAM_END)
 226                         goto out;
 227
 228                 ret = LZMA_OK;
 229                 index_hash->pos = 0;
 230
 231                 if (index_hash->sequence == SEQ_UNPADDED) {
 232                         if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
 233                                         || index_hash->unpadded_size
 234                                                 > UNPADDED_SIZE_MAX)
 235                                 return LZMA_DATA_ERROR;
 236
 237                         index_hash->sequence = SEQ_UNCOMPRESSED;
 238                 } else {
 239                         // Update the hash.
 240                         hash_append(&index_hash->records,
 241                                         index_hash->unpadded_size,
 242                                         index_hash->uncompressed_size);
 243
 244                         // Verify that we don't go over the known sizes. Note
 245                         // that this validation is simpler than the one used
 246                         // in lzma_index_hash_append(), because here we know
 247                         // that values in index_hash->blocks are already
 248                         // validated and we are fine as long as we don't
 249                         // exceed them in index_hash->records.
 250                         if (index_hash->blocks.blocks_size
 251                                         < index_hash->records.blocks_size
 252                                         || index_hash->blocks.uncompressed_size
 253                                         < index_hash->records.uncompressed_size
 254                                         || index_hash->blocks.index_list_size
 255                                         < index_hash->records.index_list_size)
 256                                 return LZMA_DATA_ERROR;
 257
 258                         // Check if this was the last Record.
 259                         index_hash->sequence = --index_hash->remaining == 0
 260                                         ? SEQ_PADDING_INIT : SEQ_UNPADDED;
 261                 }
 262
 263                 break;
 264         }
 265
 266         case SEQ_PADDING_INIT:
 267                 index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
 268                                 index_hash->records.count,
 269                                 index_hash->records.index_list_size)) & 3;
 270                 index_hash->sequence = SEQ_PADDING;
 271
 272         // Fall through
 273
 274         case SEQ_PADDING:
 275                 if (index_hash->pos > 0) {
 276                         --index_hash->pos;
 277                         if (in[(*in_pos)++] != 0x00)
 278                                 return LZMA_DATA_ERROR;
 279
 280                         break;
 281                 }
 282
 283                 // Compare the sizes.
 284                 if (index_hash->blocks.blocks_size
 285                                 != index_hash->records.blocks_size
 286                                 || index_hash->blocks.uncompressed_size
 287                                 != index_hash->records.uncompressed_size
 288                                 || index_hash->blocks.index_list_size
 289                                 != index_hash->records.index_list_size)
 290                         return LZMA_DATA_ERROR;
 291
 292                 // Finish the hashes and compare them.
 293                 lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
 294                 lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
 295                 if (memcmp(index_hash->blocks.check.buffer.u8,
 296                                 index_hash->records.check.buffer.u8,
 297                                 lzma_check_size(LZMA_CHECK_BEST)) != 0)
 298                         return LZMA_DATA_ERROR;
 299
 300                 // Finish the CRC32 calculation.
 301                 index_hash->crc32 = lzma_crc32(in + in_start,
 302                                 *in_pos - in_start, index_hash->crc32);
 303
 304                 index_hash->sequence = SEQ_CRC32;
 305
 306         // Fall through
 307
 308         case SEQ_CRC32:
 309                 do {
 310                         if (*in_pos == in_size)
 311                                 return LZMA_OK;
 312
 313                         if (((index_hash->crc32 >> (index_hash->pos * 8))
 314                                         & 0xFF) != in[(*in_pos)++]) {
 315 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 316                                 return LZMA_DATA_ERROR;
 317 #endif
 318                         }
 319
 320                 } while (++index_hash->pos < 4);
 321
 322                 return LZMA_STREAM_END;
 323
 324         default:
 325                 assert(0);
 326                 return LZMA_PROG_ERROR;
 327         }
 328
 329 out:
 330         // Update the CRC32.
 331         //
 332         // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
 333         // In such a case we had no input and thus in_used == 0.
 334         {
 335                 const size_t in_used = *in_pos - in_start;
 336                 if (in_used > 0)
 337                         index_hash->crc32 = lzma_crc32(in + in_start,
 338                                         in_used, index_hash->crc32);
 339         }
 340
 341         return ret;
 342 }