src/liblzma/common/index_hash.c

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       index_hash.c
   4 /// \brief      Validates Index by using a hash function
   5 //
   6 //  Author:     Lasse Collin
   7 //
   8 //  This file has been put into the public domain.
   9 //  You can do whatever you want with this file.
  10 //
  11 ///////////////////////////////////////////////////////////////////////////////
  12
  13 #include "common.h"
  14 #include "index.h"
  15 #include "check.h"
  16
  17
  18 typedef struct {
  19         /// Sum of the Block sizes (including Block Padding)
  20         lzma_vli blocks_size;
  21
  22         /// Sum of the Uncompressed Size fields
  23         lzma_vli uncompressed_size;
  24
  25         /// Number of Records
  26         lzma_vli count;
  27
  28         /// Size of the List of Index Records as bytes
  29         lzma_vli index_list_size;
  30
  31         /// Check calculated from Unpadded Sizes and Uncompressed Sizes.
  32         lzma_check_state check;
  33
  34 } lzma_index_hash_info;
  35
  36
  37 struct lzma_index_hash_s {
  38         enum {
  39                 SEQ_BLOCK,
  40                 SEQ_COUNT,
  41                 SEQ_UNPADDED,
  42                 SEQ_UNCOMPRESSED,
  43                 SEQ_PADDING_INIT,
  44                 SEQ_PADDING,
  45                 SEQ_CRC32,
  46         } sequence;
  47
  48         /// Information collected while decoding the actual Blocks.
  49         lzma_index_hash_info blocks;
  50
  51         /// Information collected from the Index field.
  52         lzma_index_hash_info records;
  53
  54         /// Number of Records not fully decoded
  55         lzma_vli remaining;
  56
  57         /// Unpadded Size currently being read from an Index Record.
  58         lzma_vli unpadded_size;
  59
  60         /// Uncompressed Size currently being read from an Index Record.
  61         lzma_vli uncompressed_size;
  62
  63         /// Position in variable-length integers when decoding them from
  64         /// the List of Records.
  65         size_t pos;
  66
  67         /// CRC32 of the Index
  68         uint32_t crc32;
  69 };
  70
  71
  72 extern LZMA_API(lzma_index_hash *)
  73 lzma_index_hash_init(lzma_index_hash *index_hash,
  74                 const lzma_allocator *allocator)
  75 {
  76         if (index_hash == NULL) {
  77                 index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
  78                 if (index_hash == NULL)
  79                         return NULL;
  80         }
  81
  82         index_hash->sequence = SEQ_BLOCK;
  83         index_hash->blocks.blocks_size = 0;
  84         index_hash->blocks.uncompressed_size = 0;
  85         index_hash->blocks.count = 0;
  86         index_hash->blocks.index_list_size = 0;
  87         index_hash->records.blocks_size = 0;
  88         index_hash->records.uncompressed_size = 0;
  89         index_hash->records.count = 0;
  90         index_hash->records.index_list_size = 0;
  91         index_hash->unpadded_size = 0;
  92         index_hash->uncompressed_size = 0;
  93         index_hash->pos = 0;
  94         index_hash->crc32 = 0;
  95
  96         // These cannot fail because LZMA_CHECK_BEST is known to be supported.
  97         (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
  98         (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
  99
 100         return index_hash;
 101 }
 102
 103
 104 extern LZMA_API(void)
 105 lzma_index_hash_end(lzma_index_hash *index_hash,
 106                 const lzma_allocator *allocator)
 107 {
 108         lzma_free(index_hash, allocator);
 109         return;
 110 }
 111
 112
 113 extern LZMA_API(lzma_vli)
 114 lzma_index_hash_size(const lzma_index_hash *index_hash)
 115 {
 116         // Get the size of the Index from ->blocks instead of ->records for
 117         // cases where application wants to know the Index Size before
 118         // decoding the Index.
 119         return index_size(index_hash->blocks.count,
 120                         index_hash->blocks.index_list_size);
 121 }
 122
 123
 124 /// Updates the sizes and the hash without any validation.
 125 static void
 126 hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
 127                 lzma_vli uncompressed_size)
 128 {
 129         info->blocks_size += vli_ceil4(unpadded_size);
 130         info->uncompressed_size += uncompressed_size;
 131         info->index_list_size += lzma_vli_size(unpadded_size)
 132                         + lzma_vli_size(uncompressed_size);
 133         ++info->count;
 134
 135         const lzma_vli sizes[2] = { unpadded_size, uncompressed_size };
 136         lzma_check_update(&info->check, LZMA_CHECK_BEST,
 137                         (const uint8_t *)(sizes), sizeof(sizes));
 138
 139         return;
 140 }
 141
 142
 143 extern LZMA_API(lzma_ret)
 144 lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
 145                 lzma_vli uncompressed_size)
 146 {
 147         // Validate the arguments.
 148         if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
 149                         || unpadded_size < UNPADDED_SIZE_MIN
 150                         || unpadded_size > UNPADDED_SIZE_MAX
 151                         || uncompressed_size > LZMA_VLI_MAX)
 152                 return LZMA_PROG_ERROR;
 153
 154         // Update the hash.
 155         hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
 156
 157         // Validate the properties of *info are still in allowed limits.
 158         if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
 159                         || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
 160                         || index_size(index_hash->blocks.count,
 161                                         index_hash->blocks.index_list_size)
 162                                 > LZMA_BACKWARD_SIZE_MAX
 163                         || index_stream_size(index_hash->blocks.blocks_size,
 164                                         index_hash->blocks.count,
 165                                         index_hash->blocks.index_list_size)
 166                                 > LZMA_VLI_MAX)
 167                 return LZMA_DATA_ERROR;
 168
 169         return LZMA_OK;
 170 }
 171
 172
 173 extern LZMA_API(lzma_ret)
 174 lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
 175                 size_t *in_pos, size_t in_size)
 176 {
 177         // Catch zero input buffer here, because in contrast to Index encoder
 178         // and decoder functions, applications call this function directly
 179         // instead of via lzma_code(), which does the buffer checking.
 180         if (*in_pos >= in_size)
 181                 return LZMA_BUF_ERROR;
 182
 183         // NOTE: This function has many similarities to index_encode() and
 184         // index_decode() functions found from index_encoder.c and
 185         // index_decoder.c. See the comments especially in index_encoder.c.
 186         const size_t in_start = *in_pos;
 187         lzma_ret ret = LZMA_OK;
 188
 189         while (*in_pos < in_size)
 190         switch (index_hash->sequence) {
 191         case SEQ_BLOCK:
 192                 // Check the Index Indicator is present.
 193                 if (in[(*in_pos)++] != INDEX_INDICATOR)
 194                         return LZMA_DATA_ERROR;
 195
 196                 index_hash->sequence = SEQ_COUNT;
 197                 break;
 198
 199         case SEQ_COUNT: {
 200                 ret = lzma_vli_decode(&index_hash->remaining,
 201                                 &index_hash->pos, in, in_pos, in_size);
 202                 if (ret != LZMA_STREAM_END)
 203                         goto out;
 204
 205                 // The count must match the count of the Blocks decoded.
 206                 if (index_hash->remaining != index_hash->blocks.count)
 207                         return LZMA_DATA_ERROR;
 208
 209                 ret = LZMA_OK;
 210                 index_hash->pos = 0;
 211
 212                 // Handle the special case when there are no Blocks.
 213                 index_hash->sequence = index_hash->remaining == 0
 214                                 ? SEQ_PADDING_INIT : SEQ_UNPADDED;
 215                 break;
 216         }
 217
 218         case SEQ_UNPADDED:
 219         case SEQ_UNCOMPRESSED: {
 220                 lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
 221                                 ? &index_hash->unpadded_size
 222                                 : &index_hash->uncompressed_size;
 223
 224                 ret = lzma_vli_decode(size, &index_hash->pos,
 225                                 in, in_pos, in_size);
 226                 if (ret != LZMA_STREAM_END)
 227                         goto out;
 228
 229                 ret = LZMA_OK;
 230                 index_hash->pos = 0;
 231
 232                 if (index_hash->sequence == SEQ_UNPADDED) {
 233                         if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
 234                                         || index_hash->unpadded_size
 235                                                 > UNPADDED_SIZE_MAX)
 236                                 return LZMA_DATA_ERROR;
 237
 238                         index_hash->sequence = SEQ_UNCOMPRESSED;
 239                 } else {
 240                         // Update the hash.
 241                         hash_append(&index_hash->records,
 242                                         index_hash->unpadded_size,
 243                                         index_hash->uncompressed_size);
 244
 245                         // Verify that we don't go over the known sizes. Note
 246                         // that this validation is simpler than the one used
 247                         // in lzma_index_hash_append(), because here we know
 248                         // that values in index_hash->blocks are already
 249                         // validated and we are fine as long as we don't
 250                         // exceed them in index_hash->records.
 251                         if (index_hash->blocks.blocks_size
 252                                         < index_hash->records.blocks_size
 253                                         || index_hash->blocks.uncompressed_size
 254                                         < index_hash->records.uncompressed_size
 255                                         || index_hash->blocks.index_list_size
 256                                         < index_hash->records.index_list_size)
 257                                 return LZMA_DATA_ERROR;
 258
 259                         // Check if this was the last Record.
 260                         index_hash->sequence = --index_hash->remaining == 0
 261                                         ? SEQ_PADDING_INIT : SEQ_UNPADDED;
 262                 }
 263
 264                 break;
 265         }
 266
 267         case SEQ_PADDING_INIT:
 268                 index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
 269                                 index_hash->records.count,
 270                                 index_hash->records.index_list_size)) & 3;
 271                 index_hash->sequence = SEQ_PADDING;
 272
 273         // Fall through
 274
 275         case SEQ_PADDING:
 276                 if (index_hash->pos > 0) {
 277                         --index_hash->pos;
 278                         if (in[(*in_pos)++] != 0x00)
 279                                 return LZMA_DATA_ERROR;
 280
 281                         break;
 282                 }
 283
 284                 // Compare the sizes.
 285                 if (index_hash->blocks.blocks_size
 286                                 != index_hash->records.blocks_size
 287                                 || index_hash->blocks.uncompressed_size
 288                                 != index_hash->records.uncompressed_size
 289                                 || index_hash->blocks.index_list_size
 290                                 != index_hash->records.index_list_size)
 291                         return LZMA_DATA_ERROR;
 292
 293                 // Finish the hashes and compare them.
 294                 lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
 295                 lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
 296                 if (memcmp(index_hash->blocks.check.buffer.u8,
 297                                 index_hash->records.check.buffer.u8,
 298                                 lzma_check_size(LZMA_CHECK_BEST)) != 0)
 299                         return LZMA_DATA_ERROR;
 300
 301                 // Finish the CRC32 calculation.
 302                 index_hash->crc32 = lzma_crc32(in + in_start,
 303                                 *in_pos - in_start, index_hash->crc32);
 304
 305                 index_hash->sequence = SEQ_CRC32;
 306
 307         // Fall through
 308
 309         case SEQ_CRC32:
 310                 do {
 311                         if (*in_pos == in_size)
 312                                 return LZMA_OK;
 313
 314                         if (((index_hash->crc32 >> (index_hash->pos * 8))
 315                                         & 0xFF) != in[(*in_pos)++]) {
 316 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 317                                 return LZMA_DATA_ERROR;
 318 #endif
 319                         }
 320
 321                 } while (++index_hash->pos < 4);
 322
 323                 return LZMA_STREAM_END;
 324
 325         default:
 326                 assert(0);
 327                 return LZMA_PROG_ERROR;
 328         }
 329
 330 out:
 331         // Update the CRC32.
 332         //
 333         // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
 334         // In such a case we had no input and thus in_used == 0.
 335         {
 336                 const size_t in_used = *in_pos - in_start;
 337                 if (in_used > 0)
 338                         index_hash->crc32 = lzma_crc32(in + in_start,
 339                                         in_used, index_hash->crc32);
 340         }
 341
 342         return ret;
 343 }