src/liblzma/common/file_info.c

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       file_info.c
   4 /// \brief      Decode .xz file information into a lzma_index structure
   5 //
   6 //  Author:     Lasse Collin
   7 //
   8 //  This file has been put into the public domain.
   9 //  You can do whatever you want with this file.
  10 //
  11 ///////////////////////////////////////////////////////////////////////////////
  12
  13 #include "index_decoder.h"
  14
  15
typedef struct {
        /// Current state of the state machine in file_info_decode().
        /// The states are listed in the order in which they are normally
        /// visited; after SEQ_HEADER_COMPARE the decoder either finishes
        /// or goes back to SEQ_PADDING_SEEK/SEQ_PADDING_DECODE to handle
        /// the previous Stream in the file.
        enum {
                /// Decode the Stream Header at the very beginning of
                /// the file and validate the file size.
                SEQ_MAGIC_BYTES,

                /// Seek backwards so that data ending at file_target_pos
                /// can be read into temp.
                SEQ_PADDING_SEEK,

                /// Read into temp and count the zero bytes (Stream Padding)
                /// at the end of it.
                SEQ_PADDING_DECODE,

                /// Decode the Stream Footer.
                SEQ_FOOTER,

                /// Initialize the Index decoder.
                SEQ_INDEX_INIT,

                /// Decode the Index field, either from temp or directly
                /// from the input buffer.
                SEQ_INDEX_DECODE,

                /// Decode the Stream Header of the current Stream.
                SEQ_HEADER_DECODE,

                /// Compare Stream Header against Stream Footer and
                /// add the decoded Index to combined_index.
                SEQ_HEADER_COMPARE,
        } sequence;

        /// Absolute position of in[*in_pos] in the file. All code that
        /// modifies *in_pos also updates this. seek_to_pos() needs this
        /// to determine if we need to request the application to seek for
        /// us or if we can do the seeking internally by adjusting *in_pos.
        uint64_t file_cur_pos;

        /// This refers to absolute positions of interesting parts of the
        /// input file. Sometimes it points to the *beginning* of a specific
        /// field and sometimes to the *end* of a field. The current target
        /// position at each moment is explained in the comments.
        uint64_t file_target_pos;

        /// Size of the .xz file (from the application).
        uint64_t file_size;

        /// Index decoder
        lzma_next_coder index_decoder;

        /// Number of bytes remaining in the Index field that is currently
        /// being decoded.
        lzma_vli index_remaining;

        /// The Index decoder will store the decoded Index in this pointer.
        lzma_index *this_index;

        /// Amount of Stream Padding in the current Stream.
        lzma_vli stream_padding;

        /// The final combined index is collected here.
        lzma_index *combined_index;

        /// Pointer from the application where to store the index information
        /// after successful decoding.
        lzma_index **dest_index;

        /// Pointer to lzma_stream.seek_pos to be used when returning
        /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
        uint64_t *external_seek_pos;

        /// Memory usage limit
        uint64_t memlimit;

        /// Stream Flags from the very beginning of the file.
        lzma_stream_flags first_header_flags;

        /// Stream Flags from Stream Header of the current Stream.
        lzma_stream_flags header_flags;

        /// Stream Flags from Stream Footer of the current Stream.
        lzma_stream_flags footer_flags;

        /// Read/write position in temp[]. fill_temp() advances this
        /// until it reaches temp_size. When the Index is decoded from
        /// temp[], this is the read position given to the Index decoder.
        size_t temp_pos;

        /// Number of bytes in temp[] that are wanted or in use. This is
        /// set to zero when temp[] holds no useful data.
        size_t temp_size;

        /// Temporary buffer for holding a piece of the input file
        /// (Stream Padding, Stream Header/Footer, and possibly Index).
        uint8_t temp[8192];

} lzma_file_info_coder;
  84
  85
  86 /// Copies data from in[*in_pos] into coder->temp until
  87 /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
  88 /// in sync with *in_pos. Returns true if more input is needed.
  89 static bool
  90 fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
  91                 size_t *restrict in_pos, size_t in_size)
  92 {
  93         coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
  94                         coder->temp, &coder->temp_pos, coder->temp_size);
  95         return coder->temp_pos < coder->temp_size;
  96 }
  97
  98
  99 /// Seeks to the absolute file position specified by target_pos.
 100 /// This tries to do the seeking by only modifying *in_pos, if possible.
 101 /// The main benefit of this is that if one passes the whole file at once
 102 /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
 103 /// as all the seeking can be done by adjusting *in_pos in this function.
 104 ///
 105 /// Returns true if an external seek is needed and the caller must return
 106 /// LZMA_SEEK_NEEDED.
 107 static bool
 108 seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
 109                 size_t in_start, size_t *in_pos, size_t in_size)
 110 {
 111         // The input buffer doesn't extend beyond the end of the file.
 112         // This has been checked by file_info_decode() already.
 113         assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
 114
 115         const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
 116         const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
 117
 118         bool external_seek_needed;
 119
 120         if (target_pos >= pos_min && target_pos <= pos_max) {
 121                 // The requested position is available in the current input
 122                 // buffer or right after it. That is, in a corner case we
 123                 // end up setting *in_pos == in_size and thus will immediately
 124                 // need new input bytes from the application.
 125                 *in_pos += (size_t)(target_pos - coder->file_cur_pos);
 126                 external_seek_needed = false;
 127         } else {
 128                 // Ask the application to seek the input file.
 129                 *coder->external_seek_pos = target_pos;
 130                 external_seek_needed = true;
 131
 132                 // Mark the whole input buffer as used. This way
 133                 // lzma_stream.total_in will have a better estimate
 134                 // of the amount of data read. It still won't be perfect
 135                 // as the value will depend on the input buffer size that
 136                 // the application uses, but it should be good enough for
 137                 // those few who want an estimate.
 138                 *in_pos = in_size;
 139         }
 140
 141         // After seeking (internal or external) the current position
 142         // will match the requested target position.
 143         coder->file_cur_pos = target_pos;
 144
 145         return external_seek_needed;
 146 }
 147
 148
 149 /// The caller sets coder->file_target_pos so that it points to the *end*
 150 /// of the desired file position. This function then determines how far
 151 /// backwards from that position we can seek. After seeking fill_temp()
 152 /// can be used to read data into coder->temp. When fill_temp() has finished,
 153 /// coder->temp[coder->temp_size] will match coder->file_target_pos.
 154 ///
 155 /// This also validates that coder->target_file_pos is sane in sense that
 156 /// we aren't trying to seek too far backwards (too close or beyond the
 157 /// beginning of the file).
 158 static lzma_ret
 159 reverse_seek(lzma_file_info_coder *coder,
 160                 size_t in_start, size_t *in_pos, size_t in_size)
 161 {
 162         // Check that there is enough data before the target position
 163         // to contain at least Stream Header and Stream Footer. If there
 164         // isn't, the file cannot be valid.
 165         if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
 166                 return LZMA_DATA_ERROR;
 167
 168         coder->temp_pos = 0;
 169
 170         // The Stream Header at the very beginning of the file gets handled
 171         // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
 172         // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
 173         // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
 174         // application uses an extremely small input buffer and the input
 175         // file is very small.
 176         if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
 177                         < sizeof(coder->temp))
 178                 coder->temp_size = (size_t)(coder->file_target_pos
 179                                 - LZMA_STREAM_HEADER_SIZE);
 180         else
 181                 coder->temp_size = sizeof(coder->temp);
 182
 183         // The above if-statements guarantee this. This is important because
 184         // the Stream Header/Footer decoders assume that there's at least
 185         // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
 186         assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
 187
 188         if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
 189                         in_start, in_pos, in_size))
 190                 return LZMA_SEEK_NEEDED;
 191
 192         return LZMA_OK;
 193 }
 194
 195
 196 /// Gets the number of zero-bytes at the end of the buffer.
 197 static size_t
 198 get_padding_size(const uint8_t *buf, size_t buf_size)
 199 {
 200         size_t padding = 0;
 201         while (buf_size > 0 && buf[--buf_size] == 0x00)
 202                 ++padding;
 203
 204         return padding;
 205 }
 206
 207
 208 /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
 209 /// is used to tell the application that Magic Bytes didn't match. In other
 210 /// Stream Header/Footer fields (in the middle/end of the file) it could be
 211 /// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
 212 /// is a valid Stream Header at the beginning of the file. For those cases
 213 /// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
 214 static lzma_ret
 215 hide_format_error(lzma_ret ret)
 216 {
 217         if (ret == LZMA_FORMAT_ERROR)
 218                 ret = LZMA_DATA_ERROR;
 219
 220         return ret;
 221 }
 222
 223
 224 /// Calls the Index decoder and updates coder->index_remaining.
 225 /// This is a separate function because the input can be either directly
 226 /// from the application or from coder->temp.
 227 static lzma_ret
 228 decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
 229                 const uint8_t *restrict in, size_t *restrict in_pos,
 230                 size_t in_size, bool update_file_cur_pos)
 231 {
 232         const size_t in_start = *in_pos;
 233
 234         const lzma_ret ret = coder->index_decoder.code(
 235                         coder->index_decoder.coder,
 236                         allocator, in, in_pos, in_size,
 237                         NULL, NULL, 0, LZMA_RUN);
 238
 239         coder->index_remaining -= *in_pos - in_start;
 240
 241         if (update_file_cur_pos)
 242                 coder->file_cur_pos += *in_pos - in_start;
 243
 244         return ret;
 245 }
 246
 247
/// The main decoding function: a state machine driven by coder->sequence.
///
/// The Stream Header at the beginning of the file is decoded first. After
/// that the file is processed backwards one Stream at a time: Stream
/// Padding, Stream Footer, Index, and Stream Header. Each decoded Index is
/// combined with the Indexes decoded earlier. When the beginning of the
/// file is reached, the combined Index is stored to *coder->dest_index.
///
/// The out, out_pos, out_size, and action arguments are unused.
///
/// Return values produced directly by this function:
///   - LZMA_OK: More input is needed.
///   - LZMA_SEEK_NEEDED: The application must seek the input file
///     (seek_to_pos() has stored the position to *coder->external_seek_pos).
///   - LZMA_STREAM_END: Decoding finished and *coder->dest_index was set.
///   - LZMA_FORMAT_ERROR: The file is too small to contain a Stream Header.
///   - LZMA_DATA_ERROR: The file is corrupt.
///   - LZMA_PROG_ERROR: Internal sanity check failed.
///
/// Errors from the called decoders and from lzma_index_cat() are also
/// returned, with LZMA_FORMAT_ERROR converted to LZMA_DATA_ERROR for
/// Stream Headers/Footers other than the first Stream Header.
static lzma_ret
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
                const uint8_t *restrict in, size_t *restrict in_pos,
                size_t in_size,
                uint8_t *restrict out lzma_attribute((__unused__)),
                size_t *restrict out_pos lzma_attribute((__unused__)),
                size_t out_size lzma_attribute((__unused__)),
                lzma_action action lzma_attribute((__unused__)))
{
        lzma_file_info_coder *coder = coder_ptr;

        // Remember where the input buffer started on this call. It is
        // needed by seek_to_pos() to know how far backwards it can seek
        // internally.
        const size_t in_start = *in_pos;

        // If the caller provides input past the end of the file, trim
        // the extra bytes from the buffer so that we won't read too far.
        assert(coder->file_size >= coder->file_cur_pos);
        if (coder->file_size - coder->file_cur_pos < in_size - in_start)
                in_size = in_start
                        + (size_t)(coder->file_size - coder->file_cur_pos);

        // A "break" directly inside this switch leaves only the switch;
        // the loop then dispatches again using the updated coder->sequence.
        // The function is exited only via the return statements.
        while (true)
        switch (coder->sequence) {
        case SEQ_MAGIC_BYTES:
                // Decode the Stream Header at the beginning of the file
                // first to check if the Magic Bytes match. The flags
                // are stored in coder->first_header_flags so that we
                // don't need to seek to it again.
                //
                // Check that the file is big enough to contain at least
                // Stream Header.
                if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
                        return LZMA_FORMAT_ERROR;

                // Read the Stream Header field into coder->temp.
                if (fill_temp(coder, in, in_pos, in_size))
                        return LZMA_OK;

                // This is the only Stream Header/Footer decoding where we
                // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
                // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
                return_if_error(lzma_stream_header_decode(
                                &coder->first_header_flags, coder->temp));

                // Now that we know that the Magic Bytes match, check the
                // file size. It's better to do this here after checking the
                // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
                // instead of LZMA_DATA_ERROR when the Magic Bytes don't
                // match in a file that is too big or isn't a multiple of
                // four bytes.
                if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
                        return LZMA_DATA_ERROR;

                // Start looking for Stream Padding and Stream Footer
                // at the end of the file.
                coder->file_target_pos = coder->file_size;

        // Fall through

        case SEQ_PADDING_SEEK:
                // The sequence is updated before seeking so that if
                // reverse_seek() makes us return LZMA_SEEK_NEEDED, the
                // next call continues from SEQ_PADDING_DECODE.
                coder->sequence = SEQ_PADDING_DECODE;
                return_if_error(reverse_seek(
                                coder, in_start, in_pos, in_size));

        // Fall through

        case SEQ_PADDING_DECODE: {
                // Copy to coder->temp first. This keeps the code simpler if
                // the application only provides input a few bytes at a time.
                if (fill_temp(coder, in, in_pos, in_size))
                        return LZMA_OK;

                // Scan the buffer backwards to get the size of the
                // Stream Padding field (if any).
                const size_t new_padding = get_padding_size(
                                coder->temp, coder->temp_size);
                coder->stream_padding += new_padding;

                // Set the target position to the beginning of Stream Padding
                // that has been observed so far. If all Stream Padding has
                // been seen, then the target position will be at the end
                // of the Stream Footer field.
                coder->file_target_pos -= new_padding;

                if (new_padding == coder->temp_size) {
                        // The whole buffer was padding. Seek backwards in
                        // the file to get more input.
                        coder->sequence = SEQ_PADDING_SEEK;
                        break;
                }

                // Size of Stream Padding must be a multiple of 4 bytes.
                if (coder->stream_padding & 3)
                        return LZMA_DATA_ERROR;

                coder->sequence = SEQ_FOOTER;

                // Calculate the amount of non-padding data in coder->temp.
                coder->temp_size -= new_padding;
                coder->temp_pos = coder->temp_size;

                // We can avoid an external seek if the whole Stream Footer
                // is already in coder->temp. In that case SEQ_FOOTER won't
                // read more input and will find the Stream Footer from
                // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
                //
                // Otherwise we will need to seek. The seeking is done so
                // that Stream Footer will be at the end of coder->temp.
                // This way it's likely that we also get a complete Index
                // field into coder->temp without needing a separate seek
                // for that (unless the Index field is big).
                if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
                        return_if_error(reverse_seek(
                                        coder, in_start, in_pos, in_size));
        }

        // Fall through

        case SEQ_FOOTER:
                // Copy the Stream Footer field into coder->temp.
                // If Stream Footer was already available in coder->temp
                // in SEQ_PADDING_DECODE, then this does nothing.
                if (fill_temp(coder, in, in_pos, in_size))
                        return LZMA_OK;

                // Make coder->file_target_pos and coder->temp_size point
                // to the beginning of Stream Footer and thus to the end
                // of the Index field. coder->temp_pos will be updated
                // a bit later.
                coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
                coder->temp_size -= LZMA_STREAM_HEADER_SIZE;

                // Decode Stream Footer.
                return_if_error(hide_format_error(lzma_stream_footer_decode(
                                &coder->footer_flags,
                                coder->temp + coder->temp_size)));

                // Check that we won't seek past the beginning of the file.
                //
                // LZMA_STREAM_HEADER_SIZE is added because there must be
                // space for Stream Header too even though we won't seek
                // there before decoding the Index field.
                //
                // There's no risk of integer overflow here because
                // Backward Size cannot be greater than 2^34.
                if (coder->file_target_pos < coder->footer_flags.backward_size
                                + LZMA_STREAM_HEADER_SIZE)
                        return LZMA_DATA_ERROR;

                // Set the target position to the beginning of the Index field.
                coder->file_target_pos -= coder->footer_flags.backward_size;
                coder->sequence = SEQ_INDEX_INIT;

                // We can avoid an external seek if the whole Index field is
                // already available in coder->temp.
                if (coder->temp_size >= coder->footer_flags.backward_size) {
                        // Set coder->temp_pos to point to the beginning
                        // of the Index.
                        coder->temp_pos = coder->temp_size
                                        - coder->footer_flags.backward_size;
                } else {
                        // These are set to zero to indicate that there's no
                        // useful data (Index or anything else) in coder->temp.
                        coder->temp_pos = 0;
                        coder->temp_size = 0;

                        // Seek to the beginning of the Index field.
                        if (seek_to_pos(coder, coder->file_target_pos,
                                        in_start, in_pos, in_size))
                                return LZMA_SEEK_NEEDED;
                }

        // Fall through

        case SEQ_INDEX_INIT: {
                // Calculate the amount of memory already used by the earlier
                // Indexes so that we know how big memory limit to pass to
                // the Index decoder.
                //
                // NOTE: When there are multiple Streams, the separate
                // lzma_index structures can use more RAM (as measured by
                // lzma_index_memused()) than the final combined lzma_index.
                // Thus memlimit may need to be slightly higher than the final
                // calculated memory usage will be. This is perhaps a bit
                // confusing to the application, but I think it shouldn't
                // cause problems in practice.
                uint64_t memused = 0;
                if (coder->combined_index != NULL) {
                        memused = lzma_index_memused(coder->combined_index);
                        assert(memused <= coder->memlimit);
                        if (memused > coder->memlimit) // Extra sanity check
                                return LZMA_PROG_ERROR;
                }

                // Initialize the Index decoder.
                return_if_error(lzma_index_decoder_init(
                                &coder->index_decoder, allocator,
                                &coder->this_index,
                                coder->memlimit - memused));

                coder->index_remaining = coder->footer_flags.backward_size;
                coder->sequence = SEQ_INDEX_DECODE;
        }

        // Fall through

        case SEQ_INDEX_DECODE: {
                // Decode (a part of) the Index. If the whole Index is already
                // in coder->temp, read it from there. Otherwise read from
                // in[*in_pos] onwards. Note that decode_index() updates
                // coder->index_remaining and optionally coder->file_cur_pos.
                lzma_ret ret;
                if (coder->temp_size != 0) {
                        assert(coder->temp_size - coder->temp_pos
                                        == coder->index_remaining);
                        ret = decode_index(coder, allocator, coder->temp,
                                        &coder->temp_pos, coder->temp_size,
                                        false);
                } else {
                        // Don't give the decoder more input than the known
                        // remaining size of the Index field.
                        size_t in_stop = in_size;
                        if (in_size - *in_pos > coder->index_remaining)
                                in_stop = *in_pos
                                        + (size_t)(coder->index_remaining);

                        ret = decode_index(coder, allocator,
                                        in, in_pos, in_stop, true);
                }

                switch (ret) {
                case LZMA_OK:
                        // If the Index decoder asks for more input when we
                        // have already given it as much input as Backward Size
                        // indicated, the file is invalid.
                        if (coder->index_remaining == 0)
                                return LZMA_DATA_ERROR;

                        // We cannot get here if we were reading Index from
                        // coder->temp because when reading from coder->temp
                        // we give the Index decoder exactly
                        // coder->index_remaining bytes of input.
                        assert(coder->temp_size == 0);

                        return LZMA_OK;

                case LZMA_STREAM_END:
                        // If the decoding seems to be successful, check also
                        // that the Index decoder consumed as much input as
                        // indicated by the Backward Size field.
                        if (coder->index_remaining != 0)
                                return LZMA_DATA_ERROR;

                        // This break leaves only the inner switch (ret).
                        break;

                default:
                        return ret;
                }

                // Calculate how much the Index tells us to seek backwards
                // (relative to the beginning of the Index): Total size of
                // all Blocks plus the size of the Stream Header field.
                // No integer overflow here because lzma_index_total_size()
                // cannot return a value greater than LZMA_VLI_MAX.
                const uint64_t seek_amount
                                = lzma_index_total_size(coder->this_index)
                                        + LZMA_STREAM_HEADER_SIZE;

                // Check that Index is sane in sense that seek_amount won't
                // make us seek past the beginning of the file when locating
                // the Stream Header.
                //
                // coder->file_target_pos still points to the beginning of
                // the Index field.
                if (coder->file_target_pos < seek_amount)
                        return LZMA_DATA_ERROR;

                // Set the target to the beginning of Stream Header.
                coder->file_target_pos -= seek_amount;

                if (coder->file_target_pos == 0) {
                        // We would seek to the beginning of the file, but
                        // since we already decoded that Stream Header in
                        // SEQ_MAGIC_BYTES, we can use the cached value from
                        // coder->first_header_flags to avoid the seek.
                        coder->header_flags = coder->first_header_flags;
                        coder->sequence = SEQ_HEADER_COMPARE;
                        break;
                }

                coder->sequence = SEQ_HEADER_DECODE;

                // Make coder->file_target_pos point to the end of
                // the Stream Header field.
                coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;

                // If coder->temp_size is non-zero, it points to the end
                // of the Index field. Then the beginning of the Index
                // field is at coder->temp[coder->temp_size
                // - coder->footer_flags.backward_size].
                assert(coder->temp_size == 0 || coder->temp_size
                                >= coder->footer_flags.backward_size);

                // If coder->temp contained the whole Index, see if it has
                // enough data to contain also the Stream Header. If so,
                // we avoid an external seek.
                //
                // NOTE: This can happen only with small .xz files and only
                // for the non-first Stream as the Stream Flags of the first
                // Stream are cached and already handled a few lines above.
                // So this isn't as useful as the other seek-avoidance cases.
                if (coder->temp_size != 0 && coder->temp_size
                                - coder->footer_flags.backward_size
                                >= seek_amount) {
                        // Make temp_pos and temp_size point to the *end* of
                        // Stream Header so that SEQ_HEADER_DECODE will find
                        // the start of Stream Header from coder->temp[
                        // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
                        coder->temp_pos = coder->temp_size
                                        - coder->footer_flags.backward_size
                                        - seek_amount
                                        + LZMA_STREAM_HEADER_SIZE;
                        coder->temp_size = coder->temp_pos;
                } else {
                        // Seek so that Stream Header will be at the end of
                        // coder->temp. With typical multi-Stream files we
                        // will usually also get the Stream Footer and Index
                        // of the *previous* Stream in coder->temp and thus
                        // won't need a separate seek for them.
                        return_if_error(reverse_seek(coder,
                                        in_start, in_pos, in_size));
                }
        }

        // Fall through

        case SEQ_HEADER_DECODE:
                // Copy the Stream Header field into coder->temp.
                // If Stream Header was already available in coder->temp
                // in SEQ_INDEX_DECODE, then this does nothing.
                if (fill_temp(coder, in, in_pos, in_size))
                        return LZMA_OK;

                // Make all these point to the beginning of Stream Header.
                coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
                coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
                coder->temp_pos = coder->temp_size;

                // Decode the Stream Header.
                return_if_error(hide_format_error(lzma_stream_header_decode(
                                &coder->header_flags,
                                coder->temp + coder->temp_size)));

                coder->sequence = SEQ_HEADER_COMPARE;

        // Fall through

        case SEQ_HEADER_COMPARE:
                // Compare Stream Header against Stream Footer. They must
                // match.
                return_if_error(lzma_stream_flags_compare(
                                &coder->header_flags, &coder->footer_flags));

                // Store the decoded Stream Flags into the Index. Use the
                // Footer Flags because it contains Backward Size, although
                // it shouldn't matter in practice.
                if (lzma_index_stream_flags(coder->this_index,
                                &coder->footer_flags) != LZMA_OK)
                        return LZMA_PROG_ERROR;

                // Store also the size of the Stream Padding field. It is
                // needed to calculate the offsets of the Streams correctly.
                if (lzma_index_stream_padding(coder->this_index,
                                coder->stream_padding) != LZMA_OK)
                        return LZMA_PROG_ERROR;

                // Reset it so that it's ready for the next Stream.
                coder->stream_padding = 0;

                // Append the earlier decoded Indexes after this_index.
                if (coder->combined_index != NULL)
                        return_if_error(lzma_index_cat(coder->this_index,
                                        coder->combined_index, allocator));

                // this_index now holds everything decoded so far, so it
                // becomes the new combined index.
                coder->combined_index = coder->this_index;
                coder->this_index = NULL;

                // If the whole file was decoded, tell the caller that we
                // are finished.
                if (coder->file_target_pos == 0) {
                        // The combined index must indicate the same file
                        // size as was told to us at initialization.
                        assert(lzma_index_file_size(coder->combined_index)
                                        == coder->file_size);

                        // Make the combined index available to
                        // the application.
                        *coder->dest_index = coder->combined_index;
                        coder->combined_index = NULL;

                        // Mark the input buffer as used since we may have
                        // done internal seeking and thus don't know how
                        // many input bytes were actually used. This way
                        // lzma_stream.total_in gets a slightly better
                        // estimate of the amount of input used.
                        *in_pos = in_size;
                        return LZMA_STREAM_END;
                }

                // We didn't hit the beginning of the file yet, so continue
                // reading backwards in the file. If we have unprocessed
                // data in coder->temp, use it before requesting more data
                // from the application.
                //
                // coder->file_target_pos, coder->temp_size, and
                // coder->temp_pos all point to the beginning of Stream Header
                // and thus the end of the previous Stream in the file.
                coder->sequence = coder->temp_size > 0
                                ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
                break;

        default:
                // coder->sequence holds a value that isn't a valid state.
                assert(0);
                return LZMA_PROG_ERROR;
        }
}
 672
 673
 674 static lzma_ret
 675 file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
 676                 uint64_t *old_memlimit, uint64_t new_memlimit)
 677 {
 678         lzma_file_info_coder *coder = coder_ptr;
 679
 680         // The memory usage calculation comes from three things:
 681         //
 682         // (1) The Indexes that have already been decoded and processed into
 683         //     coder->combined_index.
 684         //
 685         // (2) The latest Index in coder->this_index that has been decoded but
 686         //     not yet put into coder->combined_index.
 687         //
 688         // (3) The latest Index that we have started decoding but haven't
 689         //     finished and thus isn't available in coder->this_index yet.
 690         //     Memory usage and limit information needs to be communicated
 691         //     from/to coder->index_decoder.
 692         //
 693         // Care has to be taken to not do both (2) and (3) when calculating
 694         // the memory usage.
 695         uint64_t combined_index_memusage = 0;
 696         uint64_t this_index_memusage = 0;
 697
 698         // (1) If we have already successfully decoded one or more Indexes,
 699         // get their memory usage.
 700         if (coder->combined_index != NULL)
 701                 combined_index_memusage = lzma_index_memused(
 702                                 coder->combined_index);
 703
 704         // Choose between (2), (3), or neither.
 705         if (coder->this_index != NULL) {
 706                 // (2) The latest Index is available. Use its memory usage.
 707                 this_index_memusage = lzma_index_memused(coder->this_index);
 708
 709         } else if (coder->sequence == SEQ_INDEX_DECODE) {
 710                 // (3) The Index decoder is activate and hasn't yet stored
 711                 // the new index in coder->this_index. Get the memory usage
 712                 // information from the Index decoder.
 713                 //
 714                 // NOTE: If the Index decoder doesn't yet know how much memory
 715                 // it will eventually need, it will return a tiny value here.
 716                 uint64_t dummy;
 717                 if (coder->index_decoder.memconfig(coder->index_decoder.coder,
 718                                         &this_index_memusage, &dummy, 0)
 719                                 != LZMA_OK) {
 720                         assert(0);
 721                         return LZMA_PROG_ERROR;
 722                 }
 723         }
 724
 725         // Now we know the total memory usage/requirement. If we had neither
 726         // old Indexes nor a new Index, this will be zero which isn't
 727         // acceptable as lzma_memusage() has to return non-zero on success
 728         // and even with an empty .xz file we will end up with a lzma_index
 729         // that takes some memory.
 730         *memusage = combined_index_memusage + this_index_memusage;
 731         if (*memusage == 0)
 732                 *memusage = lzma_index_memusage(1, 0);
 733
 734         *old_memlimit = coder->memlimit;
 735
 736         // If requested, set a new memory usage limit.
 737         if (new_memlimit != 0) {
 738                 if (new_memlimit < *memusage)
 739                         return LZMA_MEMLIMIT_ERROR;
 740
 741                 // In the condition (3) we need to tell the Index decoder
 742                 // its new memory usage limit.
 743                 if (coder->this_index == NULL
 744                                 && coder->sequence == SEQ_INDEX_DECODE) {
 745                         const uint64_t idec_new_memlimit = new_memlimit
 746                                         - combined_index_memusage;
 747
 748                         assert(this_index_memusage > 0);
 749                         assert(idec_new_memlimit > 0);
 750
 751                         uint64_t dummy1;
 752                         uint64_t dummy2;
 753
 754                         if (coder->index_decoder.memconfig(
 755                                         coder->index_decoder.coder,
 756                                         &dummy1, &dummy2, idec_new_memlimit)
 757                                         != LZMA_OK) {
 758                                 assert(0);
 759                                 return LZMA_PROG_ERROR;
 760                         }
 761                 }
 762
 763                 coder->memlimit = new_memlimit;
 764         }
 765
 766         return LZMA_OK;
 767 }
 768
 769
 770 static void
 771 file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
 772 {
 773         lzma_file_info_coder *coder = coder_ptr;
 774
 775         lzma_next_end(&coder->index_decoder, allocator);
 776         lzma_index_end(coder->this_index, allocator);
 777         lzma_index_end(coder->combined_index, allocator);
 778
 779         lzma_free(coder, allocator);
 780         return;
 781 }
 782
 783
 784 static lzma_ret
 785 lzma_file_info_decoder_init(lzma_next_coder *next,
 786                 const lzma_allocator *allocator, uint64_t *seek_pos,
 787                 lzma_index **dest_index,
 788                 uint64_t memlimit, uint64_t file_size)
 789 {
 790         lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
 791
 792         if (dest_index == NULL)
 793                 return LZMA_PROG_ERROR;
 794
 795         lzma_file_info_coder *coder = next->coder;
 796         if (coder == NULL) {
 797                 coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
 798                 if (coder == NULL)
 799                         return LZMA_MEM_ERROR;
 800
 801                 next->coder = coder;
 802                 next->code = &file_info_decode;
 803                 next->end = &file_info_decoder_end;
 804                 next->memconfig = &file_info_decoder_memconfig;
 805
 806                 coder->index_decoder = LZMA_NEXT_CODER_INIT;
 807                 coder->this_index = NULL;
 808                 coder->combined_index = NULL;
 809         }
 810
 811         coder->sequence = SEQ_MAGIC_BYTES;
 812         coder->file_cur_pos = 0;
 813         coder->file_target_pos = 0;
 814         coder->file_size = file_size;
 815
 816         lzma_index_end(coder->this_index, allocator);
 817         coder->this_index = NULL;
 818
 819         lzma_index_end(coder->combined_index, allocator);
 820         coder->combined_index = NULL;
 821
 822         coder->stream_padding = 0;
 823
 824         coder->dest_index = dest_index;
 825         coder->external_seek_pos = seek_pos;
 826
 827         // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
 828         // won't return 0 (which would indicate an error).
 829         coder->memlimit = my_max(1, memlimit);
 830
 831         // Prepare these for reading the first Stream Header into coder->temp.
 832         coder->temp_pos = 0;
 833         coder->temp_size = LZMA_STREAM_HEADER_SIZE;
 834
 835         return LZMA_OK;
 836 }
 837
 838
 839 extern LZMA_API(lzma_ret)
 840 lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
 841                 uint64_t memlimit, uint64_t file_size)
 842 {
 843         lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
 844                         dest_index, memlimit, file_size);
 845
 846         // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
 847         // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
 848         // combination in a sane way. Applications still need to be careful
 849         // if they use LZMA_FINISH so that they remember to reset it back
 850         // to LZMA_RUN after seeking if needed.
 851         strm->internal->supported_actions[LZMA_RUN] = true;
 852         strm->internal->supported_actions[LZMA_FINISH] = true;
 853
 854         return LZMA_OK;
 855 }