1 /***************************************************************************
2 * chm_lib.c - CHM archive manipulation routines *
3 * ------------------- *
5 * author: Jed Wing <jedwin@ugcs.caltech.edu> *
7 * notes: These routines are meant for the manipulation of Microsoft *
8 * .chm (Compiled HTML Help) files, but may likely be used *
9 * for the manipulation of any ITSS archive, if ever ITSS *
10 * archives are used for any other purpose. *
12 * Note also that the section names are statically handled. *
13 * To be entirely correct, the section names should be read *
14 * from the section names meta-file, and then the various *
15 * content sections and the "transforms" to apply to the data *
16 * they contain should be inferred from the section name and *
17 * the meta-files referenced using that name; however, all of *
18 * the files I've been able to get my hands on appear to have *
19 * only two sections: Uncompressed and MSCompressed. *
20 * Additionally, the ITSS.DLL file included with Windows does *
21 * not appear to handle any different transforms than the *
22 * simple LZX-transform. Furthermore, the list of transforms *
23 * to apply is broken, in that only half the required space *
24 * is allocated for the list. (It appears as though the *
25 * space is allocated for ASCII strings, but the strings are *
26 * written as unicode. As a result, only the first half of *
27 * the string appears.) So this is probably not too big of *
28 * a deal, at least until CHM v4 (MS .lit files), which also *
29 * incorporate encryption, of some description. *
31 ***************************************************************************/
33 /***************************************************************************
35 * This library is free software; you can redistribute it and/or
36 * modify it under the terms of the GNU Lesser General Public
37 * License as published by the Free Software Foundation; either
38 * version 2.1 of the License, or (at your option) any later version.
40 * This library is distributed in the hope that it will be useful,
41 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43 * Lesser General Public License for more details.
45 * You should have received a copy of the GNU Lesser General Public
46 * License along with this library; if not, write to the Free Software
47 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
49 ***************************************************************************/
51 /***************************************************************************
53 * Adapted for Wine by Mike McCormack *
55 ***************************************************************************/
70 #define CHM_ACQUIRE_LOCK(a) do { \
71 EnterCriticalSection(&(a)); \
73 #define CHM_RELEASE_LOCK(a) do { \
74 LeaveCriticalSection(&(a)); \
77 #define CHM_NULL_FD (INVALID_HANDLE_VALUE)
78 #define CHM_CLOSE_FILE(fd) CloseHandle((fd))
81 * defines related to tuning
83 #ifndef CHM_MAX_BLOCKS_CACHED
84 #define CHM_MAX_BLOCKS_CACHED 5
86 #define CHM_PARAM_MAX_BLOCKS_CACHED 0
89 * architecture specific defines
91 * Note: as soon as C99 is more widespread, the below defines should
92 * probably just use the C99 sized-int types.
94 * The following settings will probably work for many platforms. The sizes
95 * don't have to be exactly correct, but the types must accommodate at least as
96 * many bits as they specify.
99 /* i386, 32-bit, Windows */
/* Sized integer aliases used throughout this module, mapped onto the Windows
 * base types USHORT, DWORD, LONGLONG and ULONGLONG. */
102 typedef USHORT UInt16
;
104 typedef DWORD UInt32
;
105 typedef LONGLONG Int64
;
106 typedef ULONGLONG UInt64
;
108 /* utilities for unmarshalling data */
/* Copies `count` raw bytes from the input cursor *pData into dest.
 * A guard on count <= 0 or count > *pLenRemain precedes the memcpy, and
 * *pLenRemain is reduced by count after it.
 * NOTE(review): the dest/count declarations, the guard's action, the cursor
 * advance and the return statements are not visible here -- confirm. */
109 static BOOL
_unmarshal_char_array(unsigned char **pData
,
110 unsigned int *pLenRemain
,
114 if (count
<= 0 || (unsigned int)count
> *pLenRemain
)
116 memcpy(dest
, (*pData
), count
);
118 *pLenRemain
-= count
;
/* Unsigned-byte twin of _unmarshal_char_array: the same guard
 * (count <= 0 or count > *pLenRemain), the same memcpy of `count` bytes from
 * *pData into dest, and the same decrement of *pLenRemain.
 * NOTE(review): parameter tail and return statements are not visible here. */
122 static BOOL
_unmarshal_uchar_array(unsigned char **pData
,
123 unsigned int *pLenRemain
,
127 if (count
<= 0 || (unsigned int)count
> *pLenRemain
)
129 memcpy(dest
, (*pData
), count
);
131 *pLenRemain
-= count
;
/* Assembles a 32-bit value into *dest from four bytes at the cursor, least
 * significant byte first (byte 0 unshifted, byte 3 shifted left by 24).
 * NOTE(review): (*pData)[3] is an unsigned char promoted to int before the
 * shift, so a top byte >= 0x80 shifts into the sign bit; consider casting to
 * an unsigned 32-bit type first.
 * NOTE(review): any length check / cursor advance is not visible here. */
135 static BOOL
_unmarshal_int32(unsigned char **pData
,
136 unsigned int *pLenRemain
,
141 *dest
= (*pData
)[0] | (*pData
)[1]<<8 | (*pData
)[2]<<16 | (*pData
)[3]<<24;
/* Unsigned counterpart of _unmarshal_int32: builds *dest from four bytes at
 * the cursor, least significant byte first.
 * NOTE(review): the <<24 operand is int-promoted, so a top byte >= 0x80
 * shifts into the sign bit before the result is stored; consider an unsigned
 * cast.  Length check / cursor advance are not visible here. */
147 static BOOL
_unmarshal_uint32(unsigned char **pData
,
148 unsigned int *pLenRemain
,
153 *dest
= (*pData
)[0] | (*pData
)[1]<<8 | (*pData
)[2]<<16 | (*pData
)[3]<<24;
/* Builds a 64-bit value by OR-ing input byte (*pData)[i-1] into an
 * accumulator `temp`.
 * NOTE(review): the loop header, the shift applied to temp and the store to
 * the destination are not visible here; presumably i runs from the most
 * significant byte down -- confirm against the full function. */
159 static BOOL
_unmarshal_int64(unsigned char **pData
,
160 unsigned int *pLenRemain
,
171 temp
|= (*pData
)[i
-1];
/* Unsigned counterpart of _unmarshal_int64: OR-s input byte (*pData)[i-1]
 * into an accumulator `temp`.
 * NOTE(review): loop bounds, shifting and the final store are not visible
 * here -- confirm against the full function. */
179 static BOOL
_unmarshal_uint64(unsigned char **pData
,
180 unsigned int *pLenRemain
,
191 temp
|= (*pData
)[i
-1];
/* Reads a UUID as 16 raw bytes: forwards to _unmarshal_uchar_array with a
 * fixed count of 16 and returns its result unchanged. */
199 static BOOL
_unmarshal_uuid(unsigned char **pData
,
200 unsigned int *pDataLen
,
203 return _unmarshal_uchar_array(pData
, pDataLen
, dest
, 16);
207 * structures local to this module
210 /* structure of ITSF headers */
/* ITSF file header layout.  The two length macros are the header sizes this
 * module accepts (0x58 and 0x60).  The trailing comment on each field is its
 * byte offset in hex; data_offset at 0x58 is marked as not present before V3. */
211 #define _CHM_ITSF_V2_LEN (0x58)
212 #define _CHM_ITSF_V3_LEN (0x60)
215 char signature
[4]; /* 0 (ITSF) */
216 Int32 version
; /* 4 */
217 Int32 header_len
; /* 8 */
218 Int32 unknown_000c
; /* c */
219 UInt32 last_modified
; /* 10 */
220 UInt32 lang_id
; /* 14 */
221 UChar dir_uuid
[16]; /* 18 */
222 UChar stream_uuid
[16]; /* 28 */
223 UInt64 unknown_offset
; /* 38 */
224 UInt64 unknown_len
; /* 40 */
225 UInt64 dir_offset
; /* 48 */
226 UInt64 dir_len
; /* 50 */
227 UInt64 data_offset
; /* 58 (Not present before V3) */
228 }; /* __attribute__ ((aligned (1))); */
/* Decodes an ITSF header from the byte cursor into *dest.
 *  - Input length is tested against _CHM_ITSF_V2_LEN and _CHM_ITSF_V3_LEN.
 *  - Fields are read in file order: signature, version, header_len,
 *    unknown_000c, last_modified, lang_id, dir_uuid, stream_uuid,
 *    unknown_offset, unknown_len, dir_offset, dir_len.
 *  - The signature is compared with "ITSF"; for version 2 / 3, header_len is
 *    compared with the matching minimum length.
 *  - For version 3, data_offset is read from the input; the other visible
 *    assignment computes it as dir_offset + dir_len.
 * NOTE(review): the results of the individual _unmarshal_* calls are
 * discarded; the actions taken when a test fails are not visible here. */
230 static BOOL
_unmarshal_itsf_header(unsigned char **pData
,
231 unsigned int *pDataLen
,
232 struct chmItsfHeader
*dest
)
234 /* we only know how to deal with the 0x58 and 0x60 byte structures */
235 if (*pDataLen
!= _CHM_ITSF_V2_LEN
&& *pDataLen
!= _CHM_ITSF_V3_LEN
)
238 /* unmarshal common fields */
239 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
240 _unmarshal_int32 (pData
, pDataLen
, &dest
->version
);
241 _unmarshal_int32 (pData
, pDataLen
, &dest
->header_len
);
242 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_000c
);
243 _unmarshal_uint32 (pData
, pDataLen
, &dest
->last_modified
);
244 _unmarshal_uint32 (pData
, pDataLen
, &dest
->lang_id
);
245 _unmarshal_uuid (pData
, pDataLen
, dest
->dir_uuid
);
246 _unmarshal_uuid (pData
, pDataLen
, dest
->stream_uuid
);
247 _unmarshal_uint64 (pData
, pDataLen
, &dest
->unknown_offset
);
248 _unmarshal_uint64 (pData
, pDataLen
, &dest
->unknown_len
);
249 _unmarshal_uint64 (pData
, pDataLen
, &dest
->dir_offset
);
250 _unmarshal_uint64 (pData
, pDataLen
, &dest
->dir_len
);
252 /* error check the data */
253 /* XXX: should also check UUIDs, probably, though with a version 3 file,
254 * current MS tools do not seem to use them.
256 if (memcmp(dest
->signature
, "ITSF", 4) != 0)
258 if (dest
->version
== 2)
260 if (dest
->header_len
< _CHM_ITSF_V2_LEN
)
263 else if (dest
->version
== 3)
265 if (dest
->header_len
< _CHM_ITSF_V3_LEN
)
271 /* now, if we have a V3 structure, unmarshal the rest.
272 * otherwise, compute it
274 if (dest
->version
== 3)
277 _unmarshal_uint64(pData
, pDataLen
, &dest
->data_offset
);
282 dest
->data_offset
= dest
->dir_offset
+ dest
->dir_len
;
287 /* structure of ITSP headers */
/* ITSP (directory) header layout; _CHM_ITSP_V1_LEN (0x54) is the only size
 * accepted by _unmarshal_itsp_header.  The trailing comment on each field is
 * its byte offset in hex. */
288 #define _CHM_ITSP_V1_LEN (0x54)
291 char signature
[4]; /* 0 (ITSP) */
292 Int32 version
; /* 4 */
293 Int32 header_len
; /* 8 */
294 Int32 unknown_000c
; /* c */
295 UInt32 block_len
; /* 10 */
296 Int32 blockidx_intvl
; /* 14 */
297 Int32 index_depth
; /* 18 */
298 Int32 index_root
; /* 1c */
299 Int32 index_head
; /* 20 */
300 Int32 unknown_0024
; /* 24 */
301 UInt32 num_blocks
; /* 28 */
302 Int32 unknown_002c
; /* 2c */
303 UInt32 lang_id
; /* 30 */
304 UChar system_uuid
[16]; /* 34 */
305 UChar unknown_0044
[16]; /* 44 */
306 }; /* __attribute__ ((aligned (1))); */
/* Decodes an ITSP directory header from the byte cursor into *dest.
 * The input length is tested against _CHM_ITSP_V1_LEN, every field is read in
 * struct order, and afterwards the signature is compared with "ITSP", the
 * version with 1 and header_len with _CHM_ITSP_V1_LEN.
 * NOTE(review): results of the individual _unmarshal_* calls are discarded;
 * the action taken on a failed test is not visible here. */
308 static BOOL
_unmarshal_itsp_header(unsigned char **pData
,
309 unsigned int *pDataLen
,
310 struct chmItspHeader
*dest
)
312 /* we only know how to deal with 0x54-byte structures */
313 if (*pDataLen
!= _CHM_ITSP_V1_LEN
)
316 /* unmarshal fields */
317 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
318 _unmarshal_int32 (pData
, pDataLen
, &dest
->version
);
319 _unmarshal_int32 (pData
, pDataLen
, &dest
->header_len
);
320 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_000c
);
321 _unmarshal_uint32 (pData
, pDataLen
, &dest
->block_len
);
322 _unmarshal_int32 (pData
, pDataLen
, &dest
->blockidx_intvl
);
323 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_depth
);
324 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_root
);
325 _unmarshal_int32 (pData
, pDataLen
, &dest
->index_head
);
326 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_0024
);
327 _unmarshal_uint32 (pData
, pDataLen
, &dest
->num_blocks
);
328 _unmarshal_int32 (pData
, pDataLen
, &dest
->unknown_002c
);
329 _unmarshal_uint32 (pData
, pDataLen
, &dest
->lang_id
);
330 _unmarshal_uuid (pData
, pDataLen
, dest
->system_uuid
);
331 _unmarshal_uchar_array(pData
, pDataLen
, dest
->unknown_0044
, 16);
333 /* error check the data */
334 if (memcmp(dest
->signature
, "ITSP", 4) != 0)
336 if (dest
->version
!= 1)
338 if (dest
->header_len
!= _CHM_ITSP_V1_LEN
)
344 /* structure of PMGL headers */
/* PMGL (leaf directory page) header.  The marker array is exactly four bytes,
 * so the string initialiser's terminating NUL is not stored; it is only ever
 * compared with memcmp(..., 4).  _CHM_PMGL_LEN (0x14) is the header size; the
 * trailing comment on each field is its byte offset in hex. */
345 static const char _chm_pmgl_marker
[4] = "PMGL";
346 #define _CHM_PMGL_LEN (0x14)
349 char signature
[4]; /* 0 (PMGL) */
350 UInt32 free_space
; /* 4 */
351 UInt32 unknown_0008
; /* 8 */
352 Int32 block_prev
; /* c */
353 Int32 block_next
; /* 10 */
354 }; /* __attribute__ ((aligned (1))); */
/* Decodes a PMGL page header into *dest.  The input length is tested against
 * _CHM_PMGL_LEN; signature, free_space, unknown_0008, block_prev and
 * block_next are read in order; finally the signature is compared with
 * _chm_pmgl_marker.
 * NOTE(review): results of the field reads are discarded; the action taken on
 * a failed test is not visible here. */
356 static BOOL
_unmarshal_pmgl_header(unsigned char **pData
,
357 unsigned int *pDataLen
,
358 struct chmPmglHeader
*dest
)
360 /* we only know how to deal with 0x14-byte structures */
361 if (*pDataLen
!= _CHM_PMGL_LEN
)
364 /* unmarshal fields */
365 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
366 _unmarshal_uint32 (pData
, pDataLen
, &dest
->free_space
);
367 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown_0008
);
368 _unmarshal_int32 (pData
, pDataLen
, &dest
->block_prev
);
369 _unmarshal_int32 (pData
, pDataLen
, &dest
->block_next
);
371 /* check structure */
372 if (memcmp(dest
->signature
, _chm_pmgl_marker
, 4) != 0)
378 /* structure of PMGI headers */
/* PMGI (index directory page) header.  The four-byte marker stores no
 * terminating NUL and is compared with memcmp(..., 4).  _CHM_PMGI_LEN (0x08)
 * is the header size; field comments give byte offsets in hex. */
379 static const char _chm_pmgi_marker
[4] = "PMGI";
380 #define _CHM_PMGI_LEN (0x08)
383 char signature
[4]; /* 0 (PMGI) */
384 UInt32 free_space
; /* 4 */
385 }; /* __attribute__ ((aligned (1))); */
/* Decodes a PMGI page header into *dest.  The input length is tested against
 * _CHM_PMGI_LEN; signature and free_space are read; the signature is then
 * compared with _chm_pmgi_marker.
 * NOTE(review): the action taken on a failed test is not visible here. */
387 static BOOL
_unmarshal_pmgi_header(unsigned char **pData
,
388 unsigned int *pDataLen
,
389 struct chmPmgiHeader
*dest
)
391 /* we only know how to deal with 0x8-byte structures */
392 if (*pDataLen
!= _CHM_PMGI_LEN
)
395 /* unmarshal fields */
396 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
397 _unmarshal_uint32 (pData
, pDataLen
, &dest
->free_space
);
399 /* check structure */
400 if (memcmp(dest
->signature
, _chm_pmgi_marker
, 4) != 0)
406 /* structure of LZXC reset table */
/* LZXC reset table; _CHM_LZXC_RESETTABLE_V1_LEN (0x28) is the only size
 * accepted by _unmarshal_lzxc_reset_table.
 * NOTE(review): only the two 64-bit length members are visible here; the
 * unmarshaller also writes version, block_count, unknown, table_offset and
 * block_len, so those members must exist in the full declaration. */
407 #define _CHM_LZXC_RESETTABLE_V1_LEN (0x28)
408 struct chmLzxcResetTable
414 UInt64 uncompressed_len
;
415 UInt64 compressed_len
;
417 }; /* __attribute__ ((aligned (1))); */
/* Decodes an LZXC reset table into *dest.  The input length is tested against
 * _CHM_LZXC_RESETTABLE_V1_LEN; four 32-bit fields (version, block_count,
 * unknown, table_offset) and three 64-bit fields (uncompressed_len,
 * compressed_len, block_len) are read in that order; the version is then
 * compared with 2.
 * NOTE(review): results of the field reads are discarded; the action taken on
 * a failed test is not visible here. */
419 static BOOL
_unmarshal_lzxc_reset_table(unsigned char **pData
,
420 unsigned int *pDataLen
,
421 struct chmLzxcResetTable
*dest
)
423 /* we only know how to deal with 0x28-byte structures */
424 if (*pDataLen
!= _CHM_LZXC_RESETTABLE_V1_LEN
)
427 /* unmarshal fields */
428 _unmarshal_uint32 (pData
, pDataLen
, &dest
->version
);
429 _unmarshal_uint32 (pData
, pDataLen
, &dest
->block_count
);
430 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown
);
431 _unmarshal_uint32 (pData
, pDataLen
, &dest
->table_offset
);
432 _unmarshal_uint64 (pData
, pDataLen
, &dest
->uncompressed_len
);
433 _unmarshal_uint64 (pData
, pDataLen
, &dest
->compressed_len
);
434 _unmarshal_uint64 (pData
, pDataLen
, &dest
->block_len
);
436 /* check structure */
437 if (dest
->version
!= 2)
443 /* structure of LZXC control data block */
/* LZXC control data block.  _CHM_LZXC_MIN_LEN (0x18) is the minimum accepted
 * size and _CHM_LZXC_V2_LEN (0x1c) the size that includes unknown_18.  Field
 * comments give byte offsets in hex.
 * NOTE(review): the member at offset 0 (written as dest->size by the
 * unmarshaller) is not visible in this listing. */
444 #define _CHM_LZXC_MIN_LEN (0x18)
445 #define _CHM_LZXC_V2_LEN (0x1c)
446 struct chmLzxcControlData
449 char signature
[4]; /* 4 (LZXC) */
450 UInt32 version
; /* 8 */
451 UInt32 resetInterval
; /* c */
452 UInt32 windowSize
; /* 10 */
453 UInt32 windowsPerReset
; /* 14 */
454 UInt32 unknown_18
; /* 18 */
/* Decodes the LZXC control data into *dest.
 *  - The input length is tested against _CHM_LZXC_MIN_LEN.
 *  - size, signature, version, resetInterval, windowSize and windowsPerReset
 *    are read in order; unknown_18 is read only when *pDataLen is at least
 *    _CHM_LZXC_V2_LEN, and is otherwise set to 0.
 *  - For version 2, resetInterval and windowSize are scaled by 0x8000.
 *  - Sanity tests: windowSize or resetInterval equal to 0, windowSize equal
 *    to 1 (which would make windowSize/2 zero), resetInterval not a multiple
 *    of windowSize/2, and a signature other than "LZXC".
 * NOTE(review): if the preceding reads decrement *pDataLen (as
 * _unmarshal_char_array does), the V2 test compares the *remaining* length
 * with 0x1c rather than the total, so unknown_18 may never be read -- verify.
 * NOTE(review): the action taken on a failed test is not visible here. */
457 static BOOL
_unmarshal_lzxc_control_data(unsigned char **pData
,
458 unsigned int *pDataLen
,
459 struct chmLzxcControlData
*dest
)
461 /* we want at least 0x18 bytes */
462 if (*pDataLen
< _CHM_LZXC_MIN_LEN
)
465 /* unmarshal fields */
466 _unmarshal_uint32 (pData
, pDataLen
, &dest
->size
);
467 _unmarshal_char_array(pData
, pDataLen
, dest
->signature
, 4);
468 _unmarshal_uint32 (pData
, pDataLen
, &dest
->version
);
469 _unmarshal_uint32 (pData
, pDataLen
, &dest
->resetInterval
);
470 _unmarshal_uint32 (pData
, pDataLen
, &dest
->windowSize
);
471 _unmarshal_uint32 (pData
, pDataLen
, &dest
->windowsPerReset
);
473 if (*pDataLen
>= _CHM_LZXC_V2_LEN
)
474 _unmarshal_uint32 (pData
, pDataLen
, &dest
->unknown_18
);
476 dest
->unknown_18
= 0;
478 if (dest
->version
== 2)
480 dest
->resetInterval
*= 0x8000;
481 dest
->windowSize
*= 0x8000;
483 if (dest
->windowSize
== 0 || dest
->resetInterval
== 0)
486 /* for now, only support resetInterval a multiple of windowSize/2 */
487 if (dest
->windowSize
== 1)
489 if ((dest
->resetInterval
% (dest
->windowSize
/2)) != 0)
492 /* check structure */
493 if (memcmp(dest
->signature
, "LZXC", 4) != 0)
499 /* the structure used for chm file handles */
/* Members of the per-archive handle (struct chmFile).
 *  - mutex is taken by _chm_fetch_bytes around file seeks; lzx_mutex and
 *    cache_mutex are taken by _chm_decompress_region, and cache_mutex by
 *    chm_set_param.
 *  - rt_unit / cn_unit hold the resolved reset-table and content objects.
 *  - cache_blocks / cache_block_indices are parallel arrays of
 *    cache_num_blocks slots, indexed by block number modulo the slot count.
 * NOTE(review): other members used elsewhere in this file (fd, dir_offset,
 * block_len, window_size, lzx_last_block, ...) are not visible here. */
504 CRITICAL_SECTION mutex
;
505 CRITICAL_SECTION lzx_mutex
;
506 CRITICAL_SECTION cache_mutex
;
516 struct chmUnitInfo rt_unit
;
517 struct chmUnitInfo cn_unit
;
518 struct chmLzxcResetTable reset_table
;
520 /* LZX control data */
521 int compression_enabled
;
523 UInt32 reset_interval
;
524 UInt32 reset_blkcount
;
526 /* decompressor state */
527 struct LZXstate
*lzx_state
;
530 /* cache for decompressed blocks */
531 UChar
**cache_blocks
;
532 Int64
*cache_block_indices
;
533 Int32 cache_num_blocks
;
537 * utility functions local to this module
540 /* utility function to handle differences between {pread,read}(64)? */
/* Reads bytes from the archive file at absolute offset `os`.
 * Under h->mutex it records the current file position (a zero-distance
 * SetFilePointerEx with FILE_CURRENT), seeks to `os` from FILE_BEGIN, and
 * afterwards seeks back to the recorded position, so the handle's file
 * pointer is left where the caller had it.
 * NOTE(review): the read itself, the remaining parameters and the return
 * value are not visible here; SetFilePointerEx results are not checked. */
541 static Int64
_chm_fetch_bytes(struct chmFile
*h
,
547 if (h
->fd
== CHM_NULL_FD
)
550 CHM_ACQUIRE_LOCK(h
->mutex
);
551 /* NOTE: this might be better done with CreateFileMapping, et cetera... */
553 LARGE_INTEGER old_pos
, new_pos
;
556 /* awkward Win32 Seek/Tell */
557 new_pos
.QuadPart
= 0;
558 SetFilePointerEx( h
->fd
, new_pos
, &old_pos
, FILE_CURRENT
);
559 new_pos
.QuadPart
= os
;
560 SetFilePointerEx( h
->fd
, new_pos
, NULL
, FILE_BEGIN
);
572 /* restore original position */
573 SetFilePointerEx( h
->fd
, old_pos
, NULL
, FILE_BEGIN
);
575 CHM_RELEASE_LOCK(h
->mutex
);
580 * set a parameter on the file handle.
581 * valid parameter types:
582 * CHM_PARAM_MAX_BLOCKS_CACHED:
583 * how many decompressed blocks should be cached? A simple
584 * caching scheme is used, wherein the index of the block is
585 * used as a hash value, and hash collision results in the
586 * invalidation of the previously cached block.
/* Sets a tuning parameter on the handle.  For CHM_PARAM_MAX_BLOCKS_CACHED,
 * and only when paramVal differs from the current slot count, it (under
 * cache_mutex) allocates new block/index arrays of paramVal entries, moves
 * each existing cached block to slot (index % paramVal), frees a block whose
 * new slot is already occupied, frees the old arrays and installs the new
 * ones together with the new count.
 * NOTE(review): neither HeapAlloc result is checked in the visible lines.
 * NOTE(review): `% paramVal` is evaluated for every old slot, so paramVal == 0
 * with a non-empty cache would divide by zero unless a guard exists in lines
 * not shown here -- confirm. */
588 static void chm_set_param(struct chmFile
*h
,
594 case CHM_PARAM_MAX_BLOCKS_CACHED
:
595 CHM_ACQUIRE_LOCK(h
->cache_mutex
);
596 if (paramVal
!= h
->cache_num_blocks
)
602 /* allocate new cached blocks */
603 newBlocks
= HeapAlloc(GetProcessHeap(), 0, paramVal
* sizeof (UChar
*));
604 newIndices
= HeapAlloc(GetProcessHeap(), 0, paramVal
* sizeof (UInt64
));
605 for (i
=0; i
<paramVal
; i
++)
611 /* re-distribute old cached blocks */
614 for (i
=0; i
<h
->cache_num_blocks
; i
++)
616 int newSlot
= (int)(h
->cache_block_indices
[i
] % paramVal
);
618 if (h
->cache_blocks
[i
])
620 /* in case of collision, destroy newcomer */
621 if (newBlocks
[newSlot
])
623 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
[i
]);
624 h
->cache_blocks
[i
] = NULL
;
628 newBlocks
[newSlot
] = h
->cache_blocks
[i
];
629 newIndices
[newSlot
] =
630 h
->cache_block_indices
[i
];
635 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
);
636 HeapFree(GetProcessHeap(), 0, h
->cache_block_indices
);
639 /* now, set new values */
640 h
->cache_blocks
= newBlocks
;
641 h
->cache_block_indices
= newIndices
;
642 h
->cache_num_blocks
= paramVal
;
644 CHM_RELEASE_LOCK(h
->cache_mutex
);
652 /* open an ITS archive */
/* Opens an ITS archive by (wide-character) file name and returns a handle.
 * Steps, in order:
 *  1. allocate the handle and clear fd, lzx_state and the cache fields;
 *  2. open the file with CreateFileW; if that yields CHM_NULL_FD the handle
 *     is freed;
 *  3. initialise the three critical sections and tag their debug info;
 *  4. read and decode the ITSF header at offset 0, then the ITSP header at
 *     itsfHeader.dir_offset; either failure calls chm_close on the handle;
 *  5. derive dir_offset / dir_len (skipping the ITSP header), index_root,
 *     index_head and block_len; an index_root of -1 falls back to index_head;
 *  6. size the block cache to CHM_MAX_BLOCKS_CACHED;
 *  7. resolve the ResetTable, Content and ControlData objects; compression is
 *     switched off if any lookup fails or any of them lives in the compressed
 *     space;
 *  8. read the reset table and control data (switching compression off on
 *     failure) and derive window_size, reset_interval and reset_blkcount.
 * NOTE(review): the HeapAlloc result is dereferenced on the very next line
 * with no NULL check.
 * NOTE(review): the control-data read uses uiLzxc.length (taken from the
 * archive) as the byte count into the 256-byte sbuffer, and no bound check
 * is visible -- a large length would overrun the stack buffer; confirm.
 * NOTE(review): return statements are not visible in this listing. */
653 struct chmFile
*chm_openW(const WCHAR
*filename
)
655 unsigned char sbuffer
[256];
656 unsigned int sremain
;
657 unsigned char *sbufpos
;
658 struct chmFile
*newHandle
=NULL
;
659 struct chmItsfHeader itsfHeader
;
660 struct chmItspHeader itspHeader
;
662 struct chmUnitInfo uiSpan
;
664 struct chmUnitInfo uiLzxc
;
665 struct chmLzxcControlData ctlData
;
667 /* allocate handle */
668 newHandle
= HeapAlloc(GetProcessHeap(), 0, sizeof(struct chmFile
));
669 newHandle
->fd
= CHM_NULL_FD
;
670 newHandle
->lzx_state
= NULL
;
671 newHandle
->cache_blocks
= NULL
;
672 newHandle
->cache_block_indices
= NULL
;
673 newHandle
->cache_num_blocks
= 0;
676 if ((newHandle
->fd
=CreateFileW(filename
,
681 FILE_ATTRIBUTE_NORMAL
,
682 NULL
)) == CHM_NULL_FD
)
684 HeapFree(GetProcessHeap(), 0, newHandle
);
688 /* initialize mutexes, if needed */
689 InitializeCriticalSection(&newHandle
->mutex
);
690 newHandle
->mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.mutex");
691 InitializeCriticalSection(&newHandle
->lzx_mutex
);
692 newHandle
->lzx_mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.lzx_mutex");
693 InitializeCriticalSection(&newHandle
->cache_mutex
);
694 newHandle
->cache_mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.cache_mutex");
696 /* read and verify header */
697 sremain
= _CHM_ITSF_V3_LEN
;
699 if (_chm_fetch_bytes(newHandle
, sbuffer
, 0, sremain
) != sremain
||
700 !_unmarshal_itsf_header(&sbufpos
, &sremain
, &itsfHeader
))
702 chm_close(newHandle
);
706 /* stash important values from header */
707 newHandle
->dir_offset
= itsfHeader
.dir_offset
;
708 newHandle
->dir_len
= itsfHeader
.dir_len
;
709 newHandle
->data_offset
= itsfHeader
.data_offset
;
711 /* now, read and verify the directory header chunk */
712 sremain
= _CHM_ITSP_V1_LEN
;
714 if (_chm_fetch_bytes(newHandle
, sbuffer
,
715 itsfHeader
.dir_offset
, sremain
) != sremain
||
716 !_unmarshal_itsp_header(&sbufpos
, &sremain
, &itspHeader
))
718 chm_close(newHandle
);
722 /* grab essential information from ITSP header */
723 newHandle
->dir_offset
+= itspHeader
.header_len
;
724 newHandle
->dir_len
-= itspHeader
.header_len
;
725 newHandle
->index_root
= itspHeader
.index_root
;
726 newHandle
->index_head
= itspHeader
.index_head
;
727 newHandle
->block_len
= itspHeader
.block_len
;
729 /* if the index root is -1, this means we don't have any PMGI blocks.
730 * as a result, we must use the sole PMGL block as the index root
732 if (newHandle
->index_root
== -1)
733 newHandle
->index_root
= newHandle
->index_head
;
735 /* initialize cache */
736 chm_set_param(newHandle
, CHM_PARAM_MAX_BLOCKS_CACHED
,
737 CHM_MAX_BLOCKS_CACHED
);
739 /* By default, compression is enabled. */
740 newHandle
->compression_enabled
= 1;
742 /* prefetch most commonly needed unit infos */
743 if (CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
744 L
"::DataSpace/Storage/MSCompressed/Transform/"
745 "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/"
746 "InstanceData/ResetTable",
747 &newHandle
->rt_unit
) ||
748 newHandle
->rt_unit
.space
== CHM_COMPRESSED
||
749 CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
750 L
"::DataSpace/Storage/MSCompressed/Content",
751 &newHandle
->cn_unit
) ||
752 newHandle
->cn_unit
.space
== CHM_COMPRESSED
||
753 CHM_RESOLVE_SUCCESS
!= chm_resolve_object(newHandle
,
754 L
"::DataSpace/Storage/MSCompressed/ControlData",
756 uiLzxc
.space
== CHM_COMPRESSED
)
758 newHandle
->compression_enabled
= 0;
761 /* read reset table info */
762 if (newHandle
->compression_enabled
)
764 sremain
= _CHM_LZXC_RESETTABLE_V1_LEN
;
766 if (chm_retrieve_object(newHandle
, &newHandle
->rt_unit
, sbuffer
,
767 0, sremain
) != sremain
||
768 !_unmarshal_lzxc_reset_table(&sbufpos
, &sremain
,
769 &newHandle
->reset_table
))
771 newHandle
->compression_enabled
= 0;
775 /* read control data */
776 if (newHandle
->compression_enabled
)
778 sremain
= (unsigned long)uiLzxc
.length
;
780 if (chm_retrieve_object(newHandle
, &uiLzxc
, sbuffer
,
781 0, sremain
) != sremain
||
782 !_unmarshal_lzxc_control_data(&sbufpos
, &sremain
,
785 newHandle
->compression_enabled
= 0;
789 newHandle
->window_size
= ctlData
.windowSize
;
790 newHandle
->reset_interval
= ctlData
.resetInterval
;
791 /* Jed, Mon Jun 28: Experimentally, it appears that the reset block count */
792 /* must be multiplied by this formerly unknown ctrl data field in */
793 /* order to decompress some files. */
794 newHandle
->reset_blkcount
= newHandle
->reset_interval
/ (newHandle
->window_size
/ 2) *
795 ctlData
.windowsPerReset
;
802 /* Duplicate an ITS archive handle */
/* Duplicates an archive handle.  The new handle starts as a member-wise copy
 * of *oldHandle; it then receives its own duplicated file handle, fresh
 * critical sections, no LZX state and an empty block cache that is resized
 * to CHM_MAX_BLOCKS_CACHED.
 * NOTE(review): the HeapAlloc result is dereferenced on the next line with no
 * NULL check.
 * NOTE(review): the DuplicateHandle result is ignored; on failure
 * newHandle->fd may still hold the value copied from oldHandle, so closing
 * the duplicate could close the original's file handle -- confirm. */
803 struct chmFile
*chm_dup(struct chmFile
*oldHandle
)
805 struct chmFile
*newHandle
=NULL
;
807 newHandle
= HeapAlloc(GetProcessHeap(), 0, sizeof(struct chmFile
));
808 *newHandle
= *oldHandle
;
810 /* duplicate fd handle */
811 DuplicateHandle(GetCurrentProcess(), oldHandle
->fd
,
812 GetCurrentProcess(), &(newHandle
->fd
),
813 0, FALSE
, DUPLICATE_SAME_ACCESS
);
814 newHandle
->lzx_state
= NULL
;
815 newHandle
->cache_blocks
= NULL
;
816 newHandle
->cache_block_indices
= NULL
;
817 newHandle
->cache_num_blocks
= 0;
819 /* initialize mutexes, if needed */
820 InitializeCriticalSection(&newHandle
->mutex
);
821 newHandle
->mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.mutex");
822 InitializeCriticalSection(&newHandle
->lzx_mutex
);
823 newHandle
->lzx_mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.lzx_mutex");
824 InitializeCriticalSection(&newHandle
->cache_mutex
);
825 newHandle
->cache_mutex
.DebugInfo
->Spare
[0] = (DWORD_PTR
)(__FILE__
": chmFile.cache_mutex");
827 /* initialize cache */
828 chm_set_param(newHandle
, CHM_PARAM_MAX_BLOCKS_CACHED
,
829 CHM_MAX_BLOCKS_CACHED
);
834 /* close an ITS archive */
/* Releases everything owned by an archive handle: closes the file handle if
 * it is not CHM_NULL_FD, clears the debug tags and deletes the three critical
 * sections, tears down the LZX state, frees every cached block and both
 * cache arrays, and finally frees the handle itself.  The handle must not be
 * used after this call.
 * NOTE(review): any NULL guards on h, lzx_state or cache_blocks are not
 * visible in this listing. */
835 void chm_close(struct chmFile
*h
)
839 if (h
->fd
!= CHM_NULL_FD
)
840 CHM_CLOSE_FILE(h
->fd
);
843 h
->mutex
.DebugInfo
->Spare
[0] = 0;
844 DeleteCriticalSection(&h
->mutex
);
845 h
->lzx_mutex
.DebugInfo
->Spare
[0] = 0;
846 DeleteCriticalSection(&h
->lzx_mutex
);
847 h
->cache_mutex
.DebugInfo
->Spare
[0] = 0;
848 DeleteCriticalSection(&h
->cache_mutex
);
851 LZXteardown(h
->lzx_state
);
857 for (i
=0; i
<h
->cache_num_blocks
; i
++)
859 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
[i
]);
861 HeapFree(GetProcessHeap(), 0, h
->cache_blocks
);
862 h
->cache_blocks
= NULL
;
865 HeapFree(GetProcessHeap(), 0, h
->cache_block_indices
);
866 h
->cache_block_indices
= NULL
;
868 HeapFree(GetProcessHeap(), 0, h
);
873 * helper methods for chm_resolve_object
876 /* skip a compressed dword */
/* Advances *pEntry past one variable-length integer: bytes are consumed while
 * the byte just read is >= 0x80, so the cursor ends one past the first byte
 * below 0x80.  No end-of-buffer bound is applied. */
877 static void _chm_skip_cword(UChar
**pEntry
)
879 while (*(*pEntry
)++ >= 0x80)
883 /* skip the data from a PMGL entry */
/* Skips the three variable-length integers that follow a PMGL entry's name
 * (the same three values _chm_parse_PMGL_entry reads as space, start and
 * length). */
884 static void _chm_skip_PMGL_entry_data(UChar
**pEntry
)
886 _chm_skip_cword(pEntry
);
887 _chm_skip_cword(pEntry
);
888 _chm_skip_cword(pEntry
);
891 /* parse a compressed dword */
/* Decodes one variable-length integer at *pEntry and advances the cursor.
 * Each byte >= 0x80 contributes its low 7 bits to the accumulator; the first
 * byte below 0x80 ends the value and supplies the lowest 7 bits
 * ((accum << 7) + temp).
 * NOTE(review): the per-iteration shift of accum and the local declarations
 * are not visible here; no end-of-buffer bound is applied. */
892 static UInt64
_chm_parse_cword(UChar
**pEntry
)
896 while ((temp
=*(*pEntry
)++) >= 0x80)
899 accum
+= temp
& 0x7f;
902 return (accum
<< 7) + temp
;
905 /* parse a UTF-8 string into a wide-character (WCHAR) buffer */
/* Converts `count` UTF-8 bytes at *pEntry to wide characters in `path` using
 * MultiByteToWideChar(CP_UTF8), writing at most CHM_MAX_PATHLEN WCHARs; the
 * number of characters written is kept in `length`.
 * NOTE(review): `count` is a UInt64 passed where the API takes an int; callers
 * must keep it small.  Termination of `path`, the cursor advance and the
 * return value are not visible here. */
906 static BOOL
_chm_parse_UTF8(UChar
**pEntry
, UInt64 count
, WCHAR
*path
)
908 DWORD length
= MultiByteToWideChar(CP_UTF8
, 0, (char *)*pEntry
, count
, path
, CHM_MAX_PATHLEN
);
914 /* parse a PMGL entry into a chmUnitInfo struct; return 1 on success. */
/* Parses one PMGL directory entry at *pEntry into *ui: a length-prefixed
 * UTF-8 name (the length is tested against CHM_MAX_PATHLEN before
 * conversion into ui->path), followed by three variable-length integers
 * stored as ui->space, ui->start and ui->length.
 * NOTE(review): the actions on an over-long or unconvertible name and the
 * final return value are not visible here. */
915 static BOOL
_chm_parse_PMGL_entry(UChar
**pEntry
, struct chmUnitInfo
*ui
)
920 strLen
= _chm_parse_cword(pEntry
);
921 if (strLen
> CHM_MAX_PATHLEN
)
925 if (! _chm_parse_UTF8(pEntry
, strLen
, ui
->path
))
929 ui
->space
= (int)_chm_parse_cword(pEntry
);
930 ui
->start
= _chm_parse_cword(pEntry
);
931 ui
->length
= _chm_parse_cword(pEntry
);
935 /* find an exact entry in PMGL; return NULL if we fail */
/* Linear search of a PMGL (leaf) page for an entry whose name equals objPath,
 * compared case-insensitively with wcsicmp.  The scan region ends at
 * page_buf + block_len - header.free_space; for each entry the name is
 * decoded into a local buffer and, on a mismatch, the entry's three data
 * words are skipped.
 * NOTE(review): strLen is not compared with CHM_MAX_PATHLEN here before the
 * conversion (unlike _chm_parse_PMGL_entry), and free_space comes straight
 * from the page header -- no validation of either is visible; confirm.
 * NOTE(review): the loop header and the return statements are not visible. */
936 static UChar
*_chm_find_in_PMGL(UChar
*page_buf
,
938 const WCHAR
*objPath
)
940 /* XXX: modify this to do a binary search using the nice index structure
941 * that is provided for us.
943 struct chmPmglHeader header
;
949 WCHAR buffer
[CHM_MAX_PATHLEN
+1];
951 /* figure out where to start and end */
953 hremain
= _CHM_PMGL_LEN
;
954 if (! _unmarshal_pmgl_header(&cur
, &hremain
, &header
))
956 end
= page_buf
+ block_len
- (header
.free_space
);
958 /* now, scan progressively */
963 strLen
= _chm_parse_cword(&cur
);
964 if (! _chm_parse_UTF8(&cur
, strLen
, buffer
))
967 /* check if it is the right name */
968 if (! wcsicmp(buffer
, objPath
))
971 _chm_skip_PMGL_entry_data(&cur
);
977 /* find which block should be searched next for the entry; -1 if no block */
/* Linear scan of a PMGI (index) page to choose the next page to visit for
 * objPath.  Entries are (name, page number) pairs; scanning stops at the
 * first name that compares greater than objPath (wcsicmp > 0), otherwise the
 * entry's page number is loaded into `page` and the scan continues.  The scan
 * region ends at page_buf + block_len - header.free_space.
 * NOTE(review): strLen and free_space are used without visible validation;
 * the loop header and return statements are not visible here. */
978 static Int32
_chm_find_in_PMGI(UChar
*page_buf
,
980 const WCHAR
*objPath
)
982 /* XXX: modify this to do a binary search using the nice index structure
983 * that is provided for us
985 struct chmPmgiHeader header
;
991 WCHAR buffer
[CHM_MAX_PATHLEN
+1];
993 /* figure out where to start and end */
995 hremain
= _CHM_PMGI_LEN
;
996 if (! _unmarshal_pmgi_header(&cur
, &hremain
, &header
))
998 end
= page_buf
+ block_len
- (header
.free_space
);
1000 /* now, scan progressively */
1004 strLen
= _chm_parse_cword(&cur
);
1005 if (! _chm_parse_UTF8(&cur
, strLen
, buffer
))
1008 /* check if it is the right name */
1009 if (wcsicmp(buffer
, objPath
) > 0)
1012 /* load next value for path */
1013 page
= (int)_chm_parse_cword(&cur
);
1019 /* resolve a particular object from the archive */
/* Looks up objPath in the archive directory and fills *ui on success.
 * Starting at h->index_root, each page is read from
 * dir_offset + page * block_len into a heap buffer of block_len bytes:
 *  - a PMGL page is searched with _chm_find_in_PMGL; a hit is parsed into
 *    *ui and CHM_RESOLVE_SUCCESS is returned, a miss returns
 *    CHM_RESOLVE_FAILURE;
 *  - a PMGI page yields the next page number via _chm_find_in_PMGI;
 *  - a short read or any other page signature returns CHM_RESOLVE_FAILURE.
 * The page buffer is freed on every visible return path.
 * NOTE(review): the HeapAlloc result is not checked in the visible lines, and
 * the return value of _chm_parse_PMGL_entry is ignored, so a rejected entry
 * would still be reported as success -- confirm. */
1020 int chm_resolve_object(struct chmFile
*h
,
1021 const WCHAR
*objPath
,
1022 struct chmUnitInfo
*ui
)
1025 * XXX: implement caching scheme for dir pages
1030 /* buffer to hold whatever page we're looking at */
1031 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, h
->block_len
);
1034 curPage
= h
->index_root
;
1036 /* until we have either returned or given up */
1037 while (curPage
!= -1)
1040 /* try to fetch the index page */
1041 if (_chm_fetch_bytes(h
, page_buf
,
1042 h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1043 h
->block_len
) != h
->block_len
)
1045 HeapFree(GetProcessHeap(), 0, page_buf
);
1046 return CHM_RESOLVE_FAILURE
;
1049 /* now, if it is a leaf node: */
1050 if (memcmp(page_buf
, _chm_pmgl_marker
, 4) == 0)
1053 UChar
*pEntry
= _chm_find_in_PMGL(page_buf
,
1058 HeapFree(GetProcessHeap(), 0, page_buf
);
1059 return CHM_RESOLVE_FAILURE
;
1062 /* parse entry and return */
1063 _chm_parse_PMGL_entry(&pEntry
, ui
);
1064 HeapFree(GetProcessHeap(), 0, page_buf
);
1065 return CHM_RESOLVE_SUCCESS
;
1068 /* else, if it is a branch node: */
1069 else if (memcmp(page_buf
, _chm_pmgi_marker
, 4) == 0)
1070 curPage
= _chm_find_in_PMGI(page_buf
, h
->block_len
, objPath
);
1072 /* else, we are confused. give up. */
1075 HeapFree(GetProcessHeap(), 0, page_buf
);
1076 return CHM_RESOLVE_FAILURE
;
1080 /* didn't find anything. fail. */
1081 HeapFree(GetProcessHeap(), 0, page_buf
);
1082 return CHM_RESOLVE_FAILURE
;
1086 * utility methods for dealing with compressed data
1089 /* get the bounds of a compressed block. Returns FALSE on failure */
/* Computes where a compressed block lives in the file.
 *  - For every block except the last (block < block_count - 1) two 8-byte
 *    reset-table entries are fetched: one decoded into *start, the next
 *    decoded into *len (commented as the end address).
 *  - For the last block only the start entry is fetched and *len is set to
 *    reset_table.compressed_len.
 *  - *start is finally rebased by data_offset + cn_unit.start.
 * NOTE(review): the exact file offsets of the fetches and the step that
 * turns the end address into a length are not visible here -- confirm.
 * NOTE(review): block_count - 1 wraps if block_count is 0 and the member is
 * unsigned -- verify the member's type. */
1090 static BOOL
_chm_get_cmpblock_bounds(struct chmFile
*h
,
1095 UChar buffer
[8], *dummy
;
1098 /* for all but the last block, use the reset table */
1099 if (block
< h
->reset_table
.block_count
-1)
1101 /* unpack the start address */
1104 if (_chm_fetch_bytes(h
, buffer
,
1107 + h
->reset_table
.table_offset
1109 remain
) != remain
||
1110 !_unmarshal_uint64(&dummy
, &remain
, start
))
1113 /* unpack the end address */
1116 if (_chm_fetch_bytes(h
, buffer
,
1119 + h
->reset_table
.table_offset
1121 remain
) != remain
||
1122 !_unmarshal_int64(&dummy
, &remain
, len
))
1126 /* for the last block, use the span in addition to the reset table */
1129 /* unpack the start address */
1132 if (_chm_fetch_bytes(h
, buffer
,
1135 + h
->reset_table
.table_offset
1137 remain
) != remain
||
1138 !_unmarshal_uint64(&dummy
, &remain
, start
))
1141 *len
= h
->reset_table
.compressed_len
;
1144 /* compute the length and absolute start address */
1146 *start
+= h
->data_offset
+ h
->cn_unit
.start
;
1151 /* decompress the block. must have lzx_mutex. */
/* Decompresses one LZX block into its cache slot; the caller must hold
 * lzx_mutex.
 *  - A scratch buffer of reset_table.block_len + 6144 bytes receives the
 *    compressed data.
 *  - blockAlign is the block's distance from the previous reset point
 *    (block % reset_blkcount), shortened to (block - lzx_last_block) when the
 *    last decoded block lies inside that window.
 *  - Each preceding block that was not the most recently decoded one is
 *    decoded first into its own cache slot (slot = index % cache_num_blocks),
 *    resetting the LZX state at reset boundaries.
 *  - The requested block is then decoded the same way, lzx_last_block is
 *    updated, the scratch buffer is freed and reset_table.block_len is
 *    returned.
 * NOTE(review): cmpLen comes from the reset table and is used as the read
 * length into cbuffer with no visible comparison against block_len + 6144; a
 * crafted archive could overrun the heap buffer -- confirm.
 * NOTE(review): `% reset_blkcount` and `% cache_num_blocks` divide by zero if
 * either value is 0; the HeapAlloc results are not checked in visible lines.
 * NOTE(review): the fprintf(stderr, ...) calls look like debug tracing; any
 * conditional compilation around them is not visible here. */
1152 static Int64
_chm_decompress_block(struct chmFile
*h
,
1156 UChar
*cbuffer
= HeapAlloc( GetProcessHeap(), 0,
1157 ((unsigned int)h
->reset_table
.block_len
+ 6144));
1158 UInt64 cmpStart
; /* compressed start */
1159 Int64 cmpLen
; /* compressed len */
1160 int indexSlot
; /* cache index slot */
1161 UChar
*lbuffer
; /* local buffer ptr */
1162 UInt32 blockAlign
= (UInt32
)(block
% h
->reset_blkcount
); /* reset interval align */
1163 UInt32 i
; /* local loop index */
1165 /* let the caching system pull its weight! */
1166 if (block
- blockAlign
<= h
->lzx_last_block
&&
1167 block
>= h
->lzx_last_block
)
1168 blockAlign
= (block
- h
->lzx_last_block
);
1170 /* check if we need previous blocks */
1171 if (blockAlign
!= 0)
1173 /* fetch all required previous blocks since last reset */
1174 for (i
= blockAlign
; i
> 0; i
--)
1176 UInt32 curBlockIdx
= block
- i
;
1178 /* check if we most recently decompressed the previous block */
1179 if (h
->lzx_last_block
!= curBlockIdx
)
1181 if ((curBlockIdx
% h
->reset_blkcount
) == 0)
1184 fprintf(stderr
, "***RESET (1)***\n");
1186 LZXreset(h
->lzx_state
);
1189 indexSlot
= (int)((curBlockIdx
) % h
->cache_num_blocks
);
1190 h
->cache_block_indices
[indexSlot
] = curBlockIdx
;
1191 if (! h
->cache_blocks
[indexSlot
])
1192 h
->cache_blocks
[indexSlot
] =
1193 HeapAlloc(GetProcessHeap(), 0,
1194 (unsigned int)(h
->reset_table
.block_len
));
1195 lbuffer
= h
->cache_blocks
[indexSlot
];
1197 /* decompress the previous block */
1199 fprintf(stderr
, "Decompressing block #%4d (EXTRA)\n", curBlockIdx
);
1201 if (!_chm_get_cmpblock_bounds(h
, curBlockIdx
, &cmpStart
, &cmpLen
) ||
1202 _chm_fetch_bytes(h
, cbuffer
, cmpStart
, cmpLen
) != cmpLen
||
1203 LZXdecompress(h
->lzx_state
, cbuffer
, lbuffer
, (int)cmpLen
,
1204 (int)h
->reset_table
.block_len
) != DECR_OK
)
1207 fprintf(stderr
, " (DECOMPRESS FAILED!)\n");
1209 HeapFree(GetProcessHeap(), 0, cbuffer
);
1213 h
->lzx_last_block
= (int)curBlockIdx
;
1219 if ((block
% h
->reset_blkcount
) == 0)
1222 fprintf(stderr
, "***RESET (2)***\n");
1224 LZXreset(h
->lzx_state
);
1228 /* allocate slot in cache */
1229 indexSlot
= (int)(block
% h
->cache_num_blocks
);
1230 h
->cache_block_indices
[indexSlot
] = block
;
1231 if (! h
->cache_blocks
[indexSlot
])
1232 h
->cache_blocks
[indexSlot
] =
1233 HeapAlloc(GetProcessHeap(), 0, ((unsigned int)h
->reset_table
.block_len
));
1234 lbuffer
= h
->cache_blocks
[indexSlot
];
1237 /* decompress the block we actually want */
1239 fprintf(stderr
, "Decompressing block #%4d (REAL )\n", block
);
1241 if (! _chm_get_cmpblock_bounds(h
, block
, &cmpStart
, &cmpLen
) ||
1242 _chm_fetch_bytes(h
, cbuffer
, cmpStart
, cmpLen
) != cmpLen
||
1243 LZXdecompress(h
->lzx_state
, cbuffer
, lbuffer
, (int)cmpLen
,
1244 (int)h
->reset_table
.block_len
) != DECR_OK
)
1247 fprintf(stderr
, " (DECOMPRESS FAILED!)\n");
1249 HeapFree(GetProcessHeap(), 0, cbuffer
);
1252 h
->lzx_last_block
= (int)block
;
1254 /* XXX: modify LZX routines to return the length of the data they
1255 * decompressed and return that instead, for an extra sanity check.
1257 HeapFree(GetProcessHeap(), 0, cbuffer
);
1258 return h
->reset_table
.block_len
;
1261 /* grab a region from a compressed block */
/* Copies up to nLen bytes starting at uncompressed offset `start`, never
 * crossing a block boundary: nBlock = start / block_len,
 * nOffset = start % block_len, and nLen is clamped to the bytes left in that
 * block.
 *  - Under lzx_mutex and cache_mutex, if slot (nBlock % cache_num_blocks)
 *    holds nBlock the data is served from the cache and both locks are
 *    released.
 *  - Otherwise cache_mutex is released, the LZX decoder is set up
 *    (lzx_last_block = -1, LZXinit with window_size), the block is
 *    decompressed and the requested slice copied into buf before lzx_mutex
 *    is released.
 * NOTE(review): `/ block_len`, `% block_len` and `% cache_num_blocks` divide
 * by zero if either value is 0.  The condition guarding LZXinit, the checks
 * on gotLen and the return statements are not visible here. */
1262 static Int64
_chm_decompress_region(struct chmFile
*h
,
1267 UInt64 nBlock
, nOffset
;
1270 UChar
*ubuffer
= NULL
;
1275 /* figure out what we need to read */
1276 nBlock
= start
/ h
->reset_table
.block_len
;
1277 nOffset
= start
% h
->reset_table
.block_len
;
1279 if (nLen
> (h
->reset_table
.block_len
- nOffset
))
1280 nLen
= h
->reset_table
.block_len
- nOffset
;
1282 /* if block is cached, return data from it. */
1283 CHM_ACQUIRE_LOCK(h
->lzx_mutex
);
1284 CHM_ACQUIRE_LOCK(h
->cache_mutex
);
1285 if (h
->cache_block_indices
[nBlock
% h
->cache_num_blocks
] == nBlock
&&
1286 h
->cache_blocks
[nBlock
% h
->cache_num_blocks
] != NULL
)
1289 h
->cache_blocks
[nBlock
% h
->cache_num_blocks
] + nOffset
,
1290 (unsigned int)nLen
);
1291 CHM_RELEASE_LOCK(h
->cache_mutex
);
1292 CHM_RELEASE_LOCK(h
->lzx_mutex
);
1295 CHM_RELEASE_LOCK(h
->cache_mutex
);
1297 /* data request not satisfied, so... start up the decompressor machine */
1300 h
->lzx_last_block
= -1;
1301 h
->lzx_state
= LZXinit(h
->window_size
);
1304 /* decompress some data */
1305 gotLen
= _chm_decompress_block(h
, nBlock
, &ubuffer
);
1308 memcpy(buf
, ubuffer
+nOffset
, (unsigned int)nLen
);
1309 CHM_RELEASE_LOCK(h
->lzx_mutex
);
/*
 * chm_retrieve_object: read (part of) the object described by ui.
 *
 * As far as this listing shows: len is clipped so that addr + len does not
 * exceed ui->length; an object whose ui->space is CHM_UNCOMPRESSED is read
 * with one _chm_fetch_bytes() call; otherwise the data is obtained through
 * _chm_decompress_region().
 *
 * NOTE(review): this listing embeds the original line numbers and omits
 * several source lines.  The remaining parameters (buf, addr and len are
 * used below but never declared in the visible text), the statements
 * controlled by the guard conditions, the braces and the code following the
 * last comment are not visible -- confirm them against the complete file.
 */
1313 /* retrieve (part of) an object */
1314 LONGINT64
chm_retrieve_object(struct chmFile
*h
,
1315 struct chmUnitInfo
*ui
,
/* NOTE(review): the handle check announced by the next comment is itself
 * missing from the listing */
1320 /* must be valid file handle */
1324 /* starting address must be in correct range */
/* NOTE(review): the statement executed when addr is out of range is not
 * visible */
1325 if (addr
>= ui
->length
)
/* clip len so the request ends at the end of the object */
1329 if (addr
+ len
> ui
->length
)
1330 len
= ui
->length
- addr
;
1332 /* if the file is uncompressed, it's simple */
1333 if (ui
->space
== CHM_UNCOMPRESSED
)
/* the offset argument is h->data_offset + ui->start + addr */
/* NOTE(review): the buffer and length arguments of this call are not
 * visible in the listing */
1336 return _chm_fetch_bytes(h
,
1338 h
->data_offset
+ ui
->start
+ addr
,
1342 /* else if the file is compressed, it's a little trickier */
1343 else /* ui->space == CHM_COMPRESSED */
/* swath receives the result of each _chm_decompress_region() call; total is
 * presumably the running sum -- its updates are not visible */
1345 Int64 swath
=0, total
=0;
1347 /* if compression is not enabled for this file... */
/* NOTE(review): the action taken when compression is disabled is not
 * visible */
1348 if (! h
->compression_enabled
)
1353 /* swill another mouthful */
/* _chm_decompress_region() clamps a request to the end of one block, so this
 * call is presumably repeated in a loop -- the loop itself is not visible */
1354 swath
= _chm_decompress_region(h
, buf
, ui
->start
+ addr
, len
);
1356 /* if we didn't get any... */
/*
 * chm_enumerate_dir: walk the directory index pages and invoke the callback
 * e for entries selected by prefix and by the what bit mask.
 *
 * As far as this listing shows: pages are visited starting at h->index_head
 * and following header.block_next until the page number is -1; each page is
 * read into page_buf with _chm_fetch_bytes(), its header is decoded with
 * _unmarshal_pmgl_header(), and entries are decoded one at a time with
 * _chm_parse_PMGL_entry().  page_buf is released with HeapFree() on every
 * exit path that is visible.
 *
 * NOTE(review): this listing embeds the original line numbers and omits
 * several source lines.  The remaining parameters (what, e and context are
 * used below), the declarations of cur, end, curPage, prefixLen,
 * lastPathLen, ui_path_len and flag, the loop/switch scaffolding, the braces
 * and every return statement are not visible -- confirm them against the
 * complete file.
 */
1372 BOOL
chm_enumerate_dir(struct chmFile
*h
,
1373 const WCHAR
*prefix
,
/* NOTE(review): the next line is the middle of a block comment whose opening
 * and closing lines were dropped by the listing */
1379 * XXX: do this efficiently (i.e. using the tree index)
1384 /* buffer to hold whatever page we're looking at */
/* one page of h->block_len bytes */
/* NOTE(review): no NULL check of the HeapAlloc() result is visible */
1385 UChar
*page_buf
= HeapAlloc(GetProcessHeap(), 0, h
->block_len
);
1386 struct chmPmglHeader header
;
1389 unsigned int lenRemain
;
1391 /* set to TRUE once we've started */
1392 BOOL it_has_begun
= FALSE
;
1394 /* the current ui */
1395 struct chmUnitInfo ui
;
1399 /* the length of the prefix */
/* copy of prefix that is made to end in '/' (see below) */
1400 WCHAR prefixRectified
[CHM_MAX_PATHLEN
+1];
/* NOTE(review): lastPath is one element smaller than prefixRectified and is
 * filled below by an unbounded lstrcpyW() from ui.path; verify that ui.path
 * can never need CHM_MAX_PATHLEN+1 elements including the terminator */
1402 WCHAR lastPath
[CHM_MAX_PATHLEN
];
1406 curPage
= h
->index_head
;
1408 /* initialize pathname state */
1409 lstrcpynW(prefixRectified
, prefix
, CHM_MAX_PATHLEN
);
1410 prefixLen
= lstrlenW(prefixRectified
);
/* append '/' when the copied prefix does not already end with one */
/* NOTE(review): this indexes prefixLen-1; a guard for an empty prefix is not
 * visible here and may sit on the dropped lines -- confirm.  Any update of
 * prefixLen after the append is likewise not visible */
1413 if (prefixRectified
[prefixLen
-1] != '/')
1415 prefixRectified
[prefixLen
] = '/';
1416 prefixRectified
[prefixLen
+1] = '\0';
1423 /* until we have either returned or given up */
1424 while (curPage
!= -1)
1427 /* try to fetch the index page */
/* page offset is h->dir_offset + curPage * h->block_len; anything other than
 * a full h->block_len read is treated as failure */
/* NOTE(review): the buffer argument of this call is not visible */
1428 if (_chm_fetch_bytes(h
,
1430 h
->dir_offset
+ (UInt64
)curPage
*h
->block_len
,
1431 h
->block_len
) != h
->block_len
)
1433 HeapFree(GetProcessHeap(), 0, page_buf
);
1437 /* figure out start and end for this page */
/* NOTE(review): the initialisation of cur is not visible */
1439 lenRemain
= _CHM_PMGL_LEN
;
1440 if (! _unmarshal_pmgl_header(&cur
, &lenRemain
, &header
))
1442 HeapFree(GetProcessHeap(), 0, page_buf
);
/* end = page start + block_len - header.free_space */
1445 end
= page_buf
+ h
->block_len
- (header
.free_space
);
1447 /* loop over this page */
/* NOTE(review): the loop statement itself is not visible */
1450 if (! _chm_parse_PMGL_entry(&cur
, &ui
))
1452 HeapFree(GetProcessHeap(), 0, page_buf
);
1456 /* check if we should start */
/* enumeration starts at an entry with length 0 whose first prefixLen
 * characters match prefixRectified under wcsnicmp() */
1459 if (ui
.length
== 0 && wcsnicmp(ui
.path
, prefixRectified
, prefixLen
) == 0)
1460 it_has_begun
= TRUE
;
/* true when ui.path has no characters after the prefix; the action taken is
 * not visible */
1464 if (ui
.path
[prefixLen
] == '\0')
1468 /* check if we should stop */
/* an entry that no longer matches the prefix leads to the buffer being
 * freed; the return that presumably follows is not visible */
1471 if (wcsnicmp(ui
.path
, prefixRectified
, prefixLen
) != 0)
1473 HeapFree(GetProcessHeap(), 0, page_buf
);
1478 /* check if we should include this path */
/* lastPathLen == -1 means no previous path has been recorded; its
 * initialisation is not visible */
1479 if (lastPathLen
!= -1)
/* entry starts with the previously recorded path; the action taken is not
 * visible */
1481 if (wcsnicmp(ui
.path
, lastPath
, lastPathLen
) == 0)
/* remember this entry's path and its length for the comparison above */
1484 lstrcpyW(lastPath
, ui
.path
);
1485 lastPathLen
= lstrlenW(lastPath
);
1487 /* get the length of the path */
/* ui_path_len is the index of the last character of ui.path, not its length */
1488 ui_path_len
= lstrlenW(ui
.path
)-1;
1490 /* check for DIRS */
/* a path ending in '/' is treated as a directory */
1491 if (ui
.path
[ui_path_len
] == '/' && !(what
& CHM_ENUMERATE_DIRS
))
1494 /* check for FILES */
1495 if (ui
.path
[ui_path_len
] != '/' && !(what
& CHM_ENUMERATE_FILES
))
1498 /* check for NORMAL vs. META */
/* NOTE(review): the else keywords linking the three flag assignments below
 * are not visible; presumably: leading '/' followed by '#' or '$' gives
 * SPECIAL, other leading '/' gives NORMAL, anything else gives META */
1499 if (ui
.path
[0] == '/')
1502 /* check for NORMAL vs. SPECIAL */
1503 if (ui
.path
[1] == '#' || ui
.path
[1] == '$')
1504 flag
= CHM_ENUMERATE_SPECIAL
;
1506 flag
= CHM_ENUMERATE_NORMAL
;
1509 flag
= CHM_ENUMERATE_META
;
/* entry class not requested in what; the action taken is not visible */
1510 if (! (what
& flag
))
1513 /* call the enumerator */
/* the callback receives the file handle, the decoded entry and the caller's
 * context value */
1515 int status
= (*e
)(h
, &ui
, context
);
/* NOTE(review): the switch statement and the return or break in each case
 * are not visible; FAILURE and SUCCESS both free page_buf, CONTINUE does
 * not */
1518 case CHM_ENUMERATOR_FAILURE
:
1519 HeapFree(GetProcessHeap(), 0, page_buf
);
1521 case CHM_ENUMERATOR_CONTINUE
:
1523 case CHM_ENUMERATOR_SUCCESS
:
1524 HeapFree(GetProcessHeap(), 0, page_buf
);
1532 /* advance to next page */
1533 curPage
= header
.block_next
;
/* release the page buffer once the page walk is finished */
1536 HeapFree(GetProcessHeap(), 0, page_buf
);