1 // LZW.C New improved super-duper LZW compressor & decompressor
2 // This module by Greg Travis and Rex Bradford
4 // This module implements LZW-based compression and decompression.
5 // It has flexible control over the source and destination of the
6 // data it uses. Data is read from a "source" and written to a
7 // "destination". In the case of compression, the source is uncompressed
8 // and the destination is compressed; for expansion the reverse is true.
9 // Sources and destinations deal in byte values, even though the LZW
10 // routine works in 14-bit compression codes.
12 // Sources may be one of the following standard types:
14 // BUFF A memory block
15 // FD A file descriptor (fd = open()), already positioned with seek()
16 // FP A file ptr (fp = fopen()), already positioned with lseek()
17 // USER A user-supplied function
19 // Destinations may be any of the above 4 types, plus additionally:
21 // NULL The bit-bucket. Used to determine the size of destination
22 // data without putting it anywhere.
24 // The LZW module consists of these public routines:
26 // LzwInit() - Just sets LzwTerm() to be called on exit.
28 // LzwTerm() - Just calls LzwFreeBuffer(), to free lzw buffer if it
29 // has been malloc'ed.
31 // LzwSetBuffer() - Sets buffer for lzw compression & expansion routines
32 // to use. The buffer must be at least LZW_BUFF_SIZE
33 // in size, which for 14-bit lzw is about 91K.
35 // LzwMallocBuffer() - Allocates buffer for lzw compression & expansion
36 // routines to use. This routine will be called auto-
37 // matically the first time LzwCompress() or LzwExpand()
38 // is used if a buffer has not been set or allocated.
40 // LzwFreeBuffer() - Frees current buffer if allocated.
42 // LzwCompress() - Compresses data, reading from an uncompressed source
43 // and writing compressed bytes to a destination. Returns
44 // the size of the compressed data. A maximum destination
45 // size may be specified, in which case a destination which
46 // is about to exceed this will be aborted, returning -1.
48 // LzwExpand() - Expands data, reading from a compressed source and
49 // writing decompressed bytes to a destination. Returns the
50 // size of the decompressed data. Parameters may be used
51 // to capture a subsection of the uncompressed stream (by
52 // skipping the first n1 destination bytes and then taking
55 // Lzw.h supplies a large set of macros of the form:
57 // LzwCompressSrc2Dest(...) and LzweExpandSrc2Dest(...)
59 // which implement all combinations of source and destination types,
60 // such as buffer->buffer, fd->buffer, buffer->null, fp->user, etc.
62 // User types are handy when there is a need to transform the data
63 // on its way to or from compression (to enhance the compression, or
64 // just to massage the data into a usable form). For example, a map
65 // may want to transform elevations to delta format on the way to
66 // and from compression in order to enhance the compression.
68 // User sources supply two functions of the form:
70 // void f_SrcCtrl(long srcLoc, LzwCtrl ctrl);
73 // The control function is used to set up and tear down the Get()
74 // function, which is used to supply the next byte of data. Before
75 // any compression or decompression begins, the SrcCtrl() function
76 // is called with the srcLoc argument (supplied at the call to
77 // LzwCompress or LzwExpand, its meaning is user-defined), and the
78 // ctrl argument set to BEGIN. After all compression and decompression
79 // is done, cleanup is invoked by calling SrcCtrl() with ctrl equal
80 // to END. Between BEGIN and END, the SrcGet() function is called
81 // repeatedly to get the next byte from the user input stream.
83 // User destinations work similarly. Again, two functions:
85 // void f_DestCtrl(long destLoc, LzwCtrl ctrl);
86 // void f_DestPut(uchar byte);
88 // The control function is called with BEGIN and END just like the
89 // source function. The DestPut() function is called repeatedly to
90 // put the next byte to the user output stream.
92 // Note that user sources can be used for both compression (source of
93 // uncompressed bytes) and expansion (source of compressed bytes).
94 // Similarly, user destinations can be used for both compression
95 // (destination of compressed bytes) and expansion (destination of
96 // uncompressed bytes). This is true of standard sources and
97 // destinations as well, of course.
100 * $Header: x:/prj/tech/libsrc/res/RCS/lzw.cpp 1.16 1997/01/07 11:12:52 TOML Exp $
104 // ------------------------------------------------------------
106 // ------------------------------------------------------------
122 #include <thrdtool.h>
124 // ----------------------------------------------------------
136 totalTime
= timeGetTime();
141 double totalTimeInSecs
= (double) (timeGetTime() - totalTime
) / 1000.0;
142 double sumTimeInSecs
= (double) sumTime
/ 1000.0;
143 double sumMBytes
= (double) sumBytes
/ (1024.0 * 1024.0);
146 sprintf(buf
, "Pct time in LZW was %f\n", sumTimeInSecs
/ totalTimeInSecs
);
149 sprintf(buf
, "MB expanded per second were %f\n", sumMBytes
/ sumTimeInSecs
);
155 startThisTime
= timeGetTime();
158 void Stop(DWORD bytes
)
161 sumTime
+= timeGetTime() - startThisTime
;
162 //Assert_(sumTime >= t);
166 //Assert_(sumBytes >= s);
175 sLZWTimer g_LzwExpandTimer
;
177 #define LZWTimerStart() g_LzwExpandTimer.Start()
178 #define LZWTimerStop(bytes) g_LzwExpandTimer.Stop(bytes);
180 #define LZWTimerStart()
181 #define LZWTimerStop(bytes)
184 // ----------------------------------------------------------
185 // The Watcom C++ parser is verbose in warning of possible integral
186 // truncation, even in cases where for the given native word size
187 // there is no problem (i.e., assigning longs to ints). Here, we
188 // quiet the warnings, although it wouldn't be bad for someone to
189 // evaluate them. (toml 09-14-96)
191 #pragma warning 389 9
194 // Important constants
196 #define MAX_VALUE ((1 << LZW_BITS) - 1) // end-of-compress-data code
197 #define MAX_CODE (MAX_VALUE - 2) // maximum real code allows
198 #define FLUSH_CODE (MAX_VALUE - 1) // code to lzw string table
199 #define HASHING_SHIFT (LZW_BITS-8) // # bits to shift when hashing
200 #define FLUSH_PAUSE 1000 // wait on full table before flush
202 // we use read & write buffer when doing short-circuit file to memory (toml 01-06-97)
203 #define LZW_FD_TO_MEM_READ_BUFF_SIZE (LZW_FD_READ_BUFF_SIZE + LZW_FD_WRITE_BUFF_SIZE)
207 static cThreadLock g_LzwThreadLock
;
209 // Overall lzw buffer info
211 void *lzwBuffer
; // total buffer
212 bool lzwBufferMalloced
; // buffer malloced?
214 // Global tables used for compression & expansion
216 tLzwCodeValue
*lzwCodeValue
; // code value array
217 tLzwPrefixCode
*lzwPrefixCode
; // prefix code array
218 tLzwAppendChar
*lzwAppendChar
; // appended chars array
219 uchar
*lzwDecodeStack
; // decoded string
221 uchar
*lzwFdReadBuff
; // buffer for file descriptor source
222 uchar
*lzwFdWriteBuff
; // buffer for file descriptor dest
224 // Prototypes of internal routines
226 int LzwFindMatch(int hash_prefix
, unsigned int hash_character
);
227 uchar
*LzwDecodeString(uchar
* buffer
, unsigned int code
);
228 // --------------------------------------------------------
229 // INITIALIZATION AND TERMINATION
230 // --------------------------------------------------------
232 // LzwInit() needs to be called once before any of the compression
233 // routines are used.
239 // ------------------------------------------------------------
241 // LzwTerm() needs to be called once when the lzw compression
242 // routines are no longer needed.
248 // ------------------------------------------------------------
250 // --------------------------------------------------------
252 // LzwSetBuffer() inits and sets buffer to use.
254 // Returns: 0 if ok, -1 if buffer not ok
256 void LzwSetBufferPointers(void *buff
)
259 lzwDecodeStack
= (uchar
*) lzwBuffer
;
260 lzwFdReadBuff
= ((uchar
*) lzwDecodeStack
) + LZW_DECODE_STACK_SIZE
;
261 lzwFdWriteBuff
= ((uchar
*) lzwFdReadBuff
) + LZW_FD_READ_BUFF_SIZE
;
262 lzwCodeValue
= (tLzwCodeValue
*) (((uchar
*) lzwFdWriteBuff
) + LZW_FD_WRITE_BUFF_SIZE
);
263 lzwPrefixCode
= (tLzwPrefixCode
*) (((uchar
*) lzwCodeValue
) + (LZW_TABLE_BUFF_SIZE
* sizeof(tLzwCodeValue
)));
264 lzwAppendChar
= ((tLzwAppendChar
*) lzwPrefixCode
) + (LZW_TABLE_BUFF_SIZE
* sizeof(tLzwPrefixCode
));
267 int LzwSetBuffer(void *buff
, long buffSize
)
269 cAutoLock
lock(g_LzwThreadLock
);
272 if (buffSize
< LZW_BUFF_SIZE
)
274 Warning(("LzwSetBuffer: buffer too small!\n"));
278 // De-allocate current buffer if malloced
282 // Set buffer pointers
283 LzwSetBufferPointers(buff
);
284 lzwBufferMalloced
= FALSE
;
288 // ------------------------------------------------------------
290 // LzwMallocBuffer() allocates buffer with Malloc.
292 // Returns: 0 if success, -1 if error.
294 int LzwMallocBuffer()
296 cAutoLock
lock(g_LzwThreadLock
);
298 if ((lzwBuffer
== NULL
) || (!lzwBufferMalloced
))
300 buff
= Malloc(LZW_BUFF_SIZE
);
303 Warning(("LzwMallocBuffer: failed to allocate buffers\n"));
308 LzwSetBuffer(buff
, LZW_BUFF_SIZE
);
309 lzwBufferMalloced
= TRUE
;
314 // ------------------------------------------------------------
316 // LzwFreeBuffer() frees buffer.
320 cAutoLock
lock(g_LzwThreadLock
);
321 if (lzwBufferMalloced
)
325 lzwBufferMalloced
= FALSE
;
328 // ------------------------------------------------------------
330 // ------------------------------------------------------------
332 // LzwCompress() does lzw compression. It reads uncompressed bytes
333 // from an input source and outputs compressed bytes to an output
334 // destination. It returns the number of bytes the compressed data
335 // took up, or -1 if the compressed data size exceeds the allowed space.
337 // f_ScrCtrl = routine to call to control source data stream
338 // f_SrcGet = routine to call to get next input data byte
339 // srcLoc = source data "location", actual type undefined
340 // srcSize = size of source (input) data
341 // f_DestCtrl = routine to call to control destination data stream
342 // f_DestPut = routine to call to put next output data byte
343 // destLoc = dest data "location", actual type undefined
344 // destSizeMax = maximum allowed size of output data
346 // Returns: actual output compressed size, or -1 if exceeded outputSizeMax
347 // (in which case compression has been aborted)
349 // This macro is used to accumulate output codes into a bit buffer
350 // and call the destination put routine whenever more than 8 bits
351 // are available. If the output data size ever exceeds the alloted
352 // size, the source and destination are shut down and -1 is returned.
356 unsigned int next_code
; // next available string code
357 unsigned int character
; // current character read from source
358 unsigned int string_code
; // current string compress code
359 unsigned int index
; // index into string table
360 long lzwInputCharCount
; // input character count
361 long lzwOutputSize
; // current size of output
362 int lzwOutputBitCount
; // current bit location in output
363 ulong lzwOutputBitBuffer
; // 32-bit buffer holding output bits
366 LzwC lzwc
; // current compress state
368 #define LzwOutputCode(code) { \
369 lzwc.lzwOutputBitBuffer |= ((ulong) code) << (32-LZW_BITS-lzwc.lzwOutputBitCount); \
370 lzwc.lzwOutputBitCount += LZW_BITS; \
371 while (lzwc.lzwOutputBitCount >= 8) \
373 (*f_DestPut)(lzwc.lzwOutputBitBuffer >> 24); \
374 if (++lzwc.lzwOutputSize > destSizeMax) \
376 (*f_SrcCtrl)(srcLoc, END); \
377 (*f_DestCtrl)(destLoc, END); \
380 lzwc.lzwOutputBitBuffer <<= 8; \
381 lzwc.lzwOutputBitCount -= 8; \
385 long LzwCompress( tLzwCompressCtrlFunc f_SrcCtrl
,// func to control source
386 tLzwCompressSrcGetFunc f_SrcGet
, // func to get bytes from source
387 long srcLoc
, // source "location" (ptr, FILE *, etc.)
388 long srcSize
, // size of source in bytes
389 void (*f_DestCtrl
) (long destLoc
, LzwCtrl ctrl
), // func to control dest
390 void (*f_DestPut
) (uchar b
), // func to put bytes to dest
391 long destLoc
, // dest "location" (ptr, FILE *, etc.)
392 long destSizeMax
// max size of dest (or LZW_MAXSIZE)
396 cAutoLock
lock(g_LzwThreadLock
);
397 // If not already initialized, do it
399 if (lzwBuffer
== NULL
)
401 if (LzwMallocBuffer() < 0)
405 // Set up for compress loop
407 lzwc
.next_code
= 256; // skip over real 256 char values
408 memset(lzwCodeValue
, -1, sizeof(tLzwCodeValue
) * LZW_TABLE_SIZE
);
410 lzwc
.lzwOutputSize
= 0;
411 lzwc
.lzwOutputBitCount
= 0;
412 lzwc
.lzwOutputBitBuffer
= 0;
414 (*f_SrcCtrl
) (srcLoc
, BEGIN
);
415 (*f_DestCtrl
) (destLoc
, BEGIN
);
417 lzwc
.string_code
= (*f_SrcGet
) ();
418 lzwc
.lzwInputCharCount
= 1;
420 // This is the main loop where it all happens. This loop runs until all of
421 // the input has been exhausted. Note that it stops adding codes to the
422 // table after all of the possible codes have been defined.
427 // Get next input char, if read all data then exit loop
429 lzwc
.character
= (*f_SrcGet
) ();
430 if (lzwc
.lzwInputCharCount
++ >= srcSize
)
433 // See if string is in string table. If it is, get the code value.
435 lzwc
.index
= LzwFindMatch(lzwc
.string_code
, lzwc
.character
);
436 if (lzwCodeValue
[lzwc
.index
] != -1)
437 lzwc
.string_code
= lzwCodeValue
[lzwc
.index
];
439 // Else if string not in string table, try to add it.
443 if (lzwc
.next_code
<= MAX_CODE
)
445 lzwCodeValue
[lzwc
.index
] = lzwc
.next_code
++;
446 lzwPrefixCode
[lzwc
.index
] = lzwc
.string_code
;
447 lzwAppendChar
[lzwc
.index
] = lzwc
.character
;
448 LzwOutputCode(lzwc
.string_code
);
449 lzwc
.string_code
= lzwc
.character
;
452 // Else if table is full and has been for a while, flush it, and drain
453 // the code value table too.
455 else if (lzwc
.next_code
> MAX_CODE
+ FLUSH_PAUSE
)
457 LzwOutputCode(lzwc
.string_code
);
458 LzwOutputCode(FLUSH_CODE
);
459 memset(lzwCodeValue
, -1, sizeof(tLzwCodeValue
) * LZW_TABLE_SIZE
);
460 lzwc
.string_code
= lzwc
.character
;
461 lzwc
.next_code
= 256;
464 // Else if can't add but table not full, just output the code.
469 LzwOutputCode(lzwc
.string_code
);
470 lzwc
.string_code
= lzwc
.character
;
475 // Done with processing loop, output current code, end-of-data code,
476 // and a final 0 to flush the buffer.
478 LzwOutputCode(lzwc
.string_code
);
479 LzwOutputCode(MAX_VALUE
);
482 // Shut down source and destination and return size of output
484 (*f_SrcCtrl
) (srcLoc
, END
);
485 (*f_DestCtrl
) (destLoc
, END
);
487 return (lzwc
.lzwOutputSize
);
489 // -----------------------------------------------------------
491 // -----------------------------------------------------------
493 // LzwExpand() does lzw expansion. It reads compressed bytes
494 // from an input source and outputs uncompressed bytes to an output
495 // destination. It returns the number of bytes the uncompressed data
498 // f_ScrCtrl = routine to call to control source data stream
499 // f_SrcGet = routine to call to get next input data byte
500 // srcLoc = source data "location", actual type undefined
501 // f_DestCtrl = routine to call to control destination data stream
502 // f_DestPut = routine to call to put next output data byte
503 // destLoc = dest data "location", actual type undefined
504 // destSkip = # bytes of output to skip over before storing
505 // destSize = # bytes of output to store (if 0, everything)
507 // Returns: # bytes in uncompressed output
509 // need to change LZW_PARTIAL_STATE_SIZE if this changes in size!
512 int lzwInputBitCount
;
513 ulong lzwInputBitBuffer
;
514 unsigned int next_code
; // next available string code
515 unsigned int new_code
; // next code from source
516 unsigned int old_code
; // last code gotten from source
517 unsigned int character
; // current char for string stack
518 uchar
*string
; // used to output string in reverse order
519 long outputSize
; // size of uncompressed data
520 long destSkip
; // # bytes to skip over
521 long destSize
; // destination size
522 uchar
*last_buffer
; // for partial expand, covers old buffer state
523 uchar
*curr_buffer
; // for partial, the buffer we are using for globals
524 void *in_state
; // for partial, pointer for the input installable func to use to save state
525 void *out_state
; // for partial, pointer for the output installable func to use to save state
528 LzwE lzwe
; // current expand state
530 ///////////////////////////////////////////////////////////////////////////////
532 static unsigned int LzwInputCodeGeneric(tLzwCompressSrcGetFunc f_SrcGet
, LzwE
* plzwe
)
534 unsigned int return_value
;
535 while (plzwe
->lzwInputBitCount
<= 24)
537 plzwe
->lzwInputBitBuffer
|= ((ulong
) (*f_SrcGet
) ()) << (24 - plzwe
->lzwInputBitCount
);
538 plzwe
->lzwInputBitCount
+= 8;
540 return_value
= plzwe
->lzwInputBitBuffer
>> (32 - LZW_BITS
);
542 plzwe
->lzwInputBitBuffer
<<= LZW_BITS
;
543 plzwe
->lzwInputBitCount
-= LZW_BITS
;
545 return (return_value
);
548 ///////////////////////////////////////
550 static long LzwExpandGeneric(tLzwCompressCtrlFunc f_SrcCtrl
, // func to control source
551 tLzwCompressSrcGetFunc f_SrcGet
, // func to get bytes from source
552 long srcLoc
, // source "location" (ptr, FILE *, etc.)
553 tLzwCompressCtrlFunc f_DestCtrl
, // func to control dest
554 tLzwCompressDestPutFunc f_DestPut
,// func to put bytes to dest
555 long destLoc
// dest "location" (ptr, FILE *, etc.)
558 // Notify the control routines
560 (*f_SrcCtrl
) (srcLoc
, BEGIN
);
561 (*f_DestCtrl
) (destLoc
, BEGIN
);
563 // Get first code & output it.
565 lzwe
.old_code
= LzwInputCodeGeneric(f_SrcGet
, &lzwe
);
566 lzwe
.character
= lzwe
.old_code
;
568 if (--lzwe
.destSkip
< 0)
570 (*f_DestPut
) (lzwe
.old_code
);
574 // This is the expansion loop. It reads in codes from the source until
575 // it sees the special end-of-data code.
577 while ((lzwe
.new_code
= LzwInputCodeGeneric(f_SrcGet
, &lzwe
)) != MAX_VALUE
)
580 // If flush code, flush the string table & restart from top of loop
582 if (lzwe
.new_code
== FLUSH_CODE
)
584 lzwe
.next_code
= 256;
585 lzwe
.old_code
= LzwInputCodeGeneric(f_SrcGet
, &lzwe
);
586 lzwe
.character
= lzwe
.old_code
;
587 if (--lzwe
.destSkip
< 0)
589 if (lzwe
.outputSize
++ >= lzwe
.destSize
)
591 (*f_DestPut
) (lzwe
.old_code
);
596 // Check for the special STRING+CHARACTER+STRING+CHARACTER+STRING, which
597 // generates an undefined code. Handle it by decoding the last code,
598 // adding a single character to the end of the decode string.
600 if (lzwe
.new_code
>= lzwe
.next_code
)
602 *lzwDecodeStack
= lzwe
.character
;
603 lzwe
.string
= LzwDecodeString(lzwDecodeStack
+ 1, lzwe
.old_code
);
606 // Otherwise we do a straight decode of the new code.
610 lzwe
.string
= LzwDecodeString(lzwDecodeStack
, lzwe
.new_code
);
613 // Output the decode string to the destination, in reverse order.
615 lzwe
.character
= *lzwe
.string
;
616 while (lzwe
.string
>= lzwDecodeStack
)
618 if (--lzwe
.destSkip
< 0)
620 if (lzwe
.outputSize
++ >= lzwe
.destSize
)
622 (*f_DestPut
) (*lzwe
.string
);
627 // If possible, add a new code to the string table.
629 if (lzwe
.next_code
<= MAX_CODE
)
631 lzwPrefixCode
[lzwe
.next_code
] = lzwe
.old_code
;
632 lzwAppendChar
[lzwe
.next_code
] = lzwe
.character
;
635 lzwe
.old_code
= lzwe
.new_code
;
638 // When break out of expansion loop, shut down source & dest & return size.
641 (*f_SrcCtrl
) (srcLoc
, END
);
642 (*f_DestCtrl
) (destLoc
, END
);
644 return (lzwe
.outputSize
);
647 ///////////////////////////////////////////////////////////////////////////////
649 static uchar
*lzwBuffDestPtr
;
651 static int lzwReadBuffIndex
;
653 static unsigned int LzwInputCodeFileToMemory()
655 unsigned int return_value
;
658 while (lzwe
.lzwInputBitCount
<= 24)
660 if (lzwReadBuffIndex
!= LZW_FD_TO_MEM_READ_BUFF_SIZE
)
662 c
= lzwFdReadBuff
[lzwReadBuffIndex
];
663 temp
= 24 - lzwe
.lzwInputBitCount
;
665 lzwe
.lzwInputBitBuffer
|= (c
<< temp
);
666 lzwe
.lzwInputBitCount
+= 8;
671 read(lzwFdSrc
, lzwFdReadBuff
, LZW_FD_TO_MEM_READ_BUFF_SIZE
);
673 lzwReadBuffIndex
= 0;
676 return_value
= lzwe
.lzwInputBitBuffer
>> (32 - LZW_BITS
);
678 lzwe
.lzwInputBitCount
-= LZW_BITS
;
679 lzwe
.lzwInputBitBuffer
<<= LZW_BITS
;
681 return (return_value
);
684 ///////////////////////////////////////
686 static long LzwExpandFileToMemory(long srcLoc
, long destLoc
)
689 lzwFdSrc
= (int) srcLoc
;
690 lzwReadBuffIndex
= LZW_FD_TO_MEM_READ_BUFF_SIZE
;
691 lzwBuffDestPtr
= (uchar
*) destLoc
;
693 // Get first code & output it.
694 lzwe
.old_code
= LzwInputCodeFileToMemory();
696 lzwe
.character
= lzwe
.old_code
;
698 if (--lzwe
.destSkip
< 0)
700 *lzwBuffDestPtr
++ = lzwe
.old_code
;
704 // This is the expansion loop. It reads in codes from the source until
705 // it sees the special end-of-data code.
707 while ((lzwe
.new_code
= LzwInputCodeFileToMemory()) != MAX_VALUE
)
709 // If flush code, flush the string table & restart from top of loop.
710 // Also check for the special STRING+CHARACTER+STRING+CHARACTER+STRING,
711 // which generates an undefined code. Handle it by decoding the last code,
712 // adding a single character to the end of the decode string.
714 if (lzwe
.new_code
>= lzwe
.next_code
)
716 if (lzwe
.new_code
== FLUSH_CODE
)
718 lzwe
.next_code
= 256;
719 lzwe
.old_code
= LzwInputCodeFileToMemory();
720 lzwe
.character
= lzwe
.old_code
;
721 if (--lzwe
.destSkip
< 0)
723 if (lzwe
.outputSize
++ >= lzwe
.destSize
)
725 *lzwBuffDestPtr
++ = lzwe
.old_code
;
731 *lzwDecodeStack
= lzwe
.character
;
732 lzwe
.string
= LzwDecodeString(lzwDecodeStack
+ 1, lzwe
.old_code
);
736 // Otherwise we do a straight decode of the new code.
740 if (lzwe
.new_code
< 256)
742 lzwe
.string
= lzwDecodeStack
;
743 *lzwe
.string
= lzwe
.new_code
;
746 lzwe
.string
= LzwDecodeString(lzwDecodeStack
, lzwe
.new_code
);
749 // Output the decode string to the destination, in reverse order.
751 lzwe
.character
= *lzwe
.string
;
752 while (lzwe
.string
>= lzwDecodeStack
)
754 if (--lzwe
.destSkip
< 0)
756 if (lzwe
.outputSize
++ >= lzwe
.destSize
)
758 *lzwBuffDestPtr
++ = *lzwe
.string
;
763 // If possible, add a new code to the string table.
765 if (lzwe
.next_code
<= MAX_CODE
)
767 lzwPrefixCode
[lzwe
.next_code
] = lzwe
.old_code
;
768 lzwAppendChar
[lzwe
.next_code
] = lzwe
.character
;
771 lzwe
.old_code
= lzwe
.new_code
;
774 // When break out of expansion loop, return size.
778 return (lzwe
.outputSize
);
781 ///////////////////////////////////////
783 long LzwExpand( tLzwCompressCtrlFunc f_SrcCtrl
, // func to control source
784 tLzwCompressSrcGetFunc f_SrcGet
, // func to get bytes from source
785 long srcLoc
, // source "location" (ptr, FILE *, etc.)
786 tLzwCompressCtrlFunc f_DestCtrl
, // func to control dest
787 tLzwCompressDestPutFunc f_DestPut
,// func to put bytes to dest
788 long destLoc
, // dest "location" (ptr, FILE *, etc.)
789 long destSkip
, // # dest bytes to skip over (or 0)
790 long destSize
// # dest bytes to capture (if 0, all)
793 cAutoLock
lock(g_LzwThreadLock
);
797 // If not already initialized, do it
799 if (lzwBuffer
== NULL
)
801 if (LzwMallocBuffer() < 0)
805 // Set up for expansion loop
807 lzwe
.lzwInputBitCount
= 0;
808 lzwe
.lzwInputBitBuffer
= 0;
809 lzwe
.next_code
= 256; // next available char after regular 256 chars
811 lzwe
.destSkip
= destSkip
;
812 lzwe
.destSize
= destSize
? destSize
: LZW_MAXSIZE
;
814 if (f_SrcCtrl
== LzwFdSrcCtrl
&& f_SrcGet
== LzwFdSrcGet
&&
815 f_DestCtrl
== LzwBuffDestCtrl
&& f_DestPut
== LzwBuffDestPut
)
817 retVal
= LzwExpandFileToMemory(srcLoc
, destLoc
);
821 retVal
= LzwExpandGeneric(f_SrcCtrl
, f_SrcGet
, srcLoc
,
822 f_DestCtrl
, f_DestPut
, destLoc
);
824 LZWTimerStop(lzwe
.outputSize
)
829 ///////////////////////////////////////////////////////////////////////////////
831 // Okay, clearly there is a lot of code overlay between this and LzwExpand but for
832 // performance reasons in LzwExpand I didn't want to attempt to make them wholly
833 // integrated -- Xemu 5/1/96
834 #pragma off(unreferenced)
835 long LzwExpandPartial( tLzwCompressCtrlFunc f_SrcCtrl
, // func to control source
836 tLzwCompressSrcGetFunc f_SrcGet
, // func to get bytes from source
837 long srcLoc
, // source "location" (ptr, FILE *, etc.)
838 tLzwCompressCtrlFunc f_DestCtrl
, // func to control dest
839 tLzwCompressDestPutFunc f_DestPut
, // func to put bytes to dest
840 long destLoc
, // dest "location" (ptr, FILE *, etc.)
841 long destSkip
, // # dest bytes to skip over (or 0)
842 long destSize
, // # dest bytes to capture (if 0, all)
843 void *state
, // internal state structure
844 long cycles
// how many iterations of actual processing
847 cAutoLock
lock(g_LzwThreadLock
);
850 plzwe
= (LzwE
*) state
;
851 LzwSetBufferPointers(plzwe
->curr_buffer
);
853 // notify IO installables
854 (*f_SrcCtrl
) ((long) plzwe
, RESUME
);
855 (*f_DestCtrl
) ((long) plzwe
, RESUME
);
857 // This is the expansion loop. It reads in codes from the source until
858 // it sees the special end-of-data code.
860 plzwe
->new_code
= LzwInputCodeGeneric(f_SrcGet
, plzwe
);
861 while ((plzwe
->new_code
!= MAX_VALUE
) && (count
< cycles
))
864 // If flush code, flush the string table & restart from top of loop
866 if (plzwe
->new_code
== FLUSH_CODE
)
868 plzwe
->next_code
= 256;
869 plzwe
->old_code
= LzwInputCodeGeneric(f_SrcGet
, plzwe
);
870 plzwe
->character
= plzwe
->old_code
;
871 if (--plzwe
->destSkip
< 0)
873 if (plzwe
->outputSize
++ >= plzwe
->destSize
)
876 (*f_DestPut
) (plzwe
->old_code
);
881 // Check for the special STRING+CHARACTER+STRING+CHARACTER+STRING, which
882 // generates an undefined code. Handle it by decoding the last code,
883 // adding a single character to the end of the decode string.
885 if (plzwe
->new_code
>= plzwe
->next_code
)
887 *lzwDecodeStack
= plzwe
->character
;
888 plzwe
->string
= LzwDecodeString(lzwDecodeStack
+ 1, plzwe
->old_code
);
891 // Otherwise we do a straight decode of the new code.
895 plzwe
->string
= LzwDecodeString(lzwDecodeStack
, plzwe
->new_code
);
898 // Output the decode string to the destination, in reverse order.
900 plzwe
->character
= *plzwe
->string
;
901 while (plzwe
->string
>= lzwDecodeStack
)
903 if (--plzwe
->destSkip
< 0)
905 if (plzwe
->outputSize
++ >= plzwe
->destSize
)
907 (*f_DestPut
) (*plzwe
->string
);
912 // If possible, add a new code to the string table.
914 if (plzwe
->next_code
<= MAX_CODE
)
916 lzwPrefixCode
[plzwe
->next_code
] = plzwe
->old_code
;
917 lzwAppendChar
[plzwe
->next_code
] = plzwe
->character
;
920 plzwe
->old_code
= plzwe
->new_code
;
924 plzwe
->new_code
= LzwInputCodeGeneric(f_SrcGet
, plzwe
);
927 if (plzwe
->new_code
!= MAX_VALUE
) // then we are still iterating, cause otherwise this is false or
928 // we jumped down to DONE_EXPAND
930 // notify IO installables so that we can restore again later
931 (*f_SrcCtrl
) ((long) plzwe
, SUSPEND
);
932 (*f_DestCtrl
) ((long) plzwe
, SUSPEND
);
933 LzwSetBufferPointers(plzwe
->last_buffer
);
937 // When break out of expansion loop, shut down source & dest & return size.
941 (*f_SrcCtrl
) (srcLoc
, END
);
942 (*f_DestCtrl
) (destLoc
, END
);
944 LzwSetBufferPointers(plzwe
->last_buffer
);
946 return (plzwe
->outputSize
);
948 #pragma on(unreferenced)
950 void LzwExpandPartialStart(
951 tLzwCompressCtrlFunc f_SrcCtrl
, // func to control source
952 tLzwCompressSrcGetFunc f_SrcGet
, // func to get bytes from source
953 long srcLoc
, // source "location" (ptr, FILE *, etc.)
954 tLzwCompressCtrlFunc f_DestCtrl
, // func to control dest
955 tLzwCompressDestPutFunc f_DestPut
, // func to put bytes to dest
956 long destLoc
, // dest "location" (ptr, FILE *, etc.)
957 long destSkip
, // # dest bytes to skip over (or 0)
958 long destSize
, // # dest bytes to capture (if 0, all)
959 void *state
, // internal state structure
960 uchar
* buffer
// buffer for storing "global" lzw state
963 cAutoLock
lock(g_LzwThreadLock
);
965 plzwe
= (LzwE
*) state
;
967 // Check initialization
971 Warning(("LzwExpandPartialStart: cannot do partial expand with NULL buffer!\n"));
975 plzwe
->last_buffer
= (uchar
*) lzwBuffer
;
976 plzwe
->curr_buffer
= buffer
;
977 LzwSetBufferPointers(buffer
);
979 // Set up for expansion loop
981 plzwe
->lzwInputBitCount
= 0;
982 plzwe
->lzwInputBitBuffer
= 0;
983 plzwe
->next_code
= 256; // next available char after regular 256 chars
984 plzwe
->outputSize
= 0;
985 plzwe
->destSkip
= destSkip
;
986 plzwe
->destSize
= destSize
? destSize
: LZW_MAXSIZE
;
988 // Notify the control routines
990 (*f_SrcCtrl
) (srcLoc
, BEGIN
);
991 (*f_DestCtrl
) (destLoc
, BEGIN
);
993 // Get first code & output it.
995 plzwe
->old_code
= LzwInputCodeGeneric(f_SrcGet
, plzwe
);
996 plzwe
->character
= plzwe
->old_code
;
998 if (--plzwe
->destSkip
< 0)
1000 (*f_DestPut
) (plzwe
->old_code
);
1001 plzwe
->outputSize
++;
1004 (*f_SrcCtrl
) ((long) plzwe
, SUSPEND
);
1005 (*f_DestCtrl
) ((long) plzwe
, SUSPEND
);
1007 LzwSetBufferPointers(plzwe
->last_buffer
);
1009 // --------------------------------------------------------------
1010 // STANDARD INPUT SOURCES
1011 // --------------------------------------------------------------
1013 // LzwBuffSrcCtrl() and LzwBuffSrcGet() implement a memory buffer
1014 // source for lzw compression and expansion.
1016 static uchar
*lzwBuffSrcPtr
;
1018 void LzwBuffSrcCtrl(long srcLoc
, LzwCtrl ctrl
)
1022 lzwBuffSrcPtr
= (uchar
*) srcLoc
;
1023 else if (ctrl
== SUSPEND
)
1025 lzwep
= (LzwE
*) srcLoc
;
1026 lzwep
->in_state
= (void *) lzwBuffSrcPtr
;
1028 else if (ctrl
== RESUME
)
1030 lzwep
= (LzwE
*) srcLoc
;
1031 lzwBuffSrcPtr
= (uchar
*) lzwep
->in_state
;
1035 uchar
LzwBuffSrcGet()
1037 return (*lzwBuffSrcPtr
++);
1039 // ---------------------------------------------------------------
1041 // LzwFdSrcCtrl() and LzwFdSrcGet() implement a file-descriptor
1042 // source (fd = open()) for lzw compression and expansion.
1044 void LzwFdSrcCtrl(long srcLoc
, LzwCtrl ctrl
)
1048 lzwFdSrc
= (int) srcLoc
;
1049 lzwReadBuffIndex
= LZW_FD_READ_BUFF_SIZE
;
1055 if (lzwReadBuffIndex
== LZW_FD_READ_BUFF_SIZE
)
1058 read(lzwFdSrc
, lzwFdReadBuff
, LZW_FD_READ_BUFF_SIZE
);
1060 lzwReadBuffIndex
= 0;
1062 return (lzwFdReadBuff
[lzwReadBuffIndex
++]);
1064 // ---------------------------------------------------------------
1066 // LzwFpSrcCtrl() and LzwFpSrcGet() implement a file-ptr source
1067 // (fp = fopen()) for lzw compression and expansion.
1069 static FILE *lzwFpSrc
;
1071 void LzwFpSrcCtrl(long srcLoc
, LzwCtrl ctrl
)
1074 lzwFpSrc
= (FILE *) srcLoc
;
1079 return (fgetc(lzwFpSrc
));
1081 // ---------------------------------------------------------------
1082 // STANDARD OUTPUT SOURCES
1083 // ---------------------------------------------------------------
1085 // LzwBuffDestCtrl() and LzwBuffDestPut() implement a memory
1086 // buffer destination for lzw compression and expansion.
1088 void LzwBuffDestCtrl(long destLoc
, LzwCtrl ctrl
)
1092 lzwBuffDestPtr
= (uchar
*) destLoc
;
1093 else if (ctrl
== SUSPEND
)
1095 lzwep
= (LzwE
*) destLoc
;
1096 lzwep
->out_state
= (void *) lzwBuffDestPtr
;
1098 else if (ctrl
== RESUME
)
1100 lzwep
= (LzwE
*) destLoc
;
1101 lzwBuffDestPtr
= (uchar
*) lzwep
->out_state
;
1106 void LzwBuffDestPut(uchar b
)
1108 *lzwBuffDestPtr
++ = b
;
1110 // ---------------------------------------------------------------
1112 // LzwFdDestCtrl() and LzwFdDestPut() implement a file-descriptor
1113 // destination (fd = open()) for lzw compression and expansion.
1115 static int lzwFdDest
;
1116 static int lzwWriteBuffIndex
;
1118 void LzwFdDestCtrl(long destLoc
, LzwCtrl ctrl
)
1122 lzwFdDest
= (int) destLoc
;
1123 lzwWriteBuffIndex
= 0;
1125 else if (ctrl
== END
)
1127 if (lzwWriteBuffIndex
)
1128 write(lzwFdDest
, lzwFdWriteBuff
, lzwWriteBuffIndex
);
1132 void LzwFdDestPut(uchar b
)
1134 lzwFdWriteBuff
[lzwWriteBuffIndex
++] = b
;
1135 if (lzwWriteBuffIndex
== LZW_FD_WRITE_BUFF_SIZE
)
1137 write(lzwFdDest
, lzwFdWriteBuff
, LZW_FD_WRITE_BUFF_SIZE
);
1138 lzwWriteBuffIndex
= 0;
1141 // ---------------------------------------------------------------
1143 // LzwFpDestCtrl() and LzwFpDestPut() implement a file-ptr destination
1144 // (fp = fopen()) for lzw compression and expansion.
1146 static FILE *lzwFpDest
;
1148 void LzwFpDestCtrl(long destLoc
, LzwCtrl ctrl
)
1151 lzwFpDest
= (FILE *) destLoc
;
1154 void LzwFpDestPut(uchar b
)
1156 fputc(b
, lzwFpDest
);
1158 // ---------------------------------------------------------------
1160 // LzwNullDestCtrl() and LzwNullDestPut() implement a bit-bucket
1161 // destination for lzw compression and expansion. Used to size
1162 // results of compression or expansion.
1164 #pragma off(unreferenced);
1166 void LzwNullDestCtrl(long destLoc
, LzwCtrl ctrl
)
1170 void LzwNullDestPut(uchar b
)
1173 #pragma on(unreferenced);
1175 // -----------------------------------------------------------
1176 // INTERNAL ROUTINES - COMPRESSION
1177 // -----------------------------------------------------------
1179 // LzwFindMatch() is the hashing routine. It tries to find a match
1180 // for the prefix+char string in the string table. If it finds it,
1181 // the index is returned. If the string is not found, the first available
1182 // index in the string table is returned instead.
1184 // hash_prefix = prefix to this code
1185 // hash_character = new character
1187 // Returns: string table index
1189 int LzwFindMatch(int hash_prefix
, unsigned int hash_character
)
1193 index
= (hash_character
<< HASHING_SHIFT
) ^ hash_prefix
;
1197 offset
= LZW_TABLE_SIZE
- index
;
1200 if (lzwCodeValue
[index
] == -1)
1202 if ((lzwPrefixCode
[index
] == hash_prefix
) &&
1203 (lzwAppendChar
[index
] == hash_character
))
1207 index
+= LZW_TABLE_SIZE
;
1210 // ------------------------------------------------------------
1211 // INTERNAL ROUTINES - EXPANSION
1212 // ------------------------------------------------------------
1214 // LzwDecodeString() decodes a string from the string table,
1215 // storing it in a buffer. The buffer can then be output in
1216 // reverse order by the expansion program.
1218 uchar
*LzwDecodeString(uchar
* buffer
, unsigned int code
)
1220 #if defined(DBG_ON) || defined(ONEOPT)
1226 *buffer
++ = lzwAppendChar
[code
];
1227 code
= lzwPrefixCode
[code
];
1229 #if defined(DBG_ON) || defined(ONEOPT)
1231 CriticalMsg("LzwDecodeString: Fatal error during code expansion");