convert line ends
[canaan.git] / prj / tech / libsrc / res / lzw.cpp
blobf5b917ffbc8d5d25d48f955324245052abfc5dbd
1 // LZW.C New improved super-duper LZW compressor & decompressor
2 // This module by Greg Travis and Rex Bradford
3 //
4 // This module implements LZW-based compression and decompression.
5 // It has flexible control over the source and destination of the
6 // data it uses. Data is read from a "source" and written to a
7 // "destination". In the case of compression, the source is uncompressed
8 // and the destination is compressed; for expansion the reverse is true.
9 // Sources and destinations deal in byte values, even though the LZW
10 // routine works in 14-bit compression codes.
12 // Sources may be one of the following standard types:
14 // BUFF A memory block
15 // FD A file descriptor (fd = open()), already positioned with seek()
16 // FP A file ptr (fp = fopen()), already positioned with lseek()
17 // USER A user-supplied function
19 // Destinations may be any of the above 4 types, plus additionally:
21 // NULL The bit-bucket. Used to determine the size of destination
22 // data without putting it anywhere.
24 // The LZW module consists of these public routines:
26 // LzwInit() - Just sets LzwTerm() to be called on exit.
28 // LzwTerm() - Just calls LzwFreeBuffer(), to free lzw buffer if it
29 // has been malloc'ed.
31 // LzwSetBuffer() - Sets buffer for lzw compression & expansion routines
32 // to use. The buffer must be at least LZW_BUFF_SIZE
33 // in size, which for 14-bit lzw is about 91K.
35 // LzwMallocBuffer() - Allocates buffer for lzw compression & expansion
36 // routines to use. This routine will be called auto-
37 // matically the first time LzwCompress() or LzwExpand()
38 // is used if a buffer has not been set or allocated.
40 // LzwFreeBuffer() - Frees current buffer if allocated.
42 // LzwCompress() - Compresses data, reading from an uncompressed source
43 // and writing compressed bytes to a destination. Returns
44 // the size of the compressed data. A maximum destination
45 // size may be specified, in which case a destination which
46 // is about to exceed this will be aborted, returning -1.
48 // LzwExpand() - Expands data, reading from a compressed source and
49 // writing decompressed bytes to a destination. Returns the
50 // size of the decompressed data. Parameters may be used
51 // to capture a subsection of the uncompressed stream (by
52 // skipping the first n1 destination bytes and then taking
53 // the next n2).
55 // Lzw.h supplies a large set of macros of the form:
57 // LzwCompressSrc2Dest(...) and LzweExpandSrc2Dest(...)
59 // which implement all combinations of source and destination types,
60 // such as buffer->buffer, fd->buffer, buffer->null, fp->user, etc.
62 // User types are handy when there is a need to transform the data
63 // on its way to or from compression (to enhance the compression, or
64 // just to massage the data into a usable form). For example, a map
65 // may want to transform elevations to delta format on the way to
66 // and from compression in order to enhance the compression.
68 // User sources supply two functions of the form:
70 // void f_SrcCtrl(long srcLoc, LzwCtrl ctrl);
71 // uchar f_SrcGet();
73 // The control function is used to set up and tear down the Get()
74 // function, which is used to supply the next byte of data. Before
75 // any compression or decompression begins, the SrcCtrl() function
76 // is called with the srcLoc argument (supplied at the call to
77 // LzwCompress or LzwExpand, its meaning is user-defined), and the
78 // ctrl argument set to BEGIN. After all compression and decompression
79 // is done, cleanup is invoked by calling SrcCtrl() with ctrl equal
80 // to END. Between BEGIN and END, the SrcGet() function is called
81 // repeatedly to get the next byte from the user input stream.
83 // User destinations work similarly. Again, two functions:
85 // void f_DestCtrl(long destLoc, LzwCtrl ctrl);
86 // void f_DestPut(uchar byte);
88 // The control function is called with BEGIN and END just like the
89 // source function. The DestPut() function is called repeatedly to
90 // put the next byte to the user output stream.
92 // Note that user sources can be used for both compression (source of
93 // uncompressed bytes) and expansion (source of compressed bytes).
94 // Similarly, user destinations can be used for both compression
95 // (destination of compressed bytes) and expansion (destination of
96 // uncompressed bytes). This is true of standard sources and
97 // destinations as well, of course.
100 * $Header: x:/prj/tech/libsrc/res/RCS/lzw.cpp 1.16 1997/01/07 11:12:52 TOML Exp $
101 * $log$
104 // ------------------------------------------------------------
105 // HEADER SECTION
106 // ------------------------------------------------------------
108 #ifdef _WIN32
109 #include <windows.h>
110 #endif
112 #include <stdio.h>
113 #include <stdlib.h>
114 #include <string.h>
115 #include <io.h>
117 #include <lg.h>
118 #ifndef __LZW_H
119 #include <lzw.h>
120 #endif
122 #include <thrdtool.h>
124 // ----------------------------------------------------------
125 // Profiling tool
127 #ifdef TIME_LZW
128 #include <mprintf.h>
130 struct sLZWTimer
132 sLZWTimer()
134 sumTime = 0;
135 sumBytes = 0;
136 totalTime = timeGetTime();
139 ~sLZWTimer()
141 double totalTimeInSecs = (double) (timeGetTime() - totalTime) / 1000.0;
142 double sumTimeInSecs = (double) sumTime / 1000.0;
143 double sumMBytes = (double) sumBytes / (1024.0 * 1024.0);
144 char buf[256];
146 sprintf(buf, "Pct time in LZW was %f\n", sumTimeInSecs / totalTimeInSecs);
147 mprintf(buf);
149 sprintf(buf, "MB expanded per second were %f\n", sumMBytes / sumTimeInSecs);
150 mprintf(buf);
153 void Start()
155 startThisTime = timeGetTime();
158 void Stop(DWORD bytes)
160 DWORD t = sumTime;
161 sumTime += timeGetTime() - startThisTime;
162 //Assert_(sumTime >= t);
164 DWORD s = sumBytes;
165 sumBytes += bytes;
166 //Assert_(sumBytes >= s);
169 DWORD sumTime;
170 DWORD sumBytes;
171 DWORD totalTime;
172 DWORD startThisTime;
175 sLZWTimer g_LzwExpandTimer;
177 #define LZWTimerStart() g_LzwExpandTimer.Start()
178 #define LZWTimerStop(bytes) g_LzwExpandTimer.Stop(bytes);
179 #else
180 #define LZWTimerStart()
181 #define LZWTimerStop(bytes)
182 #endif
184 // ----------------------------------------------------------
185 // The Watcom C++ parser is verbose in warning of possible integral
186 // truncation, even in cases where for the given native word size
187 // there is no problem (i.e., assigning longs to ints). Here, we
188 // quiet the warnings, although it wouldn't be bad for someone to
189 // evaluate them. (toml 09-14-96)
191 #pragma warning 389 9
194 // Important constants
196 #define MAX_VALUE ((1 << LZW_BITS) - 1) // end-of-compress-data code
197 #define MAX_CODE (MAX_VALUE - 2) // maximum real code allows
198 #define FLUSH_CODE (MAX_VALUE - 1) // code to lzw string table
199 #define HASHING_SHIFT (LZW_BITS-8) // # bits to shift when hashing
200 #define FLUSH_PAUSE 1000 // wait on full table before flush
202 // we use read & write buffer when doing short-circuit file to memory (toml 01-06-97)
203 #define LZW_FD_TO_MEM_READ_BUFF_SIZE (LZW_FD_READ_BUFF_SIZE + LZW_FD_WRITE_BUFF_SIZE)
205 // LZW thread lock
207 static cThreadLock g_LzwThreadLock;
209 // Overall lzw buffer info
211 void *lzwBuffer; // total buffer
212 bool lzwBufferMalloced; // buffer malloced?
214 // Global tables used for compression & expansion
216 tLzwCodeValue *lzwCodeValue; // code value array
217 tLzwPrefixCode *lzwPrefixCode; // prefix code array
218 tLzwAppendChar *lzwAppendChar; // appended chars array
219 uchar *lzwDecodeStack; // decoded string
221 uchar *lzwFdReadBuff; // buffer for file descriptor source
222 uchar *lzwFdWriteBuff; // buffer for file descriptor dest
224 // Prototypes of internal routines
226 int LzwFindMatch(int hash_prefix, unsigned int hash_character);
227 uchar *LzwDecodeString(uchar * buffer, unsigned int code);
228 // --------------------------------------------------------
229 // INITIALIZATION AND TERMINATION
230 // --------------------------------------------------------
232 // LzwInit() needs to be called once before any of the compression
233 // routines are used.
235 void LzwInit(void)
237 AtExit(LzwTerm);
239 // ------------------------------------------------------------
241 // LzwTerm() needs to be called once when the lzw compression
242 // routines are no longer needed.
244 void LzwTerm(void)
246 LzwFreeBuffer();
248 // ------------------------------------------------------------
249 // BUFFER SETTING
250 // --------------------------------------------------------
252 // LzwSetBuffer() inits and sets buffer to use.
254 // Returns: 0 if ok, -1 if buffer not ok
256 void LzwSetBufferPointers(void *buff)
258 lzwBuffer = buff;
259 lzwDecodeStack = (uchar *) lzwBuffer;
260 lzwFdReadBuff = ((uchar *) lzwDecodeStack) + LZW_DECODE_STACK_SIZE;
261 lzwFdWriteBuff = ((uchar *) lzwFdReadBuff) + LZW_FD_READ_BUFF_SIZE;
262 lzwCodeValue = (tLzwCodeValue *) (((uchar *) lzwFdWriteBuff) + LZW_FD_WRITE_BUFF_SIZE);
263 lzwPrefixCode = (tLzwPrefixCode *) (((uchar *) lzwCodeValue) + (LZW_TABLE_BUFF_SIZE * sizeof(tLzwCodeValue)));
264 lzwAppendChar = ((tLzwAppendChar *) lzwPrefixCode) + (LZW_TABLE_BUFF_SIZE * sizeof(tLzwPrefixCode));
267 int LzwSetBuffer(void *buff, long buffSize)
269 cAutoLock lock(g_LzwThreadLock);
270 // Check buffer size
272 if (buffSize < LZW_BUFF_SIZE)
274 Warning(("LzwSetBuffer: buffer too small!\n"));
275 return (-1);
278 // De-allocate current buffer if malloced
280 LzwTerm();
282 // Set buffer pointers
283 LzwSetBufferPointers(buff);
284 lzwBufferMalloced = FALSE;
286 return (0);
288 // ------------------------------------------------------------
290 // LzwMallocBuffer() allocates buffer with Malloc.
292 // Returns: 0 if success, -1 if error.
294 int LzwMallocBuffer()
296 cAutoLock lock(g_LzwThreadLock);
297 void *buff;
298 if ((lzwBuffer == NULL) || (!lzwBufferMalloced))
300 buff = Malloc(LZW_BUFF_SIZE);
301 if (buff == NULL)
303 Warning(("LzwMallocBuffer: failed to allocate buffers\n"));
304 return (-1);
306 else
308 LzwSetBuffer(buff, LZW_BUFF_SIZE);
309 lzwBufferMalloced = TRUE;
312 return (0);
314 // ------------------------------------------------------------
316 // LzwFreeBuffer() frees buffer.
318 void LzwFreeBuffer()
320 cAutoLock lock(g_LzwThreadLock);
321 if (lzwBufferMalloced)
323 Free(lzwBuffer);
324 lzwBuffer = NULL;
325 lzwBufferMalloced = FALSE;
328 // ------------------------------------------------------------
329 // COMPRESSION
330 // ------------------------------------------------------------
332 // LzwCompress() does lzw compression. It reads uncompressed bytes
333 // from an input source and outputs compressed bytes to an output
334 // destination. It returns the number of bytes the compressed data
335 // took up, or -1 if the compressed data size exceeds the allowed space.
337 // f_ScrCtrl = routine to call to control source data stream
338 // f_SrcGet = routine to call to get next input data byte
339 // srcLoc = source data "location", actual type undefined
340 // srcSize = size of source (input) data
341 // f_DestCtrl = routine to call to control destination data stream
342 // f_DestPut = routine to call to put next output data byte
343 // destLoc = dest data "location", actual type undefined
344 // destSizeMax = maximum allowed size of output data
346 // Returns: actual output compressed size, or -1 if exceeded outputSizeMax
347 // (in which case compression has been aborted)
349 // This macro is used to accumulate output codes into a bit buffer
350 // and call the destination put routine whenever more than 8 bits
351 // are available. If the output data size ever exceeds the alloted
352 // size, the source and destination are shut down and -1 is returned.
354 typedef struct
356 unsigned int next_code; // next available string code
357 unsigned int character; // current character read from source
358 unsigned int string_code; // current string compress code
359 unsigned int index; // index into string table
360 long lzwInputCharCount; // input character count
361 long lzwOutputSize; // current size of output
362 int lzwOutputBitCount; // current bit location in output
363 ulong lzwOutputBitBuffer; // 32-bit buffer holding output bits
364 } LzwC;
366 LzwC lzwc; // current compress state
368 #define LzwOutputCode(code) { \
369 lzwc.lzwOutputBitBuffer |= ((ulong) code) << (32-LZW_BITS-lzwc.lzwOutputBitCount); \
370 lzwc.lzwOutputBitCount += LZW_BITS; \
371 while (lzwc.lzwOutputBitCount >= 8) \
373 (*f_DestPut)(lzwc.lzwOutputBitBuffer >> 24); \
374 if (++lzwc.lzwOutputSize > destSizeMax) \
376 (*f_SrcCtrl)(srcLoc, END); \
377 (*f_DestCtrl)(destLoc, END); \
378 return -1L; \
380 lzwc.lzwOutputBitBuffer <<= 8; \
381 lzwc.lzwOutputBitCount -= 8; \
385 long LzwCompress( tLzwCompressCtrlFunc f_SrcCtrl,// func to control source
386 tLzwCompressSrcGetFunc f_SrcGet, // func to get bytes from source
387 long srcLoc, // source "location" (ptr, FILE *, etc.)
388 long srcSize, // size of source in bytes
389 void (*f_DestCtrl) (long destLoc, LzwCtrl ctrl), // func to control dest
390 void (*f_DestPut) (uchar b), // func to put bytes to dest
391 long destLoc, // dest "location" (ptr, FILE *, etc.)
392 long destSizeMax // max size of dest (or LZW_MAXSIZE)
396 cAutoLock lock(g_LzwThreadLock);
397 // If not already initialized, do it
399 if (lzwBuffer == NULL)
401 if (LzwMallocBuffer() < 0)
402 return (0);
405 // Set up for compress loop
407 lzwc.next_code = 256; // skip over real 256 char values
408 memset(lzwCodeValue, -1, sizeof(tLzwCodeValue) * LZW_TABLE_SIZE);
410 lzwc.lzwOutputSize = 0;
411 lzwc.lzwOutputBitCount = 0;
412 lzwc.lzwOutputBitBuffer = 0;
414 (*f_SrcCtrl) (srcLoc, BEGIN);
415 (*f_DestCtrl) (destLoc, BEGIN);
417 lzwc.string_code = (*f_SrcGet) ();
418 lzwc.lzwInputCharCount = 1;
420 // This is the main loop where it all happens. This loop runs until all of
421 // the input has been exhausted. Note that it stops adding codes to the
422 // table after all of the possible codes have been defined.
424 while (TRUE)
427 // Get next input char, if read all data then exit loop
429 lzwc.character = (*f_SrcGet) ();
430 if (lzwc.lzwInputCharCount++ >= srcSize)
431 break;
433 // See if string is in string table. If it is, get the code value.
435 lzwc.index = LzwFindMatch(lzwc.string_code, lzwc.character);
436 if (lzwCodeValue[lzwc.index] != -1)
437 lzwc.string_code = lzwCodeValue[lzwc.index];
439 // Else if string not in string table, try to add it.
441 else
443 if (lzwc.next_code <= MAX_CODE)
445 lzwCodeValue[lzwc.index] = lzwc.next_code++;
446 lzwPrefixCode[lzwc.index] = lzwc.string_code;
447 lzwAppendChar[lzwc.index] = lzwc.character;
448 LzwOutputCode(lzwc.string_code);
449 lzwc.string_code = lzwc.character;
452 // Else if table is full and has been for a while, flush it, and drain
453 // the code value table too.
455 else if (lzwc.next_code > MAX_CODE + FLUSH_PAUSE)
457 LzwOutputCode(lzwc.string_code);
458 LzwOutputCode(FLUSH_CODE);
459 memset(lzwCodeValue, -1, sizeof(tLzwCodeValue) * LZW_TABLE_SIZE);
460 lzwc.string_code = lzwc.character;
461 lzwc.next_code = 256;
464 // Else if can't add but table not full, just output the code.
466 else
468 lzwc.next_code++;
469 LzwOutputCode(lzwc.string_code);
470 lzwc.string_code = lzwc.character;
475 // Done with processing loop, output current code, end-of-data code,
476 // and a final 0 to flush the buffer.
478 LzwOutputCode(lzwc.string_code);
479 LzwOutputCode(MAX_VALUE);
480 LzwOutputCode(0);
482 // Shut down source and destination and return size of output
484 (*f_SrcCtrl) (srcLoc, END);
485 (*f_DestCtrl) (destLoc, END);
487 return (lzwc.lzwOutputSize);
489 // -----------------------------------------------------------
490 // EXPANSION
491 // -----------------------------------------------------------
493 // LzwExpand() does lzw expansion. It reads compressed bytes
494 // from an input source and outputs uncompressed bytes to an output
495 // destination. It returns the number of bytes the uncompressed data
496 // took up.
498 // f_ScrCtrl = routine to call to control source data stream
499 // f_SrcGet = routine to call to get next input data byte
500 // srcLoc = source data "location", actual type undefined
501 // f_DestCtrl = routine to call to control destination data stream
502 // f_DestPut = routine to call to put next output data byte
503 // destLoc = dest data "location", actual type undefined
504 // destSkip = # bytes of output to skip over before storing
505 // destSize = # bytes of output to store (if 0, everything)
507 // Returns: # bytes in uncompressed output
509 // need to change LZW_PARTIAL_STATE_SIZE if this changes in size!
510 typedef struct
512 int lzwInputBitCount;
513 ulong lzwInputBitBuffer;
514 unsigned int next_code; // next available string code
515 unsigned int new_code; // next code from source
516 unsigned int old_code; // last code gotten from source
517 unsigned int character; // current char for string stack
518 uchar *string; // used to output string in reverse order
519 long outputSize; // size of uncompressed data
520 long destSkip; // # bytes to skip over
521 long destSize; // destination size
522 uchar *last_buffer; // for partial expand, covers old buffer state
523 uchar *curr_buffer; // for partial, the buffer we are using for globals
524 void *in_state; // for partial, pointer for the input installable func to use to save state
525 void *out_state; // for partial, pointer for the output installable func to use to save state
526 } LzwE;
528 LzwE lzwe; // current expand state
530 ///////////////////////////////////////////////////////////////////////////////
532 static unsigned int LzwInputCodeGeneric(tLzwCompressSrcGetFunc f_SrcGet, LzwE * plzwe)
534 unsigned int return_value;
535 while (plzwe->lzwInputBitCount <= 24)
537 plzwe->lzwInputBitBuffer |= ((ulong) (*f_SrcGet) ()) << (24 - plzwe->lzwInputBitCount);
538 plzwe->lzwInputBitCount += 8;
540 return_value = plzwe->lzwInputBitBuffer >> (32 - LZW_BITS);
542 plzwe->lzwInputBitBuffer <<= LZW_BITS;
543 plzwe->lzwInputBitCount -= LZW_BITS;
545 return (return_value);
548 ///////////////////////////////////////
550 static long LzwExpandGeneric(tLzwCompressCtrlFunc f_SrcCtrl, // func to control source
551 tLzwCompressSrcGetFunc f_SrcGet, // func to get bytes from source
552 long srcLoc, // source "location" (ptr, FILE *, etc.)
553 tLzwCompressCtrlFunc f_DestCtrl, // func to control dest
554 tLzwCompressDestPutFunc f_DestPut,// func to put bytes to dest
555 long destLoc // dest "location" (ptr, FILE *, etc.)
558 // Notify the control routines
560 (*f_SrcCtrl) (srcLoc, BEGIN);
561 (*f_DestCtrl) (destLoc, BEGIN);
563 // Get first code & output it.
565 lzwe.old_code = LzwInputCodeGeneric(f_SrcGet, &lzwe);
566 lzwe.character = lzwe.old_code;
568 if (--lzwe.destSkip < 0)
570 (*f_DestPut) (lzwe.old_code);
571 lzwe.outputSize++;
574 // This is the expansion loop. It reads in codes from the source until
575 // it sees the special end-of-data code.
577 while ((lzwe.new_code = LzwInputCodeGeneric(f_SrcGet, &lzwe)) != MAX_VALUE)
580 // If flush code, flush the string table & restart from top of loop
582 if (lzwe.new_code == FLUSH_CODE)
584 lzwe.next_code = 256;
585 lzwe.old_code = LzwInputCodeGeneric(f_SrcGet, &lzwe);
586 lzwe.character = lzwe.old_code;
587 if (--lzwe.destSkip < 0)
589 if (lzwe.outputSize++ >= lzwe.destSize)
590 break;
591 (*f_DestPut) (lzwe.old_code);
593 continue;
596 // Check for the special STRING+CHARACTER+STRING+CHARACTER+STRING, which
597 // generates an undefined code. Handle it by decoding the last code,
598 // adding a single character to the end of the decode string.
600 if (lzwe.new_code >= lzwe.next_code)
602 *lzwDecodeStack = lzwe.character;
603 lzwe.string = LzwDecodeString(lzwDecodeStack + 1, lzwe.old_code);
606 // Otherwise we do a straight decode of the new code.
608 else
610 lzwe.string = LzwDecodeString(lzwDecodeStack, lzwe.new_code);
613 // Output the decode string to the destination, in reverse order.
615 lzwe.character = *lzwe.string;
616 while (lzwe.string >= lzwDecodeStack)
618 if (--lzwe.destSkip < 0)
620 if (lzwe.outputSize++ >= lzwe.destSize)
621 goto DONE_EXPAND;
622 (*f_DestPut) (*lzwe.string);
624 --lzwe.string;
627 // If possible, add a new code to the string table.
629 if (lzwe.next_code <= MAX_CODE)
631 lzwPrefixCode[lzwe.next_code] = lzwe.old_code;
632 lzwAppendChar[lzwe.next_code] = lzwe.character;
633 lzwe.next_code++;
635 lzwe.old_code = lzwe.new_code;
638 // When break out of expansion loop, shut down source & dest & return size.
640 DONE_EXPAND:
641 (*f_SrcCtrl) (srcLoc, END);
642 (*f_DestCtrl) (destLoc, END);
644 return (lzwe.outputSize);
647 ///////////////////////////////////////////////////////////////////////////////
649 static uchar *lzwBuffDestPtr;
650 static int lzwFdSrc;
651 static int lzwReadBuffIndex;
653 static unsigned int LzwInputCodeFileToMemory()
655 unsigned int return_value;
656 register ulong c;
657 register ulong temp;
658 while (lzwe.lzwInputBitCount <= 24)
660 if (lzwReadBuffIndex != LZW_FD_TO_MEM_READ_BUFF_SIZE)
662 c = lzwFdReadBuff[lzwReadBuffIndex];
663 temp = 24 - lzwe.lzwInputBitCount;
664 lzwReadBuffIndex++;
665 lzwe.lzwInputBitBuffer |= (c << temp);
666 lzwe.lzwInputBitCount += 8;
668 else
670 //LZWTimerStop(0);
671 read(lzwFdSrc, lzwFdReadBuff, LZW_FD_TO_MEM_READ_BUFF_SIZE);
672 //LZWTimerStart();
673 lzwReadBuffIndex = 0;
676 return_value = lzwe.lzwInputBitBuffer >> (32 - LZW_BITS);
678 lzwe.lzwInputBitCount -= LZW_BITS;
679 lzwe.lzwInputBitBuffer <<= LZW_BITS;
681 return (return_value);
684 ///////////////////////////////////////
686 static long LzwExpandFileToMemory(long srcLoc, long destLoc)
689 lzwFdSrc = (int) srcLoc;
690 lzwReadBuffIndex = LZW_FD_TO_MEM_READ_BUFF_SIZE;
691 lzwBuffDestPtr = (uchar *) destLoc;
693 // Get first code & output it.
694 lzwe.old_code = LzwInputCodeFileToMemory();
696 lzwe.character = lzwe.old_code;
698 if (--lzwe.destSkip < 0)
700 *lzwBuffDestPtr++ = lzwe.old_code;
701 lzwe.outputSize++;
704 // This is the expansion loop. It reads in codes from the source until
705 // it sees the special end-of-data code.
707 while ((lzwe.new_code = LzwInputCodeFileToMemory()) != MAX_VALUE)
709 // If flush code, flush the string table & restart from top of loop.
710 // Also check for the special STRING+CHARACTER+STRING+CHARACTER+STRING,
711 // which generates an undefined code. Handle it by decoding the last code,
712 // adding a single character to the end of the decode string.
714 if (lzwe.new_code >= lzwe.next_code)
716 if (lzwe.new_code == FLUSH_CODE)
718 lzwe.next_code = 256;
719 lzwe.old_code = LzwInputCodeFileToMemory();
720 lzwe.character = lzwe.old_code;
721 if (--lzwe.destSkip < 0)
723 if (lzwe.outputSize++ >= lzwe.destSize)
724 break;
725 *lzwBuffDestPtr++ = lzwe.old_code;
727 continue;
729 else
731 *lzwDecodeStack = lzwe.character;
732 lzwe.string = LzwDecodeString(lzwDecodeStack + 1, lzwe.old_code);
736 // Otherwise we do a straight decode of the new code.
738 else
740 if (lzwe.new_code < 256)
742 lzwe.string = lzwDecodeStack;
743 *lzwe.string = lzwe.new_code;
745 else
746 lzwe.string = LzwDecodeString(lzwDecodeStack, lzwe.new_code);
749 // Output the decode string to the destination, in reverse order.
751 lzwe.character = *lzwe.string;
752 while (lzwe.string >= lzwDecodeStack)
754 if (--lzwe.destSkip < 0)
756 if (lzwe.outputSize++ >= lzwe.destSize)
757 goto DONE_EXPAND;
758 *lzwBuffDestPtr++ = *lzwe.string;
760 --lzwe.string;
763 // If possible, add a new code to the string table.
765 if (lzwe.next_code <= MAX_CODE)
767 lzwPrefixCode[lzwe.next_code] = lzwe.old_code;
768 lzwAppendChar[lzwe.next_code] = lzwe.character;
769 lzwe.next_code++;
771 lzwe.old_code = lzwe.new_code;
774 // When break out of expansion loop, return size.
776 DONE_EXPAND:
778 return (lzwe.outputSize);
781 ///////////////////////////////////////
783 long LzwExpand( tLzwCompressCtrlFunc f_SrcCtrl, // func to control source
784 tLzwCompressSrcGetFunc f_SrcGet, // func to get bytes from source
785 long srcLoc, // source "location" (ptr, FILE *, etc.)
786 tLzwCompressCtrlFunc f_DestCtrl, // func to control dest
787 tLzwCompressDestPutFunc f_DestPut,// func to put bytes to dest
788 long destLoc, // dest "location" (ptr, FILE *, etc.)
789 long destSkip, // # dest bytes to skip over (or 0)
790 long destSize // # dest bytes to capture (if 0, all)
793 cAutoLock lock(g_LzwThreadLock);
794 long retVal;
795 LZWTimerStart();
797 // If not already initialized, do it
799 if (lzwBuffer == NULL)
801 if (LzwMallocBuffer() < 0)
802 return (0);
805 // Set up for expansion loop
807 lzwe.lzwInputBitCount = 0;
808 lzwe.lzwInputBitBuffer = 0;
809 lzwe.next_code = 256; // next available char after regular 256 chars
810 lzwe.outputSize = 0;
811 lzwe.destSkip = destSkip;
812 lzwe.destSize = destSize ? destSize : LZW_MAXSIZE;
814 if (f_SrcCtrl == LzwFdSrcCtrl && f_SrcGet == LzwFdSrcGet &&
815 f_DestCtrl == LzwBuffDestCtrl && f_DestPut == LzwBuffDestPut)
817 retVal = LzwExpandFileToMemory(srcLoc, destLoc);
819 else
821 retVal = LzwExpandGeneric(f_SrcCtrl, f_SrcGet, srcLoc,
822 f_DestCtrl, f_DestPut, destLoc);
824 LZWTimerStop(lzwe.outputSize)
826 return retVal;
829 ///////////////////////////////////////////////////////////////////////////////
831 // Okay, clearly there is a lot of code overlay between this and LzwExpand but for
832 // performance reasons in LzwExpand I didn't want to attempt to make them wholly
833 // integrated -- Xemu 5/1/96
834 #pragma off(unreferenced)
835 long LzwExpandPartial( tLzwCompressCtrlFunc f_SrcCtrl, // func to control source
836 tLzwCompressSrcGetFunc f_SrcGet, // func to get bytes from source
837 long srcLoc, // source "location" (ptr, FILE *, etc.)
838 tLzwCompressCtrlFunc f_DestCtrl, // func to control dest
839 tLzwCompressDestPutFunc f_DestPut, // func to put bytes to dest
840 long destLoc, // dest "location" (ptr, FILE *, etc.)
841 long destSkip, // # dest bytes to skip over (or 0)
842 long destSize, // # dest bytes to capture (if 0, all)
843 void *state, // internal state structure
844 long cycles // how many iterations of actual processing
847 cAutoLock lock(g_LzwThreadLock);
848 LzwE *plzwe;
849 int count = 0;
850 plzwe = (LzwE *) state;
851 LzwSetBufferPointers(plzwe->curr_buffer);
853 // notify IO installables
854 (*f_SrcCtrl) ((long) plzwe, RESUME);
855 (*f_DestCtrl) ((long) plzwe, RESUME);
857 // This is the expansion loop. It reads in codes from the source until
858 // it sees the special end-of-data code.
860 plzwe->new_code = LzwInputCodeGeneric(f_SrcGet, plzwe);
861 while ((plzwe->new_code != MAX_VALUE) && (count < cycles))
864 // If flush code, flush the string table & restart from top of loop
866 if (plzwe->new_code == FLUSH_CODE)
868 plzwe->next_code = 256;
869 plzwe->old_code = LzwInputCodeGeneric(f_SrcGet, plzwe);
870 plzwe->character = plzwe->old_code;
871 if (--plzwe->destSkip < 0)
873 if (plzwe->outputSize++ >= plzwe->destSize)
874 goto DONE_EXPAND;
875 // break;
876 (*f_DestPut) (plzwe->old_code);
878 goto LOOP_BOTTOM;
881 // Check for the special STRING+CHARACTER+STRING+CHARACTER+STRING, which
882 // generates an undefined code. Handle it by decoding the last code,
883 // adding a single character to the end of the decode string.
885 if (plzwe->new_code >= plzwe->next_code)
887 *lzwDecodeStack = plzwe->character;
888 plzwe->string = LzwDecodeString(lzwDecodeStack + 1, plzwe->old_code);
891 // Otherwise we do a straight decode of the new code.
893 else
895 plzwe->string = LzwDecodeString(lzwDecodeStack, plzwe->new_code);
898 // Output the decode string to the destination, in reverse order.
900 plzwe->character = *plzwe->string;
901 while (plzwe->string >= lzwDecodeStack)
903 if (--plzwe->destSkip < 0)
905 if (plzwe->outputSize++ >= plzwe->destSize)
906 goto DONE_EXPAND;
907 (*f_DestPut) (*plzwe->string);
909 --plzwe->string;
912 // If possible, add a new code to the string table.
914 if (plzwe->next_code <= MAX_CODE)
916 lzwPrefixCode[plzwe->next_code] = plzwe->old_code;
917 lzwAppendChar[plzwe->next_code] = plzwe->character;
918 plzwe->next_code++;
920 plzwe->old_code = plzwe->new_code;
921 LOOP_BOTTOM:
922 count++;
923 if (count < cycles)
924 plzwe->new_code = LzwInputCodeGeneric(f_SrcGet, plzwe);
927 if (plzwe->new_code != MAX_VALUE) // then we are still iterating, cause otherwise this is false or
928 // we jumped down to DONE_EXPAND
930 // notify IO installables so that we can restore again later
931 (*f_SrcCtrl) ((long) plzwe, SUSPEND);
932 (*f_DestCtrl) ((long) plzwe, SUSPEND);
933 LzwSetBufferPointers(plzwe->last_buffer);
934 return (0);
937 // When break out of expansion loop, shut down source & dest & return size.
939 DONE_EXPAND:
941 (*f_SrcCtrl) (srcLoc, END);
942 (*f_DestCtrl) (destLoc, END);
944 LzwSetBufferPointers(plzwe->last_buffer);
946 return (plzwe->outputSize);
948 #pragma on(unreferenced)
950 void LzwExpandPartialStart(
951 tLzwCompressCtrlFunc f_SrcCtrl, // func to control source
952 tLzwCompressSrcGetFunc f_SrcGet, // func to get bytes from source
953 long srcLoc, // source "location" (ptr, FILE *, etc.)
954 tLzwCompressCtrlFunc f_DestCtrl, // func to control dest
955 tLzwCompressDestPutFunc f_DestPut, // func to put bytes to dest
956 long destLoc, // dest "location" (ptr, FILE *, etc.)
957 long destSkip, // # dest bytes to skip over (or 0)
958 long destSize, // # dest bytes to capture (if 0, all)
959 void *state, // internal state structure
960 uchar * buffer // buffer for storing "global" lzw state
963 cAutoLock lock(g_LzwThreadLock);
964 LzwE *plzwe;
965 plzwe = (LzwE *) state;
967 // Check initialization
969 if (buffer == NULL)
971 Warning(("LzwExpandPartialStart: cannot do partial expand with NULL buffer!\n"));
972 return;
975 plzwe->last_buffer = (uchar *) lzwBuffer;
976 plzwe->curr_buffer = buffer;
977 LzwSetBufferPointers(buffer);
979 // Set up for expansion loop
981 plzwe->lzwInputBitCount = 0;
982 plzwe->lzwInputBitBuffer = 0;
983 plzwe->next_code = 256; // next available char after regular 256 chars
984 plzwe->outputSize = 0;
985 plzwe->destSkip = destSkip;
986 plzwe->destSize = destSize ? destSize : LZW_MAXSIZE;
988 // Notify the control routines
990 (*f_SrcCtrl) (srcLoc, BEGIN);
991 (*f_DestCtrl) (destLoc, BEGIN);
993 // Get first code & output it.
995 plzwe->old_code = LzwInputCodeGeneric(f_SrcGet, plzwe);
996 plzwe->character = plzwe->old_code;
998 if (--plzwe->destSkip < 0)
1000 (*f_DestPut) (plzwe->old_code);
1001 plzwe->outputSize++;
1004 (*f_SrcCtrl) ((long) plzwe, SUSPEND);
1005 (*f_DestCtrl) ((long) plzwe, SUSPEND);
1007 LzwSetBufferPointers(plzwe->last_buffer);
1009 // --------------------------------------------------------------
1010 // STANDARD INPUT SOURCES
1011 // --------------------------------------------------------------
1013 // LzwBuffSrcCtrl() and LzwBuffSrcGet() implement a memory buffer
1014 // source for lzw compression and expansion.
1016 static uchar *lzwBuffSrcPtr;
1018 void LzwBuffSrcCtrl(long srcLoc, LzwCtrl ctrl)
1020 LzwE *lzwep;
1021 if (ctrl == BEGIN)
1022 lzwBuffSrcPtr = (uchar *) srcLoc;
1023 else if (ctrl == SUSPEND)
1025 lzwep = (LzwE *) srcLoc;
1026 lzwep->in_state = (void *) lzwBuffSrcPtr;
1028 else if (ctrl == RESUME)
1030 lzwep = (LzwE *) srcLoc;
1031 lzwBuffSrcPtr = (uchar *) lzwep->in_state;
1035 uchar LzwBuffSrcGet()
1037 return (*lzwBuffSrcPtr++);
1039 // ---------------------------------------------------------------
1041 // LzwFdSrcCtrl() and LzwFdSrcGet() implement a file-descriptor
1042 // source (fd = open()) for lzw compression and expansion.
1044 void LzwFdSrcCtrl(long srcLoc, LzwCtrl ctrl)
1046 if (ctrl == BEGIN)
1048 lzwFdSrc = (int) srcLoc;
1049 lzwReadBuffIndex = LZW_FD_READ_BUFF_SIZE;
1053 uchar LzwFdSrcGet()
1055 if (lzwReadBuffIndex == LZW_FD_READ_BUFF_SIZE)
1057 //LZWTimerStop(0);
1058 read(lzwFdSrc, lzwFdReadBuff, LZW_FD_READ_BUFF_SIZE);
1059 //LZWTimerStart();
1060 lzwReadBuffIndex = 0;
1062 return (lzwFdReadBuff[lzwReadBuffIndex++]);
1064 // ---------------------------------------------------------------
1066 // LzwFpSrcCtrl() and LzwFpSrcGet() implement a file-ptr source
1067 // (fp = fopen()) for lzw compression and expansion.
1069 static FILE *lzwFpSrc;
1071 void LzwFpSrcCtrl(long srcLoc, LzwCtrl ctrl)
1073 if (ctrl == BEGIN)
1074 lzwFpSrc = (FILE *) srcLoc;
1077 uchar LzwFpSrcGet()
1079 return (fgetc(lzwFpSrc));
1081 // ---------------------------------------------------------------
1082 // STANDARD OUTPUT SOURCES
1083 // ---------------------------------------------------------------
1085 // LzwBuffDestCtrl() and LzwBuffDestPut() implement a memory
1086 // buffer destination for lzw compression and expansion.
1088 void LzwBuffDestCtrl(long destLoc, LzwCtrl ctrl)
1090 LzwE *lzwep;
1091 if (ctrl == BEGIN)
1092 lzwBuffDestPtr = (uchar *) destLoc;
1093 else if (ctrl == SUSPEND)
1095 lzwep = (LzwE *) destLoc;
1096 lzwep->out_state = (void *) lzwBuffDestPtr;
1098 else if (ctrl == RESUME)
1100 lzwep = (LzwE *) destLoc;
1101 lzwBuffDestPtr = (uchar *) lzwep->out_state;
1106 void LzwBuffDestPut(uchar b)
1108 *lzwBuffDestPtr++ = b;
1110 // ---------------------------------------------------------------
1112 // LzwFdDestCtrl() and LzwFdDestPut() implement a file-descriptor
1113 // destination (fd = open()) for lzw compression and expansion.
1115 static int lzwFdDest;
1116 static int lzwWriteBuffIndex;
1118 void LzwFdDestCtrl(long destLoc, LzwCtrl ctrl)
1120 if (ctrl == BEGIN)
1122 lzwFdDest = (int) destLoc;
1123 lzwWriteBuffIndex = 0;
1125 else if (ctrl == END)
1127 if (lzwWriteBuffIndex)
1128 write(lzwFdDest, lzwFdWriteBuff, lzwWriteBuffIndex);
1132 void LzwFdDestPut(uchar b)
1134 lzwFdWriteBuff[lzwWriteBuffIndex++] = b;
1135 if (lzwWriteBuffIndex == LZW_FD_WRITE_BUFF_SIZE)
1137 write(lzwFdDest, lzwFdWriteBuff, LZW_FD_WRITE_BUFF_SIZE);
1138 lzwWriteBuffIndex = 0;
1141 // ---------------------------------------------------------------
1143 // LzwFpDestCtrl() and LzwFpDestPut() implement a file-ptr destination
1144 // (fp = fopen()) for lzw compression and expansion.
1146 static FILE *lzwFpDest;
1148 void LzwFpDestCtrl(long destLoc, LzwCtrl ctrl)
1150 if (ctrl == BEGIN)
1151 lzwFpDest = (FILE *) destLoc;
1154 void LzwFpDestPut(uchar b)
1156 fputc(b, lzwFpDest);
1158 // ---------------------------------------------------------------
1160 // LzwNullDestCtrl() and LzwNullDestPut() implement a bit-bucket
1161 // destination for lzw compression and expansion. Used to size
1162 // results of compression or expansion.
1164 #pragma off(unreferenced);
1166 void LzwNullDestCtrl(long destLoc, LzwCtrl ctrl)
1170 void LzwNullDestPut(uchar b)
1173 #pragma on(unreferenced);
1175 // -----------------------------------------------------------
1176 // INTERNAL ROUTINES - COMPRESSION
1177 // -----------------------------------------------------------
1179 // LzwFindMatch() is the hashing routine. It tries to find a match
1180 // for the prefix+char string in the string table. If it finds it,
1181 // the index is returned. If the string is not found, the first available
1182 // index in the string table is returned instead.
1184 // hash_prefix = prefix to this code
1185 // hash_character = new character
1187 // Returns: string table index
1189 int LzwFindMatch(int hash_prefix, unsigned int hash_character)
1191 int index;
1192 int offset;
1193 index = (hash_character << HASHING_SHIFT) ^ hash_prefix;
1194 if (index == 0)
1195 offset = 1;
1196 else
1197 offset = LZW_TABLE_SIZE - index;
1198 while (1)
1200 if (lzwCodeValue[index] == -1)
1201 return (index);
1202 if ((lzwPrefixCode[index] == hash_prefix) &&
1203 (lzwAppendChar[index] == hash_character))
1204 return (index);
1205 index -= offset;
1206 if (index < 0)
1207 index += LZW_TABLE_SIZE;
1210 // ------------------------------------------------------------
1211 // INTERNAL ROUTINES - EXPANSION
1212 // ------------------------------------------------------------
1214 // LzwDecodeString() decodes a string from the string table,
1215 // storing it in a buffer. The buffer can then be output in
1216 // reverse order by the expansion program.
1218 uchar *LzwDecodeString(uchar * buffer, unsigned int code)
1220 #if defined(DBG_ON) || defined(ONEOPT)
1221 int i = 0;
1222 #endif
1224 while (code > 255)
1226 *buffer++ = lzwAppendChar[code];
1227 code = lzwPrefixCode[code];
1229 #if defined(DBG_ON) || defined(ONEOPT)
1230 if (i++ >= 4094)
1231 CriticalMsg("LzwDecodeString: Fatal error during code expansion");
1232 #endif
1235 *buffer = code;
1236 return (buffer);