Commit | Line | Data |
---|---|---|
b47c832e RR |
1 | /* $Header$ */ |
2 | ||
3 | /* | |
4 | * Copyright (c) 1988-1997 Sam Leffler | |
5 | * Copyright (c) 1991-1997 Silicon Graphics, Inc. | |
6 | * | |
615a9936 | 7 | * Permission to use, copy, modify, distribute, and sell this software and |
b47c832e RR |
8 | * its documentation for any purpose is hereby granted without fee, provided |
9 | * that (i) the above copyright notices and this permission notice appear in | |
10 | * all copies of the software and related documentation, and (ii) the names of | |
11 | * Sam Leffler and Silicon Graphics may not be used in any advertising or | |
12 | * publicity relating to the software without the specific, prior written | |
13 | * permission of Sam Leffler and Silicon Graphics. | |
615a9936 DW |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, | |
16 | * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY | |
17 | * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | |
18 | * | |
b47c832e RR |
19 | * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR |
20 | * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, | |
21 | * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
615a9936 DW |
22 | * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF |
23 | * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE | |
b47c832e RR |
24 | * OF THIS SOFTWARE. |
25 | */ | |
26 | ||
27 | #include "tiffiop.h" | |
28 | #ifdef LZW_SUPPORT | |
29 | /* | |
30 | * TIFF Library. | |
31 | * Rev 5.0 Lempel-Ziv & Welch Compression Support | |
32 | * | |
33 | * This code is derived from the compress program whose code is | |
34 | * derived from software contributed to Berkeley by James A. Woods, | |
35 | * derived from original work by Spencer Thomas and Joseph Orost. | |
36 | * | |
37 | * The original Berkeley copyright notice appears below in its entirety. | |
38 | */ | |
f6bcfd97 BP |
39 | /* Watcom C++ (or its make utility) doesn't like long filenames */ |
40 | #ifdef __WATCOMC__ | |
41 | #include "tif_pred.h" | |
42 | #else | |
b47c832e | 43 | #include "tif_predict.h" |
f6bcfd97 | 44 | #endif |
b47c832e RR |
45 | |
46 | #include <assert.h> | |
47 | #include <stdio.h> | |
48 | ||
49 | /* | |
50 | * NB: The 5.0 spec describes a different algorithm than Aldus | |
51 | * implements. Specifically, Aldus does code length transitions | |
52 | * one code earlier than should be done (for real LZW). | |
53 | * Earlier versions of this library implemented the correct | |
54 | * LZW algorithm, but emitted codes in a bit order opposite | |
55 | * to the TIFF spec. Thus, to maintain compatibility w/ Aldus | |
56 | * we interpret MSB-LSB ordered codes to be images written w/ | |
57 | * old versions of this library, but otherwise adhere to the | |
58 | * Aldus "off by one" algorithm. | |
59 | * | |
60 | * Future revisions to the TIFF spec are expected to "clarify this issue". | |
61 | */ | |
62 | #define LZW_COMPAT /* include backwards compatibility code */ | |
63 | /* | |
64 | * Each strip of data is supposed to be terminated by a CODE_EOI. | |
65 | * If the following #define is included, the decoder will also | |
66 | * check for end-of-strip w/o seeing this code. This makes the | |
67 | * library more robust, but also slower. | |
68 | */ | |
69 | #define LZW_CHECKEOS /* include checks for strips w/o EOI code */ | |
70 | ||
71 | #define MAXCODE(n) ((1L<<(n))-1) | |
72 | /* | |
73 | * The TIFF spec specifies that encoded bit | |
74 | * strings range from 9 to 12 bits. | |
75 | */ | |
76 | #define BITS_MIN 9 /* start with 9 bits */ | |
77 | #define BITS_MAX 12 /* max of 12 bit strings */ | |
78 | /* predefined codes */ | |
79 | #define CODE_CLEAR 256 /* code to clear string table */ | |
80 | #define CODE_EOI 257 /* end-of-information code */ | |
81 | #define CODE_FIRST 258 /* first free code entry */ | |
82 | #define CODE_MAX MAXCODE(BITS_MAX) | |
83 | #define HSIZE 9001L /* 91% occupancy */ | |
84 | #define HSHIFT (13-8) | |
85 | #ifdef LZW_COMPAT | |
86 | /* NB: +1024 is for compatibility with old files */ | |
87 | #define CSIZE (MAXCODE(BITS_MAX)+1024L) | |
88 | #else | |
89 | #define CSIZE (MAXCODE(BITS_MAX)+1L) | |
90 | #endif | |
91 | ||
92 | /* | |
93 | * State block for each open TIFF file using LZW | |
94 | * compression/decompression. Note that the predictor | |
95 | * state block must be first in this data structure. | |
96 | */ | |
97 | typedef struct { | |
98 | TIFFPredictorState predict; /* predictor super class */ | |
99 | ||
100 | u_short nbits; /* # of bits/code */ | |
101 | u_short maxcode; /* maximum code for lzw_nbits */ | |
102 | u_short free_ent; /* next free entry in hash table */ | |
103 | long nextdata; /* next bits of i/o */ | |
104 | long nextbits; /* # of valid bits in lzw_nextdata */ | |
105 | } LZWBaseState; | |
106 | ||
107 | #define lzw_nbits base.nbits | |
108 | #define lzw_maxcode base.maxcode | |
109 | #define lzw_free_ent base.free_ent | |
110 | #define lzw_nextdata base.nextdata | |
111 | #define lzw_nextbits base.nextbits | |
112 | ||
113 | /* | |
114 | * Decoding-specific state. | |
115 | */ | |
116 | typedef struct code_ent { | |
117 | struct code_ent *next; | |
118 | u_short length; /* string len, including this token */ | |
119 | u_char value; /* data value */ | |
120 | u_char firstchar; /* first token of string */ | |
121 | } code_t; | |
122 | ||
615a9936 | 123 | typedef int (LINKAGEMODE *decodeFunc)(TIFF*, tidata_t, tsize_t, tsample_t); |
b47c832e RR |
124 | |
125 | typedef struct { | |
126 | LZWBaseState base; | |
127 | long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ | |
128 | long dec_restart; /* restart count */ | |
129 | #ifdef LZW_CHECKEOS | |
130 | long dec_bitsleft; /* available bits in raw data */ | |
131 | #endif | |
132 | decodeFunc dec_decode; /* regular or backwards compatible */ | |
133 | code_t* dec_codep; /* current recognized code */ | |
134 | code_t* dec_oldcodep; /* previously recognized code */ | |
135 | code_t* dec_free_entp; /* next free entry */ | |
136 | code_t* dec_maxcodep; /* max available entry */ | |
137 | code_t* dec_codetab; /* kept separate for small machines */ | |
138 | } LZWDecodeState; | |
139 | ||
140 | /* | |
141 | * Encoding-specific state. | |
142 | */ | |
143 | typedef uint16 hcode_t; /* codes fit in 16 bits */ | |
144 | typedef struct { | |
145 | long hash; | |
146 | hcode_t code; | |
147 | } hash_t; | |
148 | ||
149 | typedef struct { | |
150 | LZWBaseState base; | |
151 | int enc_oldcode; /* last code encountered */ | |
152 | long enc_checkpoint; /* point at which to clear table */ | |
153 | #define CHECK_GAP 10000 /* enc_ratio check interval */ | |
154 | long enc_ratio; /* current compression ratio */ | |
155 | long enc_incount; /* (input) data bytes encoded */ | |
156 | long enc_outcount; /* encoded (output) bytes */ | |
157 | tidata_t enc_rawlimit; /* bound on tif_rawdata buffer */ | |
158 | hash_t* enc_hashtab; /* kept separate for small machines */ | |
159 | } LZWEncodeState; | |
160 | ||
161 | #define LZWState(tif) ((LZWBaseState*) (tif)->tif_data) | |
162 | #define DecoderState(tif) ((LZWDecodeState*) LZWState(tif)) | |
163 | #define EncoderState(tif) ((LZWEncodeState*) LZWState(tif)) | |
164 | ||
615a9936 | 165 | static int LINKAGEMODE LZWDecode(TIFF*, tidata_t, tsize_t, tsample_t); |
b47c832e | 166 | #ifdef LZW_COMPAT |
615a9936 | 167 | static int LINKAGEMODE LZWDecodeCompat(TIFF*, tidata_t, tsize_t, tsample_t); |
b47c832e RR |
168 | #endif |
169 | static void cl_hash(LZWEncodeState*); | |
170 | ||
171 | /* | |
172 | * LZW Decoder. | |
173 | */ | |
174 | ||
175 | #ifdef LZW_CHECKEOS | |
176 | /* | |
177 | * This check shouldn't be necessary because each | |
178 | * strip is suppose to be terminated with CODE_EOI. | |
179 | */ | |
180 | #define NextCode(_tif, _sp, _bp, _code, _get) { \ | |
181 | if ((_sp)->dec_bitsleft < nbits) { \ | |
182 | TIFFWarning(_tif->tif_name, \ | |
183 | "LZWDecode: Strip %d not terminated with EOI code", \ | |
184 | _tif->tif_curstrip); \ | |
185 | _code = CODE_EOI; \ | |
186 | } else { \ | |
187 | _get(_sp,_bp,_code); \ | |
188 | (_sp)->dec_bitsleft -= nbits; \ | |
189 | } \ | |
190 | } | |
191 | #else | |
192 | #define NextCode(tif, sp, bp, code, get) get(sp, bp, code) | |
193 | #endif | |
194 | ||
195 | static int | |
196 | LZWSetupDecode(TIFF* tif) | |
197 | { | |
198 | LZWDecodeState* sp = DecoderState(tif); | |
199 | static const char module[] = " LZWSetupDecode"; | |
200 | int code; | |
201 | ||
202 | assert(sp != NULL); | |
203 | if (sp->dec_codetab == NULL) { | |
204 | sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); | |
205 | if (sp->dec_codetab == NULL) { | |
206 | TIFFError(module, "No space for LZW code table"); | |
207 | return (0); | |
208 | } | |
209 | /* | |
210 | * Pre-load the table. | |
211 | */ | |
212 | for (code = 255; code >= 0; code--) { | |
213 | sp->dec_codetab[code].value = code; | |
214 | sp->dec_codetab[code].firstchar = code; | |
215 | sp->dec_codetab[code].length = 1; | |
216 | sp->dec_codetab[code].next = NULL; | |
217 | } | |
218 | } | |
219 | return (1); | |
220 | } | |
221 | ||
222 | /* | |
223 | * Setup state for decoding a strip. | |
224 | */ | |
225 | static int | |
226 | LZWPreDecode(TIFF* tif, tsample_t s) | |
227 | { | |
228 | LZWDecodeState *sp = DecoderState(tif); | |
229 | ||
230 | (void) s; | |
231 | assert(sp != NULL); | |
232 | /* | |
233 | * Check for old bit-reversed codes. | |
234 | */ | |
235 | if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) { | |
236 | #ifdef LZW_COMPAT | |
237 | if (!sp->dec_decode) { | |
238 | TIFFWarning(tif->tif_name, | |
239 | "Old-style LZW codes, convert file"); | |
240 | /* | |
241 | * Override default decoding methods with | |
242 | * ones that deal with the old coding. | |
243 | * Otherwise the predictor versions set | |
244 | * above will call the compatibility routines | |
245 | * through the dec_decode method. | |
246 | */ | |
247 | tif->tif_decoderow = LZWDecodeCompat; | |
248 | tif->tif_decodestrip = LZWDecodeCompat; | |
249 | tif->tif_decodetile = LZWDecodeCompat; | |
250 | /* | |
251 | * If doing horizontal differencing, must | |
252 | * re-setup the predictor logic since we | |
253 | * switched the basic decoder methods... | |
254 | */ | |
255 | (*tif->tif_setupdecode)(tif); | |
256 | sp->dec_decode = LZWDecodeCompat; | |
257 | } | |
258 | sp->lzw_maxcode = MAXCODE(BITS_MIN); | |
259 | #else /* !LZW_COMPAT */ | |
260 | if (!sp->dec_decode) { | |
261 | TIFFError(tif->tif_name, | |
262 | "Old-style LZW codes not supported"); | |
263 | sp->dec_decode = LZWDecode; | |
264 | } | |
265 | return (0); | |
266 | #endif/* !LZW_COMPAT */ | |
267 | } else { | |
268 | sp->lzw_maxcode = MAXCODE(BITS_MIN)-1; | |
269 | sp->dec_decode = LZWDecode; | |
270 | } | |
271 | sp->lzw_nbits = BITS_MIN; | |
272 | sp->lzw_nextbits = 0; | |
273 | sp->lzw_nextdata = 0; | |
274 | ||
275 | sp->dec_restart = 0; | |
276 | sp->dec_nbitsmask = MAXCODE(BITS_MIN); | |
277 | #ifdef LZW_CHECKEOS | |
278 | sp->dec_bitsleft = tif->tif_rawcc << 3; | |
279 | #endif | |
280 | sp->dec_free_entp = sp->dec_codetab + CODE_FIRST; | |
281 | /* | |
282 | * Zero entries that are not yet filled in. We do | |
283 | * this to guard against bogus input data that causes | |
284 | * us to index into undefined entries. If you can | |
285 | * come up with a way to safely bounds-check input codes | |
286 | * while decoding then you can remove this operation. | |
287 | */ | |
288 | _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t)); | |
289 | sp->dec_oldcodep = &sp->dec_codetab[-1]; | |
290 | sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1]; | |
291 | return (1); | |
292 | } | |
293 | ||
294 | /* | |
295 | * Decode a "hunk of data". | |
296 | */ | |
297 | #define GetNextCode(sp, bp, code) { \ | |
298 | nextdata = (nextdata<<8) | *(bp)++; \ | |
299 | nextbits += 8; \ | |
300 | if (nextbits < nbits) { \ | |
301 | nextdata = (nextdata<<8) | *(bp)++; \ | |
302 | nextbits += 8; \ | |
303 | } \ | |
304 | code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \ | |
305 | nextbits -= nbits; \ | |
306 | } | |
307 | ||
308 | static void | |
309 | codeLoop(TIFF* tif) | |
310 | { | |
311 | TIFFError(tif->tif_name, | |
312 | "LZWDecode: Bogus encoding, loop in the code table; scanline %d", | |
313 | tif->tif_row); | |
314 | } | |
315 | ||
316 | static int | |
317 | LZWDecode(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s) | |
318 | { | |
319 | LZWDecodeState *sp = DecoderState(tif); | |
320 | char *op = (char*) op0; | |
321 | long occ = (long) occ0; | |
322 | char *tp; | |
323 | u_char *bp; | |
324 | hcode_t code; | |
325 | int len; | |
326 | long nbits, nextbits, nextdata, nbitsmask; | |
327 | code_t *codep, *free_entp, *maxcodep, *oldcodep; | |
328 | ||
329 | (void) s; | |
330 | assert(sp != NULL); | |
331 | /* | |
332 | * Restart interrupted output operation. | |
333 | */ | |
334 | if (sp->dec_restart) { | |
335 | long residue; | |
336 | ||
337 | codep = sp->dec_codep; | |
338 | residue = codep->length - sp->dec_restart; | |
339 | if (residue > occ) { | |
340 | /* | |
341 | * Residue from previous decode is sufficient | |
342 | * to satisfy decode request. Skip to the | |
343 | * start of the decoded string, place decoded | |
344 | * values in the output buffer, and return. | |
345 | */ | |
346 | sp->dec_restart += occ; | |
347 | do { | |
348 | codep = codep->next; | |
349 | } while (--residue > occ && codep); | |
350 | if (codep) { | |
351 | tp = op + occ; | |
352 | do { | |
353 | *--tp = codep->value; | |
354 | codep = codep->next; | |
355 | } while (--occ && codep); | |
356 | } | |
357 | return (1); | |
358 | } | |
359 | /* | |
360 | * Residue satisfies only part of the decode request. | |
361 | */ | |
362 | op += residue, occ -= residue; | |
363 | tp = op; | |
364 | do { | |
365 | int t; | |
366 | --tp; | |
367 | t = codep->value; | |
368 | codep = codep->next; | |
369 | *tp = t; | |
370 | } while (--residue && codep); | |
371 | sp->dec_restart = 0; | |
372 | } | |
373 | ||
374 | bp = (u_char *)tif->tif_rawcp; | |
375 | nbits = sp->lzw_nbits; | |
376 | nextdata = sp->lzw_nextdata; | |
377 | nextbits = sp->lzw_nextbits; | |
378 | nbitsmask = sp->dec_nbitsmask; | |
379 | oldcodep = sp->dec_oldcodep; | |
380 | free_entp = sp->dec_free_entp; | |
381 | maxcodep = sp->dec_maxcodep; | |
382 | ||
383 | while (occ > 0) { | |
384 | NextCode(tif, sp, bp, code, GetNextCode); | |
385 | if (code == CODE_EOI) | |
386 | break; | |
387 | if (code == CODE_CLEAR) { | |
388 | free_entp = sp->dec_codetab + CODE_FIRST; | |
389 | nbits = BITS_MIN; | |
390 | nbitsmask = MAXCODE(BITS_MIN); | |
391 | maxcodep = sp->dec_codetab + nbitsmask-1; | |
392 | NextCode(tif, sp, bp, code, GetNextCode); | |
393 | if (code == CODE_EOI) | |
394 | break; | |
395 | *op++ = code, occ--; | |
396 | oldcodep = sp->dec_codetab + code; | |
397 | continue; | |
398 | } | |
399 | codep = sp->dec_codetab + code; | |
400 | ||
401 | /* | |
402 | * Add the new entry to the code table. | |
403 | */ | |
404 | assert(&sp->dec_codetab[0] <= free_entp && free_entp < &sp->dec_codetab[CSIZE]); | |
405 | free_entp->next = oldcodep; | |
406 | free_entp->firstchar = free_entp->next->firstchar; | |
407 | free_entp->length = free_entp->next->length+1; | |
408 | free_entp->value = (codep < free_entp) ? | |
409 | codep->firstchar : free_entp->firstchar; | |
410 | if (++free_entp > maxcodep) { | |
411 | if (++nbits > BITS_MAX) /* should not happen */ | |
412 | nbits = BITS_MAX; | |
413 | nbitsmask = MAXCODE(nbits); | |
414 | maxcodep = sp->dec_codetab + nbitsmask-1; | |
415 | } | |
416 | oldcodep = codep; | |
417 | if (code >= 256) { | |
418 | /* | |
419 | * Code maps to a string, copy string | |
420 | * value to output (written in reverse). | |
421 | */ | |
422 | if (codep->length > occ) { | |
423 | /* | |
424 | * String is too long for decode buffer, | |
425 | * locate portion that will fit, copy to | |
426 | * the decode buffer, and setup restart | |
427 | * logic for the next decoding call. | |
428 | */ | |
429 | sp->dec_codep = codep; | |
430 | do { | |
431 | codep = codep->next; | |
432 | } while (codep && codep->length > occ); | |
433 | if (codep) { | |
434 | sp->dec_restart = occ; | |
435 | tp = op + occ; | |
436 | do { | |
437 | *--tp = codep->value; | |
438 | codep = codep->next; | |
439 | } while (--occ && codep); | |
440 | if (codep) | |
441 | codeLoop(tif); | |
442 | } | |
443 | break; | |
444 | } | |
445 | len = codep->length; | |
446 | tp = op + len; | |
447 | do { | |
448 | int t; | |
449 | --tp; | |
450 | t = codep->value; | |
451 | codep = codep->next; | |
452 | *tp = t; | |
453 | } while (codep && tp > op); | |
454 | if (codep) { | |
455 | codeLoop(tif); | |
456 | break; | |
457 | } | |
458 | op += len, occ -= len; | |
459 | } else | |
460 | *op++ = code, occ--; | |
461 | } | |
462 | ||
463 | tif->tif_rawcp = (tidata_t) bp; | |
464 | sp->lzw_nbits = (u_short) nbits; | |
465 | sp->lzw_nextdata = nextdata; | |
466 | sp->lzw_nextbits = nextbits; | |
467 | sp->dec_nbitsmask = nbitsmask; | |
468 | sp->dec_oldcodep = oldcodep; | |
469 | sp->dec_free_entp = free_entp; | |
470 | sp->dec_maxcodep = maxcodep; | |
471 | ||
472 | if (occ > 0) { | |
473 | TIFFError(tif->tif_name, | |
474 | "LZWDecode: Not enough data at scanline %d (short %d bytes)", | |
475 | tif->tif_row, occ); | |
476 | return (0); | |
477 | } | |
478 | return (1); | |
479 | } | |
480 | ||
481 | #ifdef LZW_COMPAT | |
482 | /* | |
483 | * Decode a "hunk of data" for old images. | |
484 | */ | |
485 | #define GetNextCodeCompat(sp, bp, code) { \ | |
486 | nextdata |= (u_long) *(bp)++ << nextbits; \ | |
487 | nextbits += 8; \ | |
488 | if (nextbits < nbits) { \ | |
489 | nextdata |= (u_long) *(bp)++ << nextbits; \ | |
490 | nextbits += 8; \ | |
491 | } \ | |
492 | code = (hcode_t)(nextdata & nbitsmask); \ | |
493 | nextdata >>= nbits; \ | |
494 | nextbits -= nbits; \ | |
495 | } | |
496 | ||
615a9936 | 497 | static int LINKAGEMODE |
b47c832e RR |
498 | LZWDecodeCompat(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s) |
499 | { | |
500 | LZWDecodeState *sp = DecoderState(tif); | |
501 | char *op = (char*) op0; | |
502 | long occ = (long) occ0; | |
503 | char *tp; | |
504 | u_char *bp; | |
505 | int code, nbits; | |
506 | long nextbits, nextdata, nbitsmask; | |
507 | code_t *codep, *free_entp, *maxcodep, *oldcodep; | |
508 | ||
509 | (void) s; | |
510 | assert(sp != NULL); | |
511 | /* | |
512 | * Restart interrupted output operation. | |
513 | */ | |
514 | if (sp->dec_restart) { | |
515 | long residue; | |
516 | ||
517 | codep = sp->dec_codep; | |
518 | residue = codep->length - sp->dec_restart; | |
519 | if (residue > occ) { | |
520 | /* | |
521 | * Residue from previous decode is sufficient | |
522 | * to satisfy decode request. Skip to the | |
523 | * start of the decoded string, place decoded | |
524 | * values in the output buffer, and return. | |
525 | */ | |
526 | sp->dec_restart += occ; | |
527 | do { | |
528 | codep = codep->next; | |
529 | } while (--residue > occ); | |
530 | tp = op + occ; | |
531 | do { | |
532 | *--tp = codep->value; | |
533 | codep = codep->next; | |
534 | } while (--occ); | |
535 | return (1); | |
536 | } | |
537 | /* | |
538 | * Residue satisfies only part of the decode request. | |
539 | */ | |
540 | op += residue, occ -= residue; | |
541 | tp = op; | |
542 | do { | |
543 | *--tp = codep->value; | |
544 | codep = codep->next; | |
545 | } while (--residue); | |
546 | sp->dec_restart = 0; | |
547 | } | |
548 | ||
549 | bp = (u_char *)tif->tif_rawcp; | |
550 | nbits = sp->lzw_nbits; | |
551 | nextdata = sp->lzw_nextdata; | |
552 | nextbits = sp->lzw_nextbits; | |
553 | nbitsmask = sp->dec_nbitsmask; | |
554 | oldcodep = sp->dec_oldcodep; | |
555 | free_entp = sp->dec_free_entp; | |
556 | maxcodep = sp->dec_maxcodep; | |
557 | ||
558 | while (occ > 0) { | |
559 | NextCode(tif, sp, bp, code, GetNextCodeCompat); | |
560 | if (code == CODE_EOI) | |
561 | break; | |
562 | if (code == CODE_CLEAR) { | |
563 | free_entp = sp->dec_codetab + CODE_FIRST; | |
564 | nbits = BITS_MIN; | |
565 | nbitsmask = MAXCODE(BITS_MIN); | |
566 | maxcodep = sp->dec_codetab + nbitsmask; | |
567 | NextCode(tif, sp, bp, code, GetNextCodeCompat); | |
568 | if (code == CODE_EOI) | |
569 | break; | |
570 | *op++ = code, occ--; | |
571 | oldcodep = sp->dec_codetab + code; | |
572 | continue; | |
573 | } | |
574 | codep = sp->dec_codetab + code; | |
575 | ||
576 | /* | |
577 | * Add the new entry to the code table. | |
578 | */ | |
579 | assert(&sp->dec_codetab[0] <= free_entp && free_entp < &sp->dec_codetab[CSIZE]); | |
580 | free_entp->next = oldcodep; | |
581 | free_entp->firstchar = free_entp->next->firstchar; | |
582 | free_entp->length = free_entp->next->length+1; | |
583 | free_entp->value = (codep < free_entp) ? | |
584 | codep->firstchar : free_entp->firstchar; | |
585 | if (++free_entp > maxcodep) { | |
586 | if (++nbits > BITS_MAX) /* should not happen */ | |
587 | nbits = BITS_MAX; | |
588 | nbitsmask = MAXCODE(nbits); | |
589 | maxcodep = sp->dec_codetab + nbitsmask; | |
590 | } | |
591 | oldcodep = codep; | |
592 | if (code >= 256) { | |
593 | /* | |
594 | * Code maps to a string, copy string | |
595 | * value to output (written in reverse). | |
596 | */ | |
597 | if (codep->length > occ) { | |
598 | /* | |
599 | * String is too long for decode buffer, | |
600 | * locate portion that will fit, copy to | |
601 | * the decode buffer, and setup restart | |
602 | * logic for the next decoding call. | |
603 | */ | |
604 | sp->dec_codep = codep; | |
605 | do { | |
606 | codep = codep->next; | |
607 | } while (codep->length > occ); | |
608 | sp->dec_restart = occ; | |
609 | tp = op + occ; | |
610 | do { | |
611 | *--tp = codep->value; | |
612 | codep = codep->next; | |
613 | } while (--occ); | |
614 | break; | |
615 | } | |
616 | op += codep->length, occ -= codep->length; | |
617 | tp = op; | |
618 | do { | |
619 | *--tp = codep->value; | |
620 | } while( (codep = codep->next) != NULL); | |
621 | } else | |
622 | *op++ = code, occ--; | |
623 | } | |
624 | ||
625 | tif->tif_rawcp = (tidata_t) bp; | |
626 | sp->lzw_nbits = nbits; | |
627 | sp->lzw_nextdata = nextdata; | |
628 | sp->lzw_nextbits = nextbits; | |
629 | sp->dec_nbitsmask = nbitsmask; | |
630 | sp->dec_oldcodep = oldcodep; | |
631 | sp->dec_free_entp = free_entp; | |
632 | sp->dec_maxcodep = maxcodep; | |
633 | ||
634 | if (occ > 0) { | |
635 | TIFFError(tif->tif_name, | |
636 | "LZWDecodeCompat: Not enough data at scanline %d (short %d bytes)", | |
637 | tif->tif_row, occ); | |
638 | return (0); | |
639 | } | |
640 | return (1); | |
641 | } | |
642 | #endif /* LZW_COMPAT */ | |
643 | ||
644 | /* | |
645 | * LZW Encoding. | |
646 | */ | |
647 | ||
648 | static int | |
649 | LZWSetupEncode(TIFF* tif) | |
650 | { | |
651 | LZWEncodeState* sp = EncoderState(tif); | |
652 | static const char module[] = "LZWSetupEncode"; | |
653 | ||
654 | assert(sp != NULL); | |
655 | sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t)); | |
656 | if (sp->enc_hashtab == NULL) { | |
657 | TIFFError(module, "No space for LZW hash table"); | |
658 | return (0); | |
659 | } | |
660 | return (1); | |
661 | } | |
662 | ||
663 | /* | |
664 | * Reset encoding state at the start of a strip. | |
665 | */ | |
666 | static int | |
667 | LZWPreEncode(TIFF* tif, tsample_t s) | |
668 | { | |
669 | LZWEncodeState *sp = EncoderState(tif); | |
670 | ||
671 | (void) s; | |
672 | assert(sp != NULL); | |
673 | sp->lzw_nbits = BITS_MIN; | |
674 | sp->lzw_maxcode = MAXCODE(BITS_MIN); | |
675 | sp->lzw_free_ent = CODE_FIRST; | |
676 | sp->lzw_nextbits = 0; | |
677 | sp->lzw_nextdata = 0; | |
678 | sp->enc_checkpoint = CHECK_GAP; | |
679 | sp->enc_ratio = 0; | |
680 | sp->enc_incount = 0; | |
681 | sp->enc_outcount = 0; | |
682 | /* | |
683 | * The 4 here insures there is space for 2 max-sized | |
684 | * codes in LZWEncode and LZWPostDecode. | |
685 | */ | |
686 | sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4; | |
687 | cl_hash(sp); /* clear hash table */ | |
688 | sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */ | |
689 | return (1); | |
690 | } | |
691 | ||
692 | #define CALCRATIO(sp, rat) { \ | |
693 | if (incount > 0x007fffff) { /* NB: shift will overflow */\ | |
694 | rat = outcount >> 8; \ | |
695 | rat = (rat == 0 ? 0x7fffffff : incount/rat); \ | |
696 | } else \ | |
697 | rat = (incount<<8) / outcount; \ | |
698 | } | |
699 | #define PutNextCode(op, c) { \ | |
700 | nextdata = (nextdata << nbits) | c; \ | |
701 | nextbits += nbits; \ | |
702 | *op++ = (u_char)(nextdata >> (nextbits-8)); \ | |
703 | nextbits -= 8; \ | |
704 | if (nextbits >= 8) { \ | |
705 | *op++ = (u_char)(nextdata >> (nextbits-8)); \ | |
706 | nextbits -= 8; \ | |
707 | } \ | |
708 | outcount += nbits; \ | |
709 | } | |
710 | ||
711 | /* | |
712 | * Encode a chunk of pixels. | |
713 | * | |
615a9936 | 714 | * Uses an open addressing double hashing (no chaining) on the |
b47c832e RR |
715 | * prefix code/next character combination. We do a variant of |
716 | * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's | |
717 | * relatively-prime secondary probe. Here, the modular division | |
615a9936 | 718 | * first probe is gives way to a faster exclusive-or manipulation. |
b47c832e RR |
719 | * Also do block compression with an adaptive reset, whereby the |
720 | * code table is cleared when the compression ratio decreases, | |
721 | * but after the table fills. The variable-length output codes | |
722 | * are re-sized at this point, and a CODE_CLEAR is generated | |
615a9936 | 723 | * for the decoder. |
b47c832e | 724 | */ |
615a9936 | 725 | static int LINKAGEMODE |
b47c832e RR |
726 | LZWEncode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s) |
727 | { | |
728 | register LZWEncodeState *sp = EncoderState(tif); | |
729 | register long fcode; | |
730 | register hash_t *hp; | |
731 | register int h, c; | |
732 | hcode_t ent; | |
733 | long disp; | |
734 | long incount, outcount, checkpoint; | |
735 | long nextdata, nextbits; | |
736 | int free_ent, maxcode, nbits; | |
737 | tidata_t op, limit; | |
738 | ||
739 | (void) s; | |
740 | if (sp == NULL) | |
741 | return (0); | |
742 | /* | |
743 | * Load local state. | |
744 | */ | |
745 | incount = sp->enc_incount; | |
746 | outcount = sp->enc_outcount; | |
747 | checkpoint = sp->enc_checkpoint; | |
748 | nextdata = sp->lzw_nextdata; | |
749 | nextbits = sp->lzw_nextbits; | |
750 | free_ent = sp->lzw_free_ent; | |
751 | maxcode = sp->lzw_maxcode; | |
752 | nbits = sp->lzw_nbits; | |
753 | op = tif->tif_rawcp; | |
754 | limit = sp->enc_rawlimit; | |
755 | ent = sp->enc_oldcode; | |
756 | ||
757 | if (ent == (hcode_t) -1 && cc > 0) { | |
758 | /* | |
759 | * NB: This is safe because it can only happen | |
760 | * at the start of a strip where we know there | |
761 | * is space in the data buffer. | |
762 | */ | |
763 | PutNextCode(op, CODE_CLEAR); | |
764 | ent = *bp++; cc--; incount++; | |
765 | } | |
766 | while (cc > 0) { | |
767 | c = *bp++; cc--; incount++; | |
768 | fcode = ((long)c << BITS_MAX) + ent; | |
769 | h = (c << HSHIFT) ^ ent; /* xor hashing */ | |
770 | #ifdef _WINDOWS | |
771 | /* | |
772 | * Check hash index for an overflow. | |
773 | */ | |
774 | if (h >= HSIZE) | |
775 | h -= HSIZE; | |
776 | #endif | |
777 | hp = &sp->enc_hashtab[h]; | |
778 | if (hp->hash == fcode) { | |
779 | ent = hp->code; | |
780 | continue; | |
781 | } | |
782 | if (hp->hash >= 0) { | |
783 | /* | |
784 | * Primary hash failed, check secondary hash. | |
785 | */ | |
786 | disp = HSIZE - h; | |
787 | if (h == 0) | |
788 | disp = 1; | |
789 | do { | |
790 | /* | |
791 | * Avoid pointer arithmetic 'cuz of | |
792 | * wraparound problems with segments. | |
793 | */ | |
794 | if ((h -= disp) < 0) | |
795 | h += HSIZE; | |
796 | hp = &sp->enc_hashtab[h]; | |
797 | if (hp->hash == fcode) { | |
798 | ent = hp->code; | |
799 | goto hit; | |
800 | } | |
801 | } while (hp->hash >= 0); | |
802 | } | |
803 | /* | |
804 | * New entry, emit code and add to table. | |
805 | */ | |
806 | /* | |
807 | * Verify there is space in the buffer for the code | |
808 | * and any potential Clear code that might be emitted | |
809 | * below. The value of limit is setup so that there | |
810 | * are at least 4 bytes free--room for 2 codes. | |
811 | */ | |
812 | if (op > limit) { | |
813 | tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata); | |
814 | TIFFFlushData1(tif); | |
815 | op = tif->tif_rawdata; | |
816 | } | |
817 | PutNextCode(op, ent); | |
818 | ent = c; | |
819 | hp->code = free_ent++; | |
820 | hp->hash = fcode; | |
821 | if (free_ent == CODE_MAX-1) { | |
822 | /* table is full, emit clear code and reset */ | |
823 | cl_hash(sp); | |
824 | sp->enc_ratio = 0; | |
825 | incount = 0; | |
826 | outcount = 0; | |
827 | free_ent = CODE_FIRST; | |
828 | PutNextCode(op, CODE_CLEAR); | |
829 | nbits = BITS_MIN; | |
830 | maxcode = MAXCODE(BITS_MIN); | |
831 | } else { | |
832 | /* | |
833 | * If the next entry is going to be too big for | |
834 | * the code size, then increase it, if possible. | |
835 | */ | |
836 | if (free_ent > maxcode) { | |
837 | nbits++; | |
838 | assert(nbits <= BITS_MAX); | |
839 | maxcode = (int) MAXCODE(nbits); | |
840 | } else if (incount >= checkpoint) { | |
841 | long rat; | |
842 | /* | |
843 | * Check compression ratio and, if things seem | |
844 | * to be slipping, clear the hash table and | |
845 | * reset state. The compression ratio is a | |
846 | * 24+8-bit fractional number. | |
847 | */ | |
848 | checkpoint = incount+CHECK_GAP; | |
849 | CALCRATIO(sp, rat); | |
850 | if (rat <= sp->enc_ratio) { | |
851 | cl_hash(sp); | |
852 | sp->enc_ratio = 0; | |
853 | incount = 0; | |
854 | outcount = 0; | |
855 | free_ent = CODE_FIRST; | |
856 | PutNextCode(op, CODE_CLEAR); | |
857 | nbits = BITS_MIN; | |
858 | maxcode = MAXCODE(BITS_MIN); | |
859 | } else | |
860 | sp->enc_ratio = rat; | |
861 | } | |
862 | } | |
863 | hit: | |
864 | ; | |
865 | } | |
866 | ||
867 | /* | |
868 | * Restore global state. | |
869 | */ | |
870 | sp->enc_incount = incount; | |
871 | sp->enc_outcount = outcount; | |
872 | sp->enc_checkpoint = checkpoint; | |
873 | sp->enc_oldcode = ent; | |
874 | sp->lzw_nextdata = nextdata; | |
875 | sp->lzw_nextbits = nextbits; | |
876 | sp->lzw_free_ent = free_ent; | |
877 | sp->lzw_maxcode = maxcode; | |
878 | sp->lzw_nbits = nbits; | |
879 | tif->tif_rawcp = op; | |
880 | return (1); | |
881 | } | |
882 | ||
883 | /* | |
884 | * Finish off an encoded strip by flushing the last | |
885 | * string and tacking on an End Of Information code. | |
886 | */ | |
887 | static int | |
888 | LZWPostEncode(TIFF* tif) | |
889 | { | |
890 | register LZWEncodeState *sp = EncoderState(tif); | |
891 | tidata_t op = tif->tif_rawcp; | |
892 | long nextbits = sp->lzw_nextbits; | |
893 | long nextdata = sp->lzw_nextdata; | |
894 | long outcount = sp->enc_outcount; | |
895 | int nbits = sp->lzw_nbits; | |
896 | ||
897 | if (op > sp->enc_rawlimit) { | |
898 | tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata); | |
899 | TIFFFlushData1(tif); | |
900 | op = tif->tif_rawdata; | |
901 | } | |
902 | if (sp->enc_oldcode != (hcode_t) -1) { | |
903 | PutNextCode(op, sp->enc_oldcode); | |
904 | sp->enc_oldcode = (hcode_t) -1; | |
905 | } | |
906 | PutNextCode(op, CODE_EOI); | |
615a9936 | 907 | if (nextbits > 0) |
b47c832e RR |
908 | *op++ = (u_char)(nextdata << (8-nextbits)); |
909 | tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata); | |
910 | return (1); | |
911 | } | |
912 | ||
913 | /* | |
914 | * Reset encoding hash table. | |
915 | */ | |
916 | static void | |
917 | cl_hash(LZWEncodeState* sp) | |
918 | { | |
919 | register hash_t *hp = &sp->enc_hashtab[HSIZE-1]; | |
920 | register long i = HSIZE-8; | |
921 | ||
922 | do { | |
923 | i -= 8; | |
924 | hp[-7].hash = -1; | |
925 | hp[-6].hash = -1; | |
926 | hp[-5].hash = -1; | |
927 | hp[-4].hash = -1; | |
928 | hp[-3].hash = -1; | |
929 | hp[-2].hash = -1; | |
930 | hp[-1].hash = -1; | |
931 | hp[ 0].hash = -1; | |
932 | hp -= 8; | |
933 | } while (i >= 0); | |
934 | for (i += 8; i > 0; i--, hp--) | |
935 | hp->hash = -1; | |
936 | } | |
937 | ||
938 | static void | |
939 | LZWCleanup(TIFF* tif) | |
940 | { | |
941 | if (tif->tif_data) { | |
942 | if (tif->tif_mode == O_RDONLY) { | |
943 | if (DecoderState(tif)->dec_codetab) | |
944 | _TIFFfree(DecoderState(tif)->dec_codetab); | |
945 | } else { | |
946 | if (EncoderState(tif)->enc_hashtab) | |
947 | _TIFFfree(EncoderState(tif)->enc_hashtab); | |
948 | } | |
949 | _TIFFfree(tif->tif_data); | |
950 | tif->tif_data = NULL; | |
951 | } | |
952 | } | |
953 | ||
954 | int | |
955 | TIFFInitLZW(TIFF* tif, int scheme) | |
956 | { | |
957 | assert(scheme == COMPRESSION_LZW); | |
958 | /* | |
959 | * Allocate state block so tag methods have storage to record values. | |
960 | */ | |
961 | if (tif->tif_mode == O_RDONLY) { | |
962 | tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (LZWDecodeState)); | |
963 | if (tif->tif_data == NULL) | |
964 | goto bad; | |
965 | DecoderState(tif)->dec_codetab = NULL; | |
966 | DecoderState(tif)->dec_decode = NULL; | |
967 | } else { | |
968 | tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (LZWEncodeState)); | |
969 | if (tif->tif_data == NULL) | |
970 | goto bad; | |
971 | EncoderState(tif)->enc_hashtab = NULL; | |
972 | } | |
973 | /* | |
974 | * Install codec methods. | |
975 | */ | |
976 | tif->tif_setupdecode = LZWSetupDecode; | |
977 | tif->tif_predecode = LZWPreDecode; | |
978 | tif->tif_decoderow = LZWDecode; | |
979 | tif->tif_decodestrip = LZWDecode; | |
980 | tif->tif_decodetile = LZWDecode; | |
981 | tif->tif_setupencode = LZWSetupEncode; | |
982 | tif->tif_preencode = LZWPreEncode; | |
983 | tif->tif_postencode = LZWPostEncode; | |
984 | tif->tif_encoderow = LZWEncode; | |
985 | tif->tif_encodestrip = LZWEncode; | |
986 | tif->tif_encodetile = LZWEncode; | |
987 | tif->tif_cleanup = LZWCleanup; | |
988 | /* | |
989 | * Setup predictor setup. | |
990 | */ | |
991 | (void) TIFFPredictorInit(tif); | |
992 | return (1); | |
993 | bad: | |
994 | TIFFError("TIFFInitLZW", "No space for LZW state block"); | |
995 | return (0); | |
996 | } | |
997 | ||
998 | /* | |
999 | * Copyright (c) 1985, 1986 The Regents of the University of California. | |
1000 | * All rights reserved. | |
1001 | * | |
1002 | * This code is derived from software contributed to Berkeley by | |
1003 | * James A. Woods, derived from original work by Spencer Thomas | |
1004 | * and Joseph Orost. | |
1005 | * | |
1006 | * Redistribution and use in source and binary forms are permitted | |
1007 | * provided that the above copyright notice and this paragraph are | |
1008 | * duplicated in all such forms and that any documentation, | |
1009 | * advertising materials, and other materials related to such | |
1010 | * distribution and use acknowledge that the software was developed | |
1011 | * by the University of California, Berkeley. The name of the | |
1012 | * University may not be used to endorse or promote products derived | |
1013 | * from this software without specific prior written permission. | |
1014 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR | |
1015 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED | |
1016 | * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. | |
1017 | */ | |
1018 | #endif /* LZW_SUPPORT */ |