src/tiff/tif_lzw.c

   1 /* $Header$ */
   2
   3 /*
   4  * Copyright (c) 1988-1997 Sam Leffler
   5  * Copyright (c) 1991-1997 Silicon Graphics, Inc.
   6  *
   7  * Permission to use, copy, modify, distribute, and sell this software and
   8  * its documentation for any purpose is hereby granted without fee, provided
   9  * that (i) the above copyright notices and this permission notice appear in
  10  * all copies of the software and related documentation, and (ii) the names of
  11  * Sam Leffler and Silicon Graphics may not be used in any advertising or
  12  * publicity relating to the software without the specific, prior written
  13  * permission of Sam Leffler and Silicon Graphics.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
  17  * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
  18  *
  19  * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
  20  * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
  21  * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  22  * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
  23  * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  24  * OF THIS SOFTWARE.
  25  */
  26
  27 #include "tiffiop.h"
  28 #ifdef LZW_SUPPORT
  29 /*
  30  * TIFF Library.
  31  * Rev 5.0 Lempel-Ziv & Welch Compression Support
  32  *
  33  * This code is derived from the compress program whose code is
  34  * derived from software contributed to Berkeley by James A. Woods,
  35  * derived from original work by Spencer Thomas and Joseph Orost.
  36  *
  37  * The original Berkeley copyright notice appears below in its entirety.
  38  */
  39 #include "tif_predict.h"
  40
  41 #include <assert.h>
  42 #include <stdio.h>
  43
  44 /*
  45  * NB: The 5.0 spec describes a different algorithm than Aldus
  46  *     implements.  Specifically, Aldus does code length transitions
  47  *     one code earlier than should be done (for real LZW).
  48  *     Earlier versions of this library implemented the correct
  49  *     LZW algorithm, but emitted codes in a bit order opposite
  50  *     to the TIFF spec.  Thus, to maintain compatibility w/ Aldus
  51  *     we interpret MSB-LSB ordered codes to be images written w/
  52  *     old versions of this library, but otherwise adhere to the
  53  *     Aldus "off by one" algorithm.
  54  *
  55  * Future revisions to the TIFF spec are expected to "clarify this issue".
  56  */
  57 #define LZW_COMPAT              /* include backwards compatibility code */
  58 /*
  59  * Each strip of data is supposed to be terminated by a CODE_EOI.
  60  * If the following #define is included, the decoder will also
  61  * check for end-of-strip w/o seeing this code.  This makes the
  62  * library more robust, but also slower.
  63  */
  64 #define LZW_CHECKEOS            /* include checks for strips w/o EOI code */
  65
  66 #define MAXCODE(n)      ((1L<<(n))-1)
  67 /*
  68  * The TIFF spec specifies that encoded bit
  69  * strings range from 9 to 12 bits.
  70  */
  71 #define BITS_MIN        9               /* start with 9 bits */
  72 #define BITS_MAX        12              /* max of 12 bit strings */
  73 /* predefined codes */
  74 #define CODE_CLEAR      256             /* code to clear string table */
  75 #define CODE_EOI        257             /* end-of-information code */
  76 #define CODE_FIRST      258             /* first free code entry */
  77 #define CODE_MAX        MAXCODE(BITS_MAX)
  78 #define HSIZE           9001L           /* 91% occupancy */
  79 #define HSHIFT          (13-8)
  80 #ifdef LZW_COMPAT
  81 /* NB: +1024 is for compatibility with old files */
  82 #define CSIZE           (MAXCODE(BITS_MAX)+1024L)
  83 #else
  84 #define CSIZE           (MAXCODE(BITS_MAX)+1L)
  85 #endif
  86
  87 /*
  88  * State block for each open TIFF file using LZW
  89  * compression/decompression.  Note that the predictor
  90  * state block must be first in this data structure.
  91  */
  92 typedef struct {
  93         TIFFPredictorState predict;     /* predictor super class */
  94
  95         u_short         nbits;          /* # of bits/code */
  96         u_short         maxcode;        /* maximum code for lzw_nbits */
  97         u_short         free_ent;       /* next free entry in hash table */
  98         long            nextdata;       /* next bits of i/o */
  99         long            nextbits;       /* # of valid bits in lzw_nextdata */
 100 } LZWBaseState;
 101
 102 #define lzw_nbits       base.nbits
 103 #define lzw_maxcode     base.maxcode
 104 #define lzw_free_ent    base.free_ent
 105 #define lzw_nextdata    base.nextdata
 106 #define lzw_nextbits    base.nextbits
 107
 108 /*
 109  * Decoding-specific state.
 110  */
 111 typedef struct code_ent {
 112         struct code_ent *next;
 113         u_short length;                 /* string len, including this token */
 114         u_char  value;                  /* data value */
 115         u_char  firstchar;              /* first token of string */
 116 } code_t;
 117
 118 typedef int (*decodeFunc)(TIFF*, tidata_t, tsize_t, tsample_t);
 119
 120 typedef struct {
 121         LZWBaseState base;
 122         long    dec_nbitsmask;          /* lzw_nbits 1 bits, right adjusted */
 123         long    dec_restart;            /* restart count */
 124 #ifdef LZW_CHECKEOS
 125         long    dec_bitsleft;           /* available bits in raw data */
 126 #endif
 127         decodeFunc dec_decode;          /* regular or backwards compatible */
 128         code_t* dec_codep;              /* current recognized code */
 129         code_t* dec_oldcodep;           /* previously recognized code */
 130         code_t* dec_free_entp;          /* next free entry */
 131         code_t* dec_maxcodep;           /* max available entry */
 132         code_t* dec_codetab;            /* kept separate for small machines */
 133 } LZWDecodeState;
 134
 135 /*
 136  * Encoding-specific state.
 137  */
 138 typedef uint16 hcode_t;                 /* codes fit in 16 bits */
 139 typedef struct {
 140         long    hash;
 141         hcode_t code;
 142 } hash_t;
 143
 144 typedef struct {
 145         LZWBaseState base;
 146         int     enc_oldcode;            /* last code encountered */
 147         long    enc_checkpoint;         /* point at which to clear table */
 148 #define CHECK_GAP       10000           /* enc_ratio check interval */
 149         long    enc_ratio;              /* current compression ratio */
 150         long    enc_incount;            /* (input) data bytes encoded */
 151         long    enc_outcount;           /* encoded (output) bytes */
 152         tidata_t enc_rawlimit;          /* bound on tif_rawdata buffer */
 153         hash_t* enc_hashtab;            /* kept separate for small machines */
 154 } LZWEncodeState;
 155
 156 #define LZWState(tif)           ((LZWBaseState*) (tif)->tif_data)
 157 #define DecoderState(tif)       ((LZWDecodeState*) LZWState(tif))
 158 #define EncoderState(tif)       ((LZWEncodeState*) LZWState(tif))
 159
 160 static  int LZWDecode(TIFF*, tidata_t, tsize_t, tsample_t);
 161 #ifdef LZW_COMPAT
 162 static  int LZWDecodeCompat(TIFF*, tidata_t, tsize_t, tsample_t);
 163 #endif
 164 static  void cl_hash(LZWEncodeState*);
 165
 166 /*
 167  * LZW Decoder.
 168  */
 169
 170 #ifdef LZW_CHECKEOS
 171 /*
 172  * This check shouldn't be necessary because each
 173  * strip is suppose to be terminated with CODE_EOI.
 174  */
 175 #define NextCode(_tif, _sp, _bp, _code, _get) {                         \
 176         if ((_sp)->dec_bitsleft < nbits) {                              \
 177                 TIFFWarning(_tif->tif_name,                             \
 178                     "LZWDecode: Strip %d not terminated with EOI code", \
 179                     _tif->tif_curstrip);                                \
 180                 _code = CODE_EOI;                                       \
 181         } else {                                                        \
 182                 _get(_sp,_bp,_code);                                    \
 183                 (_sp)->dec_bitsleft -= nbits;                           \
 184         }                                                               \
 185 }
 186 #else
 187 #define NextCode(tif, sp, bp, code, get) get(sp, bp, code)
 188 #endif
 189
 190 static int
 191 LZWSetupDecode(TIFF* tif)
 192 {
 193         LZWDecodeState* sp = DecoderState(tif);
 194         static const char module[] = " LZWSetupDecode";
 195         int code;
 196
 197         assert(sp != NULL);
 198         if (sp->dec_codetab == NULL) {
 199                 sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
 200                 if (sp->dec_codetab == NULL) {
 201                         TIFFError(module, "No space for LZW code table");
 202                         return (0);
 203                 }
 204                 /*
 205                  * Pre-load the table.
 206                  */
 207                 for (code = 255; code >= 0; code--) {
 208                         sp->dec_codetab[code].value = code;
 209                         sp->dec_codetab[code].firstchar = code;
 210                         sp->dec_codetab[code].length = 1;
 211                         sp->dec_codetab[code].next = NULL;
 212                 }
 213         }
 214         return (1);
 215 }
 216
 217 /*
 218  * Setup state for decoding a strip.
 219  */
 220 static int
 221 LZWPreDecode(TIFF* tif, tsample_t s)
 222 {
 223         LZWDecodeState *sp = DecoderState(tif);
 224
 225         (void) s;
 226         assert(sp != NULL);
 227         /*
 228          * Check for old bit-reversed codes.
 229          */
 230         if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
 231 #ifdef LZW_COMPAT
 232                 if (!sp->dec_decode) {
 233                         TIFFWarning(tif->tif_name,
 234                             "Old-style LZW codes, convert file");
 235                         /*
 236                          * Override default decoding methods with
 237                          * ones that deal with the old coding.
 238                          * Otherwise the predictor versions set
 239                          * above will call the compatibility routines
 240                          * through the dec_decode method.
 241                          */
 242                         tif->tif_decoderow = LZWDecodeCompat;
 243                         tif->tif_decodestrip = LZWDecodeCompat;
 244                         tif->tif_decodetile = LZWDecodeCompat;
 245                         /*
 246                          * If doing horizontal differencing, must
 247                          * re-setup the predictor logic since we
 248                          * switched the basic decoder methods...
 249                          */
 250                         (*tif->tif_setupdecode)(tif);
 251                         sp->dec_decode = LZWDecodeCompat;
 252                 }
 253                 sp->lzw_maxcode = MAXCODE(BITS_MIN);
 254 #else /* !LZW_COMPAT */
 255                 if (!sp->dec_decode) {
 256                         TIFFError(tif->tif_name,
 257                             "Old-style LZW codes not supported");
 258                         sp->dec_decode = LZWDecode;
 259                 }
 260                 return (0);
 261 #endif/* !LZW_COMPAT */
 262         } else {
 263                 sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
 264                 sp->dec_decode = LZWDecode;
 265         }
 266         sp->lzw_nbits = BITS_MIN;
 267         sp->lzw_nextbits = 0;
 268         sp->lzw_nextdata = 0;
 269
 270         sp->dec_restart = 0;
 271         sp->dec_nbitsmask = MAXCODE(BITS_MIN);
 272 #ifdef LZW_CHECKEOS
 273         sp->dec_bitsleft = tif->tif_rawcc << 3;
 274 #endif
 275         sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
 276         /*
 277          * Zero entries that are not yet filled in.  We do
 278          * this to guard against bogus input data that causes
 279          * us to index into undefined entries.  If you can
 280          * come up with a way to safely bounds-check input codes
 281          * while decoding then you can remove this operation.
 282          */
 283         _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
 284         sp->dec_oldcodep = &sp->dec_codetab[-1];
 285         sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
 286         return (1);
 287 }
 288
 289 /*
 290  * Decode a "hunk of data".
 291  */
 292 #define GetNextCode(sp, bp, code) {                             \
 293         nextdata = (nextdata<<8) | *(bp)++;                     \
 294         nextbits += 8;                                          \
 295         if (nextbits < nbits) {                                 \
 296                 nextdata = (nextdata<<8) | *(bp)++;             \
 297                 nextbits += 8;                                  \
 298         }                                                       \
 299         code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);   \
 300         nextbits -= nbits;                                      \
 301 }
 302
 303 static void
 304 codeLoop(TIFF* tif)
 305 {
 306         TIFFError(tif->tif_name,
 307             "LZWDecode: Bogus encoding, loop in the code table; scanline %d",
 308             tif->tif_row);
 309 }
 310
 311 static int
 312 LZWDecode(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
 313 {
 314         LZWDecodeState *sp = DecoderState(tif);
 315         char *op = (char*) op0;
 316         long occ = (long) occ0;
 317         char *tp;
 318         u_char *bp;
 319         hcode_t code;
 320         int len;
 321         long nbits, nextbits, nextdata, nbitsmask;
 322         code_t *codep, *free_entp, *maxcodep, *oldcodep;
 323
 324         (void) s;
 325         assert(sp != NULL);
 326         /*
 327          * Restart interrupted output operation.
 328          */
 329         if (sp->dec_restart) {
 330                 long residue;
 331
 332                 codep = sp->dec_codep;
 333                 residue = codep->length - sp->dec_restart;
 334                 if (residue > occ) {
 335                         /*
 336                          * Residue from previous decode is sufficient
 337                          * to satisfy decode request.  Skip to the
 338                          * start of the decoded string, place decoded
 339                          * values in the output buffer, and return.
 340                          */
 341                         sp->dec_restart += occ;
 342                         do {
 343                                 codep = codep->next;
 344                         } while (--residue > occ && codep);
 345                         if (codep) {
 346                                 tp = op + occ;
 347                                 do {
 348                                         *--tp = codep->value;
 349                                         codep = codep->next;
 350                                 } while (--occ && codep);
 351                         }
 352                         return (1);
 353                 }
 354                 /*
 355                  * Residue satisfies only part of the decode request.
 356                  */
 357                 op += residue, occ -= residue;
 358                 tp = op;
 359                 do {
 360                         int t;
 361                         --tp;
 362                         t = codep->value;
 363                         codep = codep->next;
 364                         *tp = t;
 365                 } while (--residue && codep);
 366                 sp->dec_restart = 0;
 367         }
 368
 369         bp = (u_char *)tif->tif_rawcp;
 370         nbits = sp->lzw_nbits;
 371         nextdata = sp->lzw_nextdata;
 372         nextbits = sp->lzw_nextbits;
 373         nbitsmask = sp->dec_nbitsmask;
 374         oldcodep = sp->dec_oldcodep;
 375         free_entp = sp->dec_free_entp;
 376         maxcodep = sp->dec_maxcodep;
 377
 378         while (occ > 0) {
 379                 NextCode(tif, sp, bp, code, GetNextCode);
 380                 if (code == CODE_EOI)
 381                         break;
 382                 if (code == CODE_CLEAR) {
 383                         free_entp = sp->dec_codetab + CODE_FIRST;
 384                         nbits = BITS_MIN;
 385                         nbitsmask = MAXCODE(BITS_MIN);
 386                         maxcodep = sp->dec_codetab + nbitsmask-1;
 387                         NextCode(tif, sp, bp, code, GetNextCode);
 388                         if (code == CODE_EOI)
 389                                 break;
 390                         *op++ = code, occ--;
 391                         oldcodep = sp->dec_codetab + code;
 392                         continue;
 393                 }
 394                 codep = sp->dec_codetab + code;
 395
 396                 /*
 397                  * Add the new entry to the code table.
 398                  */
 399                 assert(&sp->dec_codetab[0] <= free_entp && free_entp < &sp->dec_codetab[CSIZE]);
 400                 free_entp->next = oldcodep;
 401                 free_entp->firstchar = free_entp->next->firstchar;
 402                 free_entp->length = free_entp->next->length+1;
 403                 free_entp->value = (codep < free_entp) ?
 404                     codep->firstchar : free_entp->firstchar;
 405                 if (++free_entp > maxcodep) {
 406                         if (++nbits > BITS_MAX)         /* should not happen */
 407                                 nbits = BITS_MAX;
 408                         nbitsmask = MAXCODE(nbits);
 409                         maxcodep = sp->dec_codetab + nbitsmask-1;
 410                 }
 411                 oldcodep = codep;
 412                 if (code >= 256) {
 413                         /*
 414                          * Code maps to a string, copy string
 415                          * value to output (written in reverse).
 416                          */
 417                         if (codep->length > occ) {
 418                                 /*
 419                                  * String is too long for decode buffer,
 420                                  * locate portion that will fit, copy to
 421                                  * the decode buffer, and setup restart
 422                                  * logic for the next decoding call.
 423                                  */
 424                                 sp->dec_codep = codep;
 425                                 do {
 426                                         codep = codep->next;
 427                                 } while (codep && codep->length > occ);
 428                                 if (codep) {
 429                                         sp->dec_restart = occ;
 430                                         tp = op + occ;
 431                                         do  {
 432                                                 *--tp = codep->value;
 433                                                 codep = codep->next;
 434                                         }  while (--occ && codep);
 435                                         if (codep)
 436                                                 codeLoop(tif);
 437                                 }
 438                                 break;
 439                         }
 440                         len = codep->length;
 441                         tp = op + len;
 442                         do {
 443                                 int t;
 444                                 --tp;
 445                                 t = codep->value;
 446                                 codep = codep->next;
 447                                 *tp = t;
 448                         } while (codep && tp > op);
 449                         if (codep) {
 450                             codeLoop(tif);
 451                             break;
 452                         }
 453                         op += len, occ -= len;
 454                 } else
 455                         *op++ = code, occ--;
 456         }
 457
 458         tif->tif_rawcp = (tidata_t) bp;
 459         sp->lzw_nbits = (u_short) nbits;
 460         sp->lzw_nextdata = nextdata;
 461         sp->lzw_nextbits = nextbits;
 462         sp->dec_nbitsmask = nbitsmask;
 463         sp->dec_oldcodep = oldcodep;
 464         sp->dec_free_entp = free_entp;
 465         sp->dec_maxcodep = maxcodep;
 466
 467         if (occ > 0) {
 468                 TIFFError(tif->tif_name,
 469                 "LZWDecode: Not enough data at scanline %d (short %d bytes)",
 470                     tif->tif_row, occ);
 471                 return (0);
 472         }
 473         return (1);
 474 }
 475
 476 #ifdef LZW_COMPAT
 477 /*
 478  * Decode a "hunk of data" for old images.
 479  */
 480 #define GetNextCodeCompat(sp, bp, code) {                       \
 481         nextdata |= (u_long) *(bp)++ << nextbits;               \
 482         nextbits += 8;                                          \
 483         if (nextbits < nbits) {                                 \
 484                 nextdata |= (u_long) *(bp)++ << nextbits;       \
 485                 nextbits += 8;                                  \
 486         }                                                       \
 487         code = (hcode_t)(nextdata & nbitsmask);                 \
 488         nextdata >>= nbits;                                     \
 489         nextbits -= nbits;                                      \
 490 }
 491
 492 static int
 493 LZWDecodeCompat(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
 494 {
 495         LZWDecodeState *sp = DecoderState(tif);
 496         char *op = (char*) op0;
 497         long occ = (long) occ0;
 498         char *tp;
 499         u_char *bp;
 500         int code, nbits;
 501         long nextbits, nextdata, nbitsmask;
 502         code_t *codep, *free_entp, *maxcodep, *oldcodep;
 503
 504         (void) s;
 505         assert(sp != NULL);
 506         /*
 507          * Restart interrupted output operation.
 508          */
 509         if (sp->dec_restart) {
 510                 long residue;
 511
 512                 codep = sp->dec_codep;
 513                 residue = codep->length - sp->dec_restart;
 514                 if (residue > occ) {
 515                         /*
 516                          * Residue from previous decode is sufficient
 517                          * to satisfy decode request.  Skip to the
 518                          * start of the decoded string, place decoded
 519                          * values in the output buffer, and return.
 520                          */
 521                         sp->dec_restart += occ;
 522                         do {
 523                                 codep = codep->next;
 524                         } while (--residue > occ);
 525                         tp = op + occ;
 526                         do {
 527                                 *--tp = codep->value;
 528                                 codep = codep->next;
 529                         } while (--occ);
 530                         return (1);
 531                 }
 532                 /*
 533                  * Residue satisfies only part of the decode request.
 534                  */
 535                 op += residue, occ -= residue;
 536                 tp = op;
 537                 do {
 538                         *--tp = codep->value;
 539                         codep = codep->next;
 540                 } while (--residue);
 541                 sp->dec_restart = 0;
 542         }
 543
 544         bp = (u_char *)tif->tif_rawcp;
 545         nbits = sp->lzw_nbits;
 546         nextdata = sp->lzw_nextdata;
 547         nextbits = sp->lzw_nextbits;
 548         nbitsmask = sp->dec_nbitsmask;
 549         oldcodep = sp->dec_oldcodep;
 550         free_entp = sp->dec_free_entp;
 551         maxcodep = sp->dec_maxcodep;
 552
 553         while (occ > 0) {
 554                 NextCode(tif, sp, bp, code, GetNextCodeCompat);
 555                 if (code == CODE_EOI)
 556                         break;
 557                 if (code == CODE_CLEAR) {
 558                         free_entp = sp->dec_codetab + CODE_FIRST;
 559                         nbits = BITS_MIN;
 560                         nbitsmask = MAXCODE(BITS_MIN);
 561                         maxcodep = sp->dec_codetab + nbitsmask;
 562                         NextCode(tif, sp, bp, code, GetNextCodeCompat);
 563                         if (code == CODE_EOI)
 564                                 break;
 565                         *op++ = code, occ--;
 566                         oldcodep = sp->dec_codetab + code;
 567                         continue;
 568                 }
 569                 codep = sp->dec_codetab + code;
 570
 571                 /*
 572                  * Add the new entry to the code table.
 573                  */
 574                 assert(&sp->dec_codetab[0] <= free_entp && free_entp < &sp->dec_codetab[CSIZE]);
 575                 free_entp->next = oldcodep;
 576                 free_entp->firstchar = free_entp->next->firstchar;
 577                 free_entp->length = free_entp->next->length+1;
 578                 free_entp->value = (codep < free_entp) ?
 579                     codep->firstchar : free_entp->firstchar;
 580                 if (++free_entp > maxcodep) {
 581                         if (++nbits > BITS_MAX)         /* should not happen */
 582                                 nbits = BITS_MAX;
 583                         nbitsmask = MAXCODE(nbits);
 584                         maxcodep = sp->dec_codetab + nbitsmask;
 585                 }
 586                 oldcodep = codep;
 587                 if (code >= 256) {
 588                         /*
 589                          * Code maps to a string, copy string
 590                          * value to output (written in reverse).
 591                          */
 592                         if (codep->length > occ) {
 593                                 /*
 594                                  * String is too long for decode buffer,
 595                                  * locate portion that will fit, copy to
 596                                  * the decode buffer, and setup restart
 597                                  * logic for the next decoding call.
 598                                  */
 599                                 sp->dec_codep = codep;
 600                                 do {
 601                                         codep = codep->next;
 602                                 } while (codep->length > occ);
 603                                 sp->dec_restart = occ;
 604                                 tp = op + occ;
 605                                 do  {
 606                                         *--tp = codep->value;
 607                                         codep = codep->next;
 608                                 }  while (--occ);
 609                                 break;
 610                         }
 611                         op += codep->length, occ -= codep->length;
 612                         tp = op;
 613                         do {
 614                                 *--tp = codep->value;
 615                         } while( (codep = codep->next) != NULL);
 616                 } else
 617                         *op++ = code, occ--;
 618         }
 619
 620         tif->tif_rawcp = (tidata_t) bp;
 621         sp->lzw_nbits = nbits;
 622         sp->lzw_nextdata = nextdata;
 623         sp->lzw_nextbits = nextbits;
 624         sp->dec_nbitsmask = nbitsmask;
 625         sp->dec_oldcodep = oldcodep;
 626         sp->dec_free_entp = free_entp;
 627         sp->dec_maxcodep = maxcodep;
 628
 629         if (occ > 0) {
 630                 TIFFError(tif->tif_name,
 631             "LZWDecodeCompat: Not enough data at scanline %d (short %d bytes)",
 632                     tif->tif_row, occ);
 633                 return (0);
 634         }
 635         return (1);
 636 }
 637 #endif /* LZW_COMPAT */
 638
 639 /*
 640  * LZW Encoding.
 641  */
 642
 643 static int
 644 LZWSetupEncode(TIFF* tif)
 645 {
 646         LZWEncodeState* sp = EncoderState(tif);
 647         static const char module[] = "LZWSetupEncode";
 648
 649         assert(sp != NULL);
 650         sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
 651         if (sp->enc_hashtab == NULL) {
 652                 TIFFError(module, "No space for LZW hash table");
 653                 return (0);
 654         }
 655         return (1);
 656 }
 657
 658 /*
 659  * Reset encoding state at the start of a strip.
 660  */
 661 static int
 662 LZWPreEncode(TIFF* tif, tsample_t s)
 663 {
 664         LZWEncodeState *sp = EncoderState(tif);
 665
 666         (void) s;
 667         assert(sp != NULL);
 668         sp->lzw_nbits = BITS_MIN;
 669         sp->lzw_maxcode = MAXCODE(BITS_MIN);
 670         sp->lzw_free_ent = CODE_FIRST;
 671         sp->lzw_nextbits = 0;
 672         sp->lzw_nextdata = 0;
 673         sp->enc_checkpoint = CHECK_GAP;
 674         sp->enc_ratio = 0;
 675         sp->enc_incount = 0;
 676         sp->enc_outcount = 0;
 677         /*
 678          * The 4 here insures there is space for 2 max-sized
 679          * codes in LZWEncode and LZWPostDecode.
 680          */
 681         sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
 682         cl_hash(sp);            /* clear hash table */
 683         sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */
 684         return (1);
 685 }
 686
 687 #define CALCRATIO(sp, rat) {                                    \
 688         if (incount > 0x007fffff) { /* NB: shift will overflow */\
 689                 rat = outcount >> 8;                            \
 690                 rat = (rat == 0 ? 0x7fffffff : incount/rat);    \
 691         } else                                                  \
 692                 rat = (incount<<8) / outcount;                  \
 693 }
 694 #define PutNextCode(op, c) {                                    \
 695         nextdata = (nextdata << nbits) | c;                     \
 696         nextbits += nbits;                                      \
 697         *op++ = (u_char)(nextdata >> (nextbits-8));             \
 698         nextbits -= 8;                                          \
 699         if (nextbits >= 8) {                                    \
 700                 *op++ = (u_char)(nextdata >> (nextbits-8));     \
 701                 nextbits -= 8;                                  \
 702         }                                                       \
 703         outcount += nbits;                                      \
 704 }
 705
 706 /*
 707  * Encode a chunk of pixels.
 708  *
 709  * Uses an open addressing double hashing (no chaining) on the
 710  * prefix code/next character combination.  We do a variant of
 711  * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
 712  * relatively-prime secondary probe.  Here, the modular division
 713  * first probe is gives way to a faster exclusive-or manipulation.
 714  * Also do block compression with an adaptive reset, whereby the
 715  * code table is cleared when the compression ratio decreases,
 716  * but after the table fills.  The variable-length output codes
 717  * are re-sized at this point, and a CODE_CLEAR is generated
 718  * for the decoder.
 719  */
 720 static int
 721 LZWEncode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
 722 {
 723         register LZWEncodeState *sp = EncoderState(tif);
 724         register long fcode;
 725         register hash_t *hp;
 726         register int h, c;
 727         hcode_t ent;
 728         long disp;
 729         long incount, outcount, checkpoint;
 730         long nextdata, nextbits;
 731         int free_ent, maxcode, nbits;
 732         tidata_t op, limit;
 733
 734         (void) s;
 735         if (sp == NULL)
 736                 return (0);
 737         /*
 738          * Load local state.
 739          */
 740         incount = sp->enc_incount;
 741         outcount = sp->enc_outcount;
 742         checkpoint = sp->enc_checkpoint;
 743         nextdata = sp->lzw_nextdata;
 744         nextbits = sp->lzw_nextbits;
 745         free_ent = sp->lzw_free_ent;
 746         maxcode = sp->lzw_maxcode;
 747         nbits = sp->lzw_nbits;
 748         op = tif->tif_rawcp;
 749         limit = sp->enc_rawlimit;
 750         ent = sp->enc_oldcode;
 751
 752         if (ent == (hcode_t) -1 && cc > 0) {
 753                 /*
 754                  * NB: This is safe because it can only happen
 755                  *     at the start of a strip where we know there
 756                  *     is space in the data buffer.
 757                  */
 758                 PutNextCode(op, CODE_CLEAR);
 759                 ent = *bp++; cc--; incount++;
 760         }
 761         while (cc > 0) {
 762                 c = *bp++; cc--; incount++;
 763                 fcode = ((long)c << BITS_MAX) + ent;
 764                 h = (c << HSHIFT) ^ ent;        /* xor hashing */
 765 #ifdef _WINDOWS
 766                 /*
 767                  * Check hash index for an overflow.
 768                  */
 769                 if (h >= HSIZE)
 770                         h -= HSIZE;
 771 #endif
 772                 hp = &sp->enc_hashtab[h];
 773                 if (hp->hash == fcode) {
 774                         ent = hp->code;
 775                         continue;
 776                 }
 777                 if (hp->hash >= 0) {
 778                         /*
 779                          * Primary hash failed, check secondary hash.
 780                          */
 781                         disp = HSIZE - h;
 782                         if (h == 0)
 783                                 disp = 1;
 784                         do {
 785                                 /*
 786                                  * Avoid pointer arithmetic 'cuz of
 787                                  * wraparound problems with segments.
 788                                  */
 789                                 if ((h -= disp) < 0)
 790                                         h += HSIZE;
 791                                 hp = &sp->enc_hashtab[h];
 792                                 if (hp->hash == fcode) {
 793                                         ent = hp->code;
 794                                         goto hit;
 795                                 }
 796                         } while (hp->hash >= 0);
 797                 }
 798                 /*
 799                  * New entry, emit code and add to table.
 800                  */
 801                 /*
 802                  * Verify there is space in the buffer for the code
 803                  * and any potential Clear code that might be emitted
 804                  * below.  The value of limit is setup so that there
 805                  * are at least 4 bytes free--room for 2 codes.
 806                  */
 807                 if (op > limit) {
 808                         tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
 809                         TIFFFlushData1(tif);
 810                         op = tif->tif_rawdata;
 811                 }
 812                 PutNextCode(op, ent);
 813                 ent = c;
 814                 hp->code = free_ent++;
 815                 hp->hash = fcode;
 816                 if (free_ent == CODE_MAX-1) {
 817                         /* table is full, emit clear code and reset */
 818                         cl_hash(sp);
 819                         sp->enc_ratio = 0;
 820                         incount = 0;
 821                         outcount = 0;
 822                         free_ent = CODE_FIRST;
 823                         PutNextCode(op, CODE_CLEAR);
 824                         nbits = BITS_MIN;
 825                         maxcode = MAXCODE(BITS_MIN);
 826                 } else {
 827                         /*
 828                          * If the next entry is going to be too big for
 829                          * the code size, then increase it, if possible.
 830                          */
 831                         if (free_ent > maxcode) {
 832                                 nbits++;
 833                                 assert(nbits <= BITS_MAX);
 834                                 maxcode = (int) MAXCODE(nbits);
 835                         } else if (incount >= checkpoint) {
 836                                 long rat;
 837                                 /*
 838                                  * Check compression ratio and, if things seem
 839                                  * to be slipping, clear the hash table and
 840                                  * reset state.  The compression ratio is a
 841                                  * 24+8-bit fractional number.
 842                                  */
 843                                 checkpoint = incount+CHECK_GAP;
 844                                 CALCRATIO(sp, rat);
 845                                 if (rat <= sp->enc_ratio) {
 846                                         cl_hash(sp);
 847                                         sp->enc_ratio = 0;
 848                                         incount = 0;
 849                                         outcount = 0;
 850                                         free_ent = CODE_FIRST;
 851                                         PutNextCode(op, CODE_CLEAR);
 852                                         nbits = BITS_MIN;
 853                                         maxcode = MAXCODE(BITS_MIN);
 854                                 } else
 855                                         sp->enc_ratio = rat;
 856                         }
 857                 }
 858         hit:
 859                 ;
 860         }
 861
 862         /*
 863          * Restore global state.
 864          */
 865         sp->enc_incount = incount;
 866         sp->enc_outcount = outcount;
 867         sp->enc_checkpoint = checkpoint;
 868         sp->enc_oldcode = ent;
 869         sp->lzw_nextdata = nextdata;
 870         sp->lzw_nextbits = nextbits;
 871         sp->lzw_free_ent = free_ent;
 872         sp->lzw_maxcode = maxcode;
 873         sp->lzw_nbits = nbits;
 874         tif->tif_rawcp = op;
 875         return (1);
 876 }
 877
 878 /*
 879  * Finish off an encoded strip by flushing the last
 880  * string and tacking on an End Of Information code.
 881  */
 882 static int
 883 LZWPostEncode(TIFF* tif)
 884 {
 885         register LZWEncodeState *sp = EncoderState(tif);
 886         tidata_t op = tif->tif_rawcp;
 887         long nextbits = sp->lzw_nextbits;
 888         long nextdata = sp->lzw_nextdata;
 889         long outcount = sp->enc_outcount;
 890         int nbits = sp->lzw_nbits;
 891
 892         if (op > sp->enc_rawlimit) {
 893                 tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
 894                 TIFFFlushData1(tif);
 895                 op = tif->tif_rawdata;
 896         }
 897         if (sp->enc_oldcode != (hcode_t) -1) {
 898                 PutNextCode(op, sp->enc_oldcode);
 899                 sp->enc_oldcode = (hcode_t) -1;
 900         }
 901         PutNextCode(op, CODE_EOI);
 902         if (nextbits > 0)
 903                 *op++ = (u_char)(nextdata << (8-nextbits));
 904         tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
 905         return (1);
 906 }
 907
 908 /*
 909  * Reset encoding hash table.
 910  */
 911 static void
 912 cl_hash(LZWEncodeState* sp)
 913 {
 914         register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
 915         register long i = HSIZE-8;
 916
 917         do {
 918                 i -= 8;
 919                 hp[-7].hash = -1;
 920                 hp[-6].hash = -1;
 921                 hp[-5].hash = -1;
 922                 hp[-4].hash = -1;
 923                 hp[-3].hash = -1;
 924                 hp[-2].hash = -1;
 925                 hp[-1].hash = -1;
 926                 hp[ 0].hash = -1;
 927                 hp -= 8;
 928         } while (i >= 0);
 929         for (i += 8; i > 0; i--, hp--)
 930                 hp->hash = -1;
 931 }
 932
 933 static void
 934 LZWCleanup(TIFF* tif)
 935 {
 936         if (tif->tif_data) {
 937                 if (tif->tif_mode == O_RDONLY) {
 938                         if (DecoderState(tif)->dec_codetab)
 939                                 _TIFFfree(DecoderState(tif)->dec_codetab);
 940                 } else {
 941                         if (EncoderState(tif)->enc_hashtab)
 942                                 _TIFFfree(EncoderState(tif)->enc_hashtab);
 943                 }
 944                 _TIFFfree(tif->tif_data);
 945                 tif->tif_data = NULL;
 946         }
 947 }
 948
 949 int
 950 TIFFInitLZW(TIFF* tif, int scheme)
 951 {
 952         assert(scheme == COMPRESSION_LZW);
 953         /*
 954          * Allocate state block so tag methods have storage to record values.
 955          */
 956         if (tif->tif_mode == O_RDONLY) {
 957                 tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (LZWDecodeState));
 958                 if (tif->tif_data == NULL)
 959                         goto bad;
 960                 DecoderState(tif)->dec_codetab = NULL;
 961                 DecoderState(tif)->dec_decode = NULL;
 962         } else {
 963                 tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (LZWEncodeState));
 964                 if (tif->tif_data == NULL)
 965                         goto bad;
 966                 EncoderState(tif)->enc_hashtab = NULL;
 967         }
 968         /*
 969          * Install codec methods.
 970          */
 971         tif->tif_setupdecode = LZWSetupDecode;
 972         tif->tif_predecode = LZWPreDecode;
 973         tif->tif_decoderow = LZWDecode;
 974         tif->tif_decodestrip = LZWDecode;
 975         tif->tif_decodetile = LZWDecode;
 976         tif->tif_setupencode = LZWSetupEncode;
 977         tif->tif_preencode = LZWPreEncode;
 978         tif->tif_postencode = LZWPostEncode;
 979         tif->tif_encoderow = LZWEncode;
 980         tif->tif_encodestrip = LZWEncode;
 981         tif->tif_encodetile = LZWEncode;
 982         tif->tif_cleanup = LZWCleanup;
 983         /*
 984          * Setup predictor setup.
 985          */
 986         (void) TIFFPredictorInit(tif);
 987         return (1);
 988 bad:
 989         TIFFError("TIFFInitLZW", "No space for LZW state block");
 990         return (0);
 991 }
 992
 993 /*
 994  * Copyright (c) 1985, 1986 The Regents of the University of California.
 995  * All rights reserved.
 996  *
 997  * This code is derived from software contributed to Berkeley by
 998  * James A. Woods, derived from original work by Spencer Thomas
 999  * and Joseph Orost.
1000  *
1001  * Redistribution and use in source and binary forms are permitted
1002  * provided that the above copyright notice and this paragraph are
1003  * duplicated in all such forms and that any documentation,
1004  * advertising materials, and other materials related to such
1005  * distribution and use acknowledge that the software was developed
1006  * by the University of California, Berkeley.  The name of the
1007  * University may not be used to endorse or promote products derived
1008  * from this software without specific prior written permission.
1009  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1010  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1011  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1012  */
1013 #endif /* LZW_SUPPORT */