/* deflate.h -- internal compression state
 * Copyright (C) 1995-2004 Jean-loup Gailly
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
/* WARNING: this file should *not* be used by applications. It is
   part of the implementation of the compression library and is
   subject to change. Applications should only use zlib.h.
 */
  18 /* define NO_GZIP when compiling if you want to disable gzip header and 
  19    trailer creation by deflate().  NO_GZIP would be used to avoid linking in 
  20    the crc code when it is not needed.  For shared libraries, gzip encoding 
  21    should be left enabled. */ 
/* ===========================================================================
 * Internal compression state.
 */
  30 #define LENGTH_CODES 29 
  31 /* number of length codes, not counting the special END_BLOCK code */ 
  34 /* number of literal bytes 0..255 */ 
  36 #define L_CODES (LITERALS+1+LENGTH_CODES) 
  37 /* number of Literal or Length codes, including the END_BLOCK code */ 
  40 /* number of distance codes */ 
  43 /* number of codes used to transfer the bit lengths */ 
  45 #define HEAP_SIZE (2*L_CODES+1) 
  46 /* maximum heap size */ 
  49 /* All codes must not exceed MAX_BITS bits */ 
  52 #define EXTRA_STATE   69 
  54 #define COMMENT_STATE 91 
  55 #define HCRC_STATE   103 
  56 #define BUSY_STATE   113 
  57 #define FINISH_STATE 666 
  61 /* Data structure describing a single value and its code string. */ 
  62 typedef struct ct_data_s 
{ 
  64         ush  freq
;       /* frequency count */ 
  65         ush  code
;       /* bit string */ 
  68         ush  dad
;        /* father node in Huffman tree */ 
  69         ush  len
;        /* length of bit string */ 
  78 typedef struct static_tree_desc_s  static_tree_desc
; 
  80 typedef struct tree_desc_s 
{ 
  81     ct_data 
*dyn_tree
;           /* the dynamic tree */ 
  82     int     max_code
;            /* largest code with non zero frequency */ 
  83     static_tree_desc 
*stat_desc
; /* the corresponding static tree */ 
  88 typedef unsigned IPos
; 
  90 /* A Pos is an index in the character window. We use short instead of int to 
  91  * save space in the various tables. IPos is used only for parameter passing. 
  94 typedef struct internal_state 
{ 
  95     z_streamp strm
;      /* pointer back to this zlib stream */ 
  96     int   status
;        /* as the name implies */ 
  97     Bytef 
*pending_buf
;  /* output still pending */ 
  98     ulg   pending_buf_size
; /* size of pending_buf */ 
  99     Bytef 
*pending_out
;  /* next pending byte to output to the stream */ 
 100     uInt   pending
;      /* nb of bytes in the pending buffer */ 
 101     int   wrap
;          /* bit 0 true for zlib, bit 1 true for gzip */ 
 102     gz_headerp  gzhead
;  /* gzip header information to write */ 
 103     uInt   gzindex
;      /* where in extra, name, or comment */ 
 104     Byte  method
;        /* STORED (for zip only) or DEFLATED */ 
 105     int   last_flush
;    /* value of flush param for previous deflate call */ 
 107                 /* used by deflate.c: */ 
 109     uInt  w_size
;        /* LZ77 window size (32K by default) */ 
 110     uInt  w_bits
;        /* log2(w_size)  (8..16) */ 
 111     uInt  w_mask
;        /* w_size - 1 */ 
 114     /* Sliding window. Input bytes are read into the second half of the window, 
 115      * and move to the first half later to keep a dictionary of at least wSize 
 116      * bytes. With this organization, matches are limited to a distance of 
 117      * wSize-MAX_MATCH bytes, but this ensures that IO is always 
 118      * performed with a length multiple of the block size. Also, it limits 
 119      * the window size to 64K, which is quite useful on MSDOS. 
 120      * To do: use the user input buffer as sliding window. 
 124     /* Actual size of window: 2*wSize, except when the user input buffer 
 125      * is directly used as sliding window. 
 129     /* Link to older string with same hash index. To limit the size of this 
 130      * array to 64K, this link is maintained only for the last 32K strings. 
 131      * An index in this array is thus a window index modulo 32K. 
 134     Posf 
*head
; /* Heads of the hash chains or NIL. */ 
 136     uInt  ins_h
;          /* hash index of string to be inserted */ 
 137     uInt  hash_size
;      /* number of elements in hash table */ 
 138     uInt  hash_bits
;      /* log2(hash_size) */ 
 139     uInt  hash_mask
;      /* hash_size-1 */ 
 142     /* Number of bits by which ins_h must be shifted at each input 
 143      * step. It must be such that after MIN_MATCH steps, the oldest 
 144      * byte no longer takes part in the hash key, that is: 
 145      *   hash_shift * MIN_MATCH >= hash_bits 
 149     /* Window position at the beginning of the current output block. Gets 
 150      * negative when the window is moved backwards. 
 153     uInt match_length
;           /* length of best match */ 
 154     IPos prev_match
;             /* previous match */ 
 155     int match_available
;         /* set if previous match exists */ 
 156     uInt strstart
;               /* start of string to insert */ 
 157     uInt match_start
;            /* start of matching string */ 
 158     uInt lookahead
;              /* number of valid bytes ahead in window */ 
 161     /* Length of the best match at previous step. Matches not greater than this 
 162      * are discarded. This is used in the lazy match evaluation. 
 165     uInt max_chain_length
; 
 166     /* To speed up deflation, hash chains are never searched beyond this 
 167      * length.  A higher limit improves compression ratio but degrades the 
 172     /* Attempt to find a better match only when the current match is strictly 
 173      * smaller than this value. This mechanism is used only for compression 
 176 #   define max_insert_length  max_lazy_match 
 177     /* Insert new strings in the hash table only if the match length is not 
 178      * greater than this length. This saves time but degrades compression. 
 179      * max_insert_length is used only for compression levels <= 3. 
 182     int level
;    /* compression level (1..9) */ 
 183     int strategy
; /* favor or force Huffman coding*/ 
 186     /* Use a faster search when the previous match is longer than this */ 
 188     int nice_match
; /* Stop searching when current match exceeds this */ 
 190                 /* used by trees.c: */ 
    /* Didn't use ct_data typedef below to suppress compiler warning */
 192     struct ct_data_s dyn_ltree
[HEAP_SIZE
];   /* literal and length tree */ 
 193     struct ct_data_s dyn_dtree
[2*D_CODES
+1]; /* distance tree */ 
 194     struct ct_data_s bl_tree
[2*BL_CODES
+1];  /* Huffman tree for bit lengths */ 
 196     struct tree_desc_s l_desc
;               /* desc. for literal tree */ 
 197     struct tree_desc_s d_desc
;               /* desc. for distance tree */ 
 198     struct tree_desc_s bl_desc
;              /* desc. for bit length tree */ 
 200     ush bl_count
[MAX_BITS
+1]; 
 201     /* number of codes at each bit length for an optimal tree */ 
 203     int heap
[2*L_CODES
+1];      /* heap used to build the Huffman trees */ 
 204     int heap_len
;               /* number of elements in the heap */ 
 205     int heap_max
;               /* element of largest frequency */ 
 206     /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 
 207      * The same heap array is used to build all trees. 
 210     uch depth
[2*L_CODES
+1]; 
 211     /* Depth of each subtree used as tie breaker for trees of equal frequency 
 214     uchf 
*l_buf
;          /* buffer for literals or lengths */ 
 217     /* Size of match buffer for literals/lengths.  There are 4 reasons for 
 218      * limiting lit_bufsize to 64K: 
 219      *   - frequencies can be kept in 16 bit counters 
 220      *   - if compression is not successful for the first block, all input 
 221      *     data is still in the window so we can still emit a stored block even 
 222      *     when input comes from standard input.  (This can also be done for 
 223      *     all blocks if lit_bufsize is not greater than 32K.) 
 224      *   - if compression is not successful for a file smaller than 64K, we can 
 225      *     even emit a stored file instead of a stored block (saving 5 bytes). 
 226      *     This is applicable only for zip (not gzip or zlib). 
 227      *   - creating new Huffman trees less frequently may not provide fast 
 228      *     adaptation to changes in the input data statistics. (Take for 
 229      *     example a binary file with poorly compressible code followed by 
 230      *     a highly compressible string table.) Smaller buffer sizes give 
 231      *     fast adaptation but have of course the overhead of transmitting 
 232      *     trees more frequently. 
 233      *   - I can't count above 4 
 236     uInt last_lit
;      /* running index in l_buf */ 
 239     /* Buffer for distances. To simplify the code, d_buf and l_buf have 
 240      * the same number of elements. To use different lengths, an extra flag 
 241      * array would be necessary. 
 244     ulg opt_len
;        /* bit length of current block with optimal trees */ 
 245     ulg static_len
;     /* bit length of current block with static trees */ 
 246     uInt matches
;       /* number of string matches in current block */ 
 247     int last_eob_len
;   /* bit length of EOB code for last block */ 
 250     ulg compressed_len
; /* total bit length of compressed file mod 2^32 */ 
 251     ulg bits_sent
;      /* bit length of compressed data sent mod 2^32 */ 
 255     /* Output buffer. bits are inserted starting at the bottom (least 
 259     /* Number of valid bits in bi_buf.  All bits above the last valid bit 
 265 /* Output a byte on the stream. 
 266  * IN assertion: there is enough room in pending_buf. 
 268 #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 
 271 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 
 272 /* Minimum amount of lookahead, except at the end of the input file. 
 273  * See deflate.c for comments about the MIN_MATCH+1. 
 276 #define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD) 
 277 /* In order to simplify the code, particularly on 16 bit machines, match 
 278  * distances are limited to MAX_DIST instead of WSIZE. 
 282 void _tr_init         
OF((deflate_state 
*s
)); 
 283 int  _tr_tally        
OF((deflate_state 
*s
, unsigned dist
, unsigned lc
)); 
 284 void _tr_flush_block  
OF((deflate_state 
*s
, charf 
*buf
, ulg stored_len
, 
 286 void _tr_align        
OF((deflate_state 
*s
)); 
 287 void _tr_stored_block 
OF((deflate_state 
*s
, charf 
*buf
, ulg stored_len
, 
 290 #define d_code(dist) \ 
 291    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 
 292 /* Mapping from a distance to a distance code. dist is the distance - 1 and 
 293  * must not have side effects. _dist_code[256] and _dist_code[257] are never 
 298 /* Inline versions of _tr_tally for speed: */ 
 300 #if defined(GEN_TREES_H) || !defined(STDC) 
 301   extern uch _length_code
[]; 
 302   extern uch _dist_code
[]; 
 304   extern const uch _length_code
[]; 
 305   extern const uch _dist_code
[]; 
 308 # define _tr_tally_lit(s, c, flush) \ 
 310     s->d_buf[s->last_lit] = 0; \ 
 311     s->l_buf[s->last_lit++] = cc; \ 
 312     s->dyn_ltree[cc].Freq++; \ 
 313     flush = (s->last_lit == s->lit_bufsize-1); \ 
 315 # define _tr_tally_dist(s, distance, length, flush) \ 
 316   { uch len = (length); \ 
 317     ush dist = (distance); \ 
 318     s->d_buf[s->last_lit] = dist; \ 
 319     s->l_buf[s->last_lit++] = len; \ 
 321     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 
 322     s->dyn_dtree[d_code(dist)].Freq++; \ 
 323     flush = (s->last_lit == s->lit_bufsize-1); \ 
 326 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 
 327 # define _tr_tally_dist(s, distance, length, flush) \ 
 328               flush = _tr_tally(s, distance, length) 
 331 #endif /* DEFLATE_H */