1 /* deflate.h -- internal compression state 
   2  * Copyright (C) 1995-2002 Jean-loup Gailly 
   3  * For conditions of distribution and use, see copyright notice in zlib.h 
   6 /* WARNING: this file should *not* be used by applications. It is 
   7    part of the implementation of the compression library and is 
   8    subject to change. Applications should only use zlib.h. */
  18 /* define NO_GZIP when compiling if you want to disable gzip header and 
  19    trailer creation by deflate().  NO_GZIP would be used to avoid linking in 
  20    the crc code when it is not needed.  For shared libraries, gzip encoding 
  21    should be left enabled. */ 
  26 /* =========================================================================== 
  27  * Internal compression state. 
  30 #define LENGTH_CODES 29 
  31 /* number of length codes, not counting the special END_BLOCK code */ 
  34 /* number of literal bytes 0..255 */ 
  36 #define L_CODES (LITERALS+1+LENGTH_CODES) 
  37 /* number of Literal or Length codes, including the END_BLOCK code */ 
  40 /* number of distance codes */ 
  43 /* number of codes used to transfer the bit lengths */ 
  45 #define HEAP_SIZE (2*L_CODES+1) 
  46 /* maximum heap size */ 
  49 /* All codes must not exceed MAX_BITS bits */ 
  52 #define BUSY_STATE   113 
  53 #define FINISH_STATE 666 
  57 /* Data structure describing a single value and its code string. */ 
  58 typedef struct ct_data_s 
{ 
  60         ush  freq
;       /* frequency count */ 
  61         ush  code
;       /* bit string */ 
  64         ush  dad
;        /* father node in Huffman tree */ 
  65         ush  len
;        /* length of bit string */ 
  74 typedef struct static_tree_desc_s  static_tree_desc
; 
  76 typedef struct tree_desc_s 
{ 
  77     ct_data 
*dyn_tree
;           /* the dynamic tree */ 
  78     int     max_code
;            /* largest code with non zero frequency */ 
  79     static_tree_desc 
*stat_desc
; /* the corresponding static tree */ 
  84 typedef unsigned IPos
; 
  86 /* A Pos is an index in the character window. We use short instead of int to 
  87  * save space in the various tables. IPos is used only for parameter passing. */
  90 typedef struct internal_state 
{ 
  91     z_streamp strm
;      /* pointer back to this zlib stream */ 
  92     int   status
;        /* as the name implies */ 
  93     Bytef 
*pending_buf
;  /* output still pending */ 
  94     ulg   pending_buf_size
; /* size of pending_buf */ 
  95     Bytef 
*pending_out
;  /* next pending byte to output to the stream */ 
  96     int   pending
;       /* nb of bytes in the pending buffer */ 
  97     int   wrap
;          /* bit 0 true for zlib, bit 1 true for gzip */ 
  98     Byte  data_type
;     /* UNKNOWN, BINARY or ASCII */ 
  99     Byte  method
;        /* STORED (for zip only) or DEFLATED */ 
 100     int   last_flush
;    /* value of flush param for previous deflate call */ 
 102                 /* used by deflate.c: */ 
 104     uInt  w_size
;        /* LZ77 window size (32K by default) */ 
 105     uInt  w_bits
;        /* log2(w_size)  (8..16) */ 
 106     uInt  w_mask
;        /* w_size - 1 */ 
 109     /* Sliding window. Input bytes are read into the second half of the window, 
 110      * and move to the first half later to keep a dictionary of at least wSize 
 111      * bytes. With this organization, matches are limited to a distance of 
 112      * wSize-MAX_MATCH bytes, but this ensures that IO is always 
 113      * performed with a length multiple of the block size. Also, it limits 
 114      * the window size to 64K, which is quite useful on MSDOS. 
 115      * To do: use the user input buffer as sliding window. */
 119     /* Actual size of window: 2*wSize, except when the user input buffer 
 120      * is directly used as sliding window. 
 124     /* Link to older string with same hash index. To limit the size of this 
 125      * array to 64K, this link is maintained only for the last 32K strings. 
 126      * An index in this array is thus a window index modulo 32K. */
 129     Posf 
*head
; /* Heads of the hash chains or NIL. */ 
 131     uInt  ins_h
;          /* hash index of string to be inserted */ 
 132     uInt  hash_size
;      /* number of elements in hash table */ 
 133     uInt  hash_bits
;      /* log2(hash_size) */ 
 134     uInt  hash_mask
;      /* hash_size-1 */ 
 137     /* Number of bits by which ins_h must be shifted at each input 
 138      * step. It must be such that after MIN_MATCH steps, the oldest 
 139      * byte no longer takes part in the hash key, that is: 
 140      *   hash_shift * MIN_MATCH >= hash_bits 
 144     /* Window position at the beginning of the current output block. Gets 
 145      * negative when the window is moved backwards. 
 148     uInt match_length
;           /* length of best match */ 
 149     IPos prev_match
;             /* previous match */ 
 150     int match_available
;         /* set if previous match exists */ 
 151     uInt strstart
;               /* start of string to insert */ 
 152     uInt match_start
;            /* start of matching string */ 
 153     uInt lookahead
;              /* number of valid bytes ahead in window */ 
 156     /* Length of the best match at previous step. Matches not greater than this 
 157      * are discarded. This is used in the lazy match evaluation. 
 160     uInt max_chain_length
; 
 161     /* To speed up deflation, hash chains are never searched beyond this 
 162      * length.  A higher limit improves compression ratio but degrades the 
 167     /* Attempt to find a better match only when the current match is strictly 
 168      * smaller than this value. This mechanism is used only for compression 
 171 #   define max_insert_length  max_lazy_match 
 172     /* Insert new strings in the hash table only if the match length is not 
 173      * greater than this length. This saves time but degrades compression. 
 174      * max_insert_length is used only for compression levels <= 3. 
 177     int level
;    /* compression level (1..9) */ 
 178     int strategy
; /* favor or force Huffman coding*/ 
 181     /* Use a faster search when the previous match is longer than this */ 
 183     int nice_match
; /* Stop searching when current match exceeds this */ 
 185                 /* used by trees.c: */ 
 186     /* Didn't use ct_data typedef below to suppress compiler warning */ 
 187     struct ct_data_s dyn_ltree
[HEAP_SIZE
];   /* literal and length tree */ 
 188     struct ct_data_s dyn_dtree
[2*D_CODES
+1]; /* distance tree */ 
 189     struct ct_data_s bl_tree
[2*BL_CODES
+1];  /* Huffman tree for bit lengths */ 
 191     struct tree_desc_s l_desc
;               /* desc. for literal tree */ 
 192     struct tree_desc_s d_desc
;               /* desc. for distance tree */ 
 193     struct tree_desc_s bl_desc
;              /* desc. for bit length tree */ 
 195     ush bl_count
[MAX_BITS
+1]; 
 196     /* number of codes at each bit length for an optimal tree */ 
 198     int heap
[2*L_CODES
+1];      /* heap used to build the Huffman trees */ 
 199     int heap_len
;               /* number of elements in the heap */ 
 200     int heap_max
;               /* element of largest frequency */ 
 201     /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 
 202      * The same heap array is used to build all trees. 
 205     uch depth
[2*L_CODES
+1]; 
 206     /* Depth of each subtree used as tie breaker for trees of equal frequency 
 209     uchf 
*l_buf
;          /* buffer for literals or lengths */ 
 212     /* Size of match buffer for literals/lengths.  There are 4 reasons for 
 213      * limiting lit_bufsize to 64K: 
 214      *   - frequencies can be kept in 16 bit counters 
 215      *   - if compression is not successful for the first block, all input 
 216      *     data is still in the window so we can still emit a stored block even 
 217      *     when input comes from standard input.  (This can also be done for 
 218      *     all blocks if lit_bufsize is not greater than 32K.) 
 219      *   - if compression is not successful for a file smaller than 64K, we can 
 220      *     even emit a stored file instead of a stored block (saving 5 bytes). 
 221      *     This is applicable only for zip (not gzip or zlib). 
 222      *   - creating new Huffman trees less frequently may not provide fast 
 223      *     adaptation to changes in the input data statistics. (Take for 
 224      *     example a binary file with poorly compressible code followed by 
 225      *     a highly compressible string table.) Smaller buffer sizes give 
 226      *     fast adaptation but have of course the overhead of transmitting 
 227      *     trees more frequently. 
 228      *   - I can't count above 4 */
 231     uInt last_lit
;      /* running index in l_buf */ 
 234     /* Buffer for distances. To simplify the code, d_buf and l_buf have 
 235      * the same number of elements. To use different lengths, an extra flag 
 236      * array would be necessary. */
 239     ulg opt_len
;        /* bit length of current block with optimal trees */ 
 240     ulg static_len
;     /* bit length of current block with static trees */ 
 241     uInt matches
;       /* number of string matches in current block */ 
 242     int last_eob_len
;   /* bit length of EOB code for last block */ 
 245     ulg compressed_len
; /* total bit length of compressed file mod 2^32 */ 
 246     ulg bits_sent
;      /* bit length of compressed data sent mod 2^32 */ 
 250     /* Output buffer. bits are inserted starting at the bottom (least 
 254     /* Number of valid bits in bi_buf.  All bits above the last valid bit 
 260 /* Output a byte on the stream. 
 261  * IN assertion: there is enough room in pending_buf. 
 263 #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 
 266 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 
 267 /* Minimum amount of lookahead, except at the end of the input file. 
 268  * See deflate.c for comments about the MIN_MATCH+1. */
 271 #define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD) 
 272 /* In order to simplify the code, particularly on 16 bit machines, match 
 273  * distances are limited to MAX_DIST instead of WSIZE. */
 277 void _tr_init         
OF((deflate_state 
*s
)); 
 278 int  _tr_tally        
OF((deflate_state 
*s
, unsigned dist
, unsigned lc
)); 
 279 void _tr_flush_block  
OF((deflate_state 
*s
, charf 
*buf
, ulg stored_len
, 
 281 void _tr_align        
OF((deflate_state 
*s
)); 
 282 void _tr_stored_block 
OF((deflate_state 
*s
, charf 
*buf
, ulg stored_len
, 
 285 #define d_code(dist) \ 
 286    ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 
 287 /* Mapping from a distance to a distance code. dist is the distance - 1 and 
 288  * must not have side effects. _dist_code[256] and _dist_code[257] are never used. */
 293 /* Inline versions of _tr_tally for speed: */ 
 295 #if defined(GEN_TREES_H) || !defined(STDC) 
 296   extern uch _length_code
[]; 
 297   extern uch _dist_code
[]; 
 299   extern const uch _length_code
[]; 
 300   extern const uch _dist_code
[]; 
 303 # define _tr_tally_lit(s, c, flush) \ 
 305     s->d_buf[s->last_lit] = 0; \ 
 306     s->l_buf[s->last_lit++] = cc; \ 
 307     s->dyn_ltree[cc].Freq++; \ 
 308     flush = (s->last_lit == s->lit_bufsize-1); \ 
 310 # define _tr_tally_dist(s, distance, length, flush) \ 
 311   { uch len = (length); \ 
 312     ush dist = (distance); \ 
 313     s->d_buf[s->last_lit] = dist; \ 
 314     s->l_buf[s->last_lit++] = len; \ 
 316     s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 
 317     s->dyn_dtree[d_code(dist)].Freq++; \ 
 318     flush = (s->last_lit == s->lit_bufsize-1); \ 
 321 # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 
 322 # define _tr_tally_dist(s, distance, length, flush) \ 
 323               flush = _tr_tally(s, distance, length) 
 326 #endif /* DEFLATE_H */