]>
Commit | Line | Data |
---|---|---|
44a7a5ab A |
1 | /*- |
2 | * Copyright (c) 1985, 1986, 1992, 1993 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Diomidis Spinellis and James A. Woods, derived from original | |
7 | * work by Spencer Thomas and Joseph Orost. | |
8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | |
12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in the | |
16 | * documentation and/or other materials provided with the distribution. | |
44a7a5ab A |
17 | * 4. Neither the name of the University nor the names of its contributors |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #if defined(LIBC_SCCS) && !defined(lint) | |
44a7a5ab | 35 | static char sccsid[] = "@(#)zopen.c 8.1 (Berkeley) 6/27/93"; |
44a7a5ab A |
36 | #endif /* LIBC_SCCS and not lint */ |
37 | ||
c59d3020 | 38 | #include <sys/cdefs.h> |
e19e38b2 | 39 | __FBSDID("$FreeBSD: src/usr.bin/compress/zopen.c,v 1.17 2011/09/28 08:47:17 bz Exp $"); |
c59d3020 | 40 | |
44a7a5ab A |
41 | /*- |
42 | * fcompress.c - File compression ala IEEE Computer, June 1984. | |
43 | * | |
44 | * Compress authors: | |
45 | * Spencer W. Thomas (decvax!utah-cs!thomas) | |
46 | * Jim McKie (decvax!mcvax!jim) | |
47 | * Steve Davies (decvax!vax135!petsd!peora!srd) | |
48 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) | |
49 | * James A. Woods (decvax!ihnp4!ames!jaw) | |
50 | * Joe Orost (decvax!vax135!petsd!joe) | |
51 | * | |
52 | * Cleaned up and converted to library returning I/O streams by | |
53 | * Diomidis Spinellis <dds@doc.ic.ac.uk>. | |
54 | * | |
55 | * zopen(filename, mode, bits) | |
56 | * Returns a FILE * that can be used for read or write. The modes | |
57 | * supported are only "r" and "w". Seeking is not allowed. On | |
58 | * reading the file is decompressed, on writing it is compressed. | |
59 | * The output is compatible with compress(1) with 16 bit tables. | |
60 | * Any file produced by compress(1) can be read. | |
61 | */ | |
62 | ||
63 | #include <sys/param.h> | |
64 | #include <sys/stat.h> | |
65 | ||
66 | #include <ctype.h> | |
67 | #include <errno.h> | |
68 | #include <signal.h> | |
69 | #include <stdio.h> | |
70 | #include <stdlib.h> | |
71 | #include <string.h> | |
72 | #include <unistd.h> | |
c59d3020 | 73 | #include "zopen.h" |
44a7a5ab A |
74 | |
75 | #define BITS 16 /* Default bits. */ | |
76 | #define HSIZE 69001 /* 95% occupancy */ | |
77 | ||
78 | /* A code_int must be able to hold 2**BITS values of type int, and also -1. */ | |
79 | typedef long code_int; | |
80 | typedef long count_int; | |
81 | ||
82 | typedef u_char char_type; | |
83 | static char_type magic_header[] = | |
84 | {'\037', '\235'}; /* 1F 9D */ | |
85 | ||
86 | #define BIT_MASK 0x1f /* Defines for third byte of header. */ | |
87 | #define BLOCK_MASK 0x80 | |
88 | ||
89 | /* | |
90 | * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is | |
91 | * a fourth header byte (for expansion). | |
92 | */ | |
93 | #define INIT_BITS 9 /* Initial number of bits/code. */ | |
94 | ||
95 | #define MAXCODE(n_bits) ((1 << (n_bits)) - 1) | |
96 | ||
97 | struct s_zstate { | |
98 | FILE *zs_fp; /* File stream for I/O */ | |
99 | char zs_mode; /* r or w */ | |
100 | enum { | |
101 | S_START, S_MIDDLE, S_EOF | |
102 | } zs_state; /* State of computation */ | |
c59d3020 A |
103 | u_int zs_n_bits; /* Number of bits/code. */ |
104 | u_int zs_maxbits; /* User settable max # bits/code. */ | |
44a7a5ab A |
105 | code_int zs_maxcode; /* Maximum code, given n_bits. */ |
106 | code_int zs_maxmaxcode; /* Should NEVER generate this code. */ | |
107 | count_int zs_htab [HSIZE]; | |
108 | u_short zs_codetab [HSIZE]; | |
109 | code_int zs_hsize; /* For dynamic table sizing. */ | |
110 | code_int zs_free_ent; /* First unused entry. */ | |
111 | /* | |
112 | * Block compression parameters -- after all codes are used up, | |
113 | * and compression rate changes, start over. | |
114 | */ | |
115 | int zs_block_compress; | |
116 | int zs_clear_flg; | |
117 | long zs_ratio; | |
118 | count_int zs_checkpoint; | |
c59d3020 | 119 | u_int zs_offset; |
44a7a5ab A |
120 | long zs_in_count; /* Length of input. */ |
121 | long zs_bytes_out; /* Length of compressed output. */ | |
122 | long zs_out_count; /* # of codes output (for debugging). */ | |
123 | char_type zs_buf[BITS]; | |
124 | union { | |
125 | struct { | |
126 | long zs_fcode; | |
127 | code_int zs_ent; | |
128 | code_int zs_hsize_reg; | |
129 | int zs_hshift; | |
e19e38b2 | 130 | } w; /* Write parameters */ |
44a7a5ab A |
131 | struct { |
132 | char_type *zs_stackp; | |
133 | int zs_finchar; | |
134 | code_int zs_code, zs_oldcode, zs_incode; | |
135 | int zs_roffset, zs_size; | |
136 | char_type zs_gbuf[BITS]; | |
137 | } r; /* Read parameters */ | |
138 | } u; | |
139 | }; | |
140 | ||
141 | /* Definitions to retain old variable names */ | |
142 | #define fp zs->zs_fp | |
143 | #define zmode zs->zs_mode | |
144 | #define state zs->zs_state | |
145 | #define n_bits zs->zs_n_bits | |
146 | #define maxbits zs->zs_maxbits | |
147 | #define maxcode zs->zs_maxcode | |
148 | #define maxmaxcode zs->zs_maxmaxcode | |
149 | #define htab zs->zs_htab | |
150 | #define codetab zs->zs_codetab | |
151 | #define hsize zs->zs_hsize | |
152 | #define free_ent zs->zs_free_ent | |
153 | #define block_compress zs->zs_block_compress | |
154 | #define clear_flg zs->zs_clear_flg | |
155 | #define ratio zs->zs_ratio | |
156 | #define checkpoint zs->zs_checkpoint | |
157 | #define offset zs->zs_offset | |
158 | #define in_count zs->zs_in_count | |
159 | #define bytes_out zs->zs_bytes_out | |
160 | #define out_count zs->zs_out_count | |
161 | #define buf zs->zs_buf | |
162 | #define fcode zs->u.w.zs_fcode | |
163 | #define hsize_reg zs->u.w.zs_hsize_reg | |
164 | #define ent zs->u.w.zs_ent | |
165 | #define hshift zs->u.w.zs_hshift | |
166 | #define stackp zs->u.r.zs_stackp | |
167 | #define finchar zs->u.r.zs_finchar | |
168 | #define code zs->u.r.zs_code | |
169 | #define oldcode zs->u.r.zs_oldcode | |
170 | #define incode zs->u.r.zs_incode | |
171 | #define roffset zs->u.r.zs_roffset | |
172 | #define size zs->u.r.zs_size | |
173 | #define gbuf zs->u.r.zs_gbuf | |
174 | ||
175 | /* | |
176 | * To save much memory, we overlay the table used by compress() with those | |
177 | * used by decompress(). The tab_prefix table is the same size and type as | |
178 | * the codetab. The tab_suffix table needs 2**BITS characters. We get this | |
179 | * from the beginning of htab. The output stack uses the rest of htab, and | |
180 | * contains characters. There is plenty of room for any possible stack | |
181 | * (stack used to be 8000 characters). | |
182 | */ | |
183 | ||
184 | #define htabof(i) htab[i] | |
185 | #define codetabof(i) codetab[i] | |
186 | ||
187 | #define tab_prefixof(i) codetabof(i) | |
188 | #define tab_suffixof(i) ((char_type *)(htab))[i] | |
189 | #define de_stack ((char_type *)&tab_suffixof(1 << BITS)) | |
190 | ||
191 | #define CHECK_GAP 10000 /* Ratio check interval. */ | |
192 | ||
193 | /* | |
194 | * the next two codes should not be changed lightly, as they must not | |
195 | * lie within the contiguous general code space. | |
196 | */ | |
197 | #define FIRST 257 /* First free entry. */ | |
198 | #define CLEAR 256 /* Table clear output code. */ | |
199 | ||
c59d3020 A |
200 | static int cl_block(struct s_zstate *); |
201 | static void cl_hash(struct s_zstate *, count_int); | |
202 | static code_int getcode(struct s_zstate *); | |
203 | static int output(struct s_zstate *, code_int); | |
204 | static int zclose(void *); | |
205 | static int zread(void *, char *, int); | |
206 | static int zwrite(void *, const char *, int); | |
44a7a5ab A |
207 | |
208 | /*- | |
209 | * Algorithm from "A Technique for High Performance Data Compression", | |
210 | * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. | |
211 | * | |
212 | * Algorithm: | |
213 | * Modified Lempel-Ziv method (LZW). Basically finds common | |
214 | * substrings and replaces them with a variable size code. This is | |
215 | * deterministic, and can be done on the fly. Thus, the decompression | |
216 | * procedure needs no input table, but tracks the way the table was built. | |
217 | */ | |
218 | ||
219 | /*- | |
220 | * compress write | |
221 | * | |
222 | * Algorithm: use open addressing double hashing (no chaining) on the | |
223 | * prefix code / next character combination. We do a variant of Knuth's | |
224 | * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime | |
225 | * secondary probe. Here, the modular division first probe is gives way | |
226 | * to a faster exclusive-or manipulation. Also do block compression with | |
227 | * an adaptive reset, whereby the code table is cleared when the compression | |
228 | * ratio decreases, but after the table fills. The variable-length output | |
229 | * codes are re-sized at this point, and a special CLEAR code is generated | |
230 | * for the decompressor. Late addition: construct the table according to | |
231 | * file size for noticeable speed improvement on small files. Please direct | |
232 | * questions about this implementation to ames!jaw. | |
233 | */ | |
234 | static int | |
c59d3020 | 235 | zwrite(void *cookie, const char *wbp, int num) |
44a7a5ab A |
236 | { |
237 | code_int i; | |
238 | int c, disp; | |
239 | struct s_zstate *zs; | |
240 | const u_char *bp; | |
241 | u_char tmp; | |
242 | int count; | |
243 | ||
244 | if (num == 0) | |
245 | return (0); | |
246 | ||
247 | zs = cookie; | |
248 | count = num; | |
40bf83fe | 249 | bp = (const u_char *)wbp; |
44a7a5ab A |
250 | if (state == S_MIDDLE) |
251 | goto middle; | |
252 | state = S_MIDDLE; | |
253 | ||
254 | maxmaxcode = 1L << maxbits; | |
255 | if (fwrite(magic_header, | |
256 | sizeof(char), sizeof(magic_header), fp) != sizeof(magic_header)) | |
257 | return (-1); | |
c59d3020 | 258 | tmp = (u_char)((maxbits) | block_compress); |
44a7a5ab A |
259 | if (fwrite(&tmp, sizeof(char), sizeof(tmp), fp) != sizeof(tmp)) |
260 | return (-1); | |
261 | ||
262 | offset = 0; | |
263 | bytes_out = 3; /* Includes 3-byte header mojo. */ | |
264 | out_count = 0; | |
265 | clear_flg = 0; | |
266 | ratio = 0; | |
267 | in_count = 1; | |
268 | checkpoint = CHECK_GAP; | |
269 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
270 | free_ent = ((block_compress) ? FIRST : 256); | |
271 | ||
272 | ent = *bp++; | |
273 | --count; | |
274 | ||
275 | hshift = 0; | |
276 | for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) | |
277 | hshift++; | |
278 | hshift = 8 - hshift; /* Set hash code range bound. */ | |
279 | ||
280 | hsize_reg = hsize; | |
281 | cl_hash(zs, (count_int)hsize_reg); /* Clear hash table. */ | |
282 | ||
4d0bb651 | 283 | middle: for (; count--;) { |
44a7a5ab A |
284 | c = *bp++; |
285 | in_count++; | |
286 | fcode = (long)(((long)c << maxbits) + ent); | |
287 | i = ((c << hshift) ^ ent); /* Xor hashing. */ | |
288 | ||
289 | if (htabof(i) == fcode) { | |
290 | ent = codetabof(i); | |
291 | continue; | |
292 | } else if ((long)htabof(i) < 0) /* Empty slot. */ | |
293 | goto nomatch; | |
294 | disp = hsize_reg - i; /* Secondary hash (after G. Knott). */ | |
295 | if (i == 0) | |
296 | disp = 1; | |
297 | probe: if ((i -= disp) < 0) | |
298 | i += hsize_reg; | |
299 | ||
300 | if (htabof(i) == fcode) { | |
301 | ent = codetabof(i); | |
302 | continue; | |
303 | } | |
304 | if ((long)htabof(i) >= 0) | |
305 | goto probe; | |
306 | nomatch: if (output(zs, (code_int) ent) == -1) | |
307 | return (-1); | |
308 | out_count++; | |
309 | ent = c; | |
310 | if (free_ent < maxmaxcode) { | |
311 | codetabof(i) = free_ent++; /* code -> hashtable */ | |
312 | htabof(i) = fcode; | |
313 | } else if ((count_int)in_count >= | |
314 | checkpoint && block_compress) { | |
315 | if (cl_block(zs) == -1) | |
316 | return (-1); | |
317 | } | |
318 | } | |
319 | return (num); | |
320 | } | |
321 | ||
322 | static int | |
c59d3020 | 323 | zclose(void *cookie) |
44a7a5ab A |
324 | { |
325 | struct s_zstate *zs; | |
326 | int rval; | |
327 | ||
328 | zs = cookie; | |
329 | if (zmode == 'w') { /* Put out the final code. */ | |
330 | if (output(zs, (code_int) ent) == -1) { | |
331 | (void)fclose(fp); | |
332 | free(zs); | |
333 | return (-1); | |
334 | } | |
335 | out_count++; | |
336 | if (output(zs, (code_int) - 1) == -1) { | |
337 | (void)fclose(fp); | |
338 | free(zs); | |
339 | return (-1); | |
340 | } | |
341 | } | |
342 | rval = fclose(fp) == EOF ? -1 : 0; | |
343 | free(zs); | |
344 | return (rval); | |
345 | } | |
346 | ||
347 | /*- | |
348 | * Output the given code. | |
349 | * Inputs: | |
350 | * code: A n_bits-bit integer. If == -1, then EOF. This assumes | |
351 | * that n_bits =< (long)wordsize - 1. | |
352 | * Outputs: | |
353 | * Outputs code to the file. | |
354 | * Assumptions: | |
355 | * Chars are 8 bits long. | |
356 | * Algorithm: | |
357 | * Maintain a BITS character long buffer (so that 8 codes will | |
358 | * fit in it exactly). Use the VAX insv instruction to insert each | |
359 | * code in turn. When the buffer fills up empty it and start over. | |
360 | */ | |
361 | ||
362 | static char_type lmask[9] = | |
363 | {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; | |
364 | static char_type rmask[9] = | |
365 | {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; | |
366 | ||
367 | static int | |
c59d3020 | 368 | output(struct s_zstate *zs, code_int ocode) |
44a7a5ab | 369 | { |
c59d3020 A |
370 | int r_off; |
371 | u_int bits; | |
44a7a5ab A |
372 | char_type *bp; |
373 | ||
374 | r_off = offset; | |
375 | bits = n_bits; | |
376 | bp = buf; | |
377 | if (ocode >= 0) { | |
378 | /* Get to the first byte. */ | |
379 | bp += (r_off >> 3); | |
380 | r_off &= 7; | |
381 | /* | |
382 | * Since ocode is always >= 8 bits, only need to mask the first | |
383 | * hunk on the left. | |
384 | */ | |
385 | *bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]); | |
386 | bp++; | |
387 | bits -= (8 - r_off); | |
388 | ocode >>= 8 - r_off; | |
389 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
390 | if (bits >= 8) { | |
391 | *bp++ = ocode; | |
392 | ocode >>= 8; | |
393 | bits -= 8; | |
394 | } | |
395 | /* Last bits. */ | |
396 | if (bits) | |
397 | *bp = ocode; | |
398 | offset += n_bits; | |
399 | if (offset == (n_bits << 3)) { | |
400 | bp = buf; | |
401 | bits = n_bits; | |
402 | bytes_out += bits; | |
403 | if (fwrite(bp, sizeof(char), bits, fp) != bits) | |
404 | return (-1); | |
4d0bb651 A |
405 | // bp += bits; |
406 | // bits = 0; | |
44a7a5ab A |
407 | offset = 0; |
408 | } | |
409 | /* | |
410 | * If the next entry is going to be too big for the ocode size, | |
411 | * then increase it, if possible. | |
412 | */ | |
413 | if (free_ent > maxcode || (clear_flg > 0)) { | |
414 | /* | |
415 | * Write the whole buffer, because the input side won't | |
416 | * discover the size increase until after it has read it. | |
417 | */ | |
418 | if (offset > 0) { | |
419 | if (fwrite(buf, 1, n_bits, fp) != n_bits) | |
420 | return (-1); | |
421 | bytes_out += n_bits; | |
422 | } | |
423 | offset = 0; | |
424 | ||
425 | if (clear_flg) { | |
426 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
427 | clear_flg = 0; | |
428 | } else { | |
429 | n_bits++; | |
430 | if (n_bits == maxbits) | |
431 | maxcode = maxmaxcode; | |
432 | else | |
433 | maxcode = MAXCODE(n_bits); | |
434 | } | |
435 | } | |
436 | } else { | |
437 | /* At EOF, write the rest of the buffer. */ | |
438 | if (offset > 0) { | |
439 | offset = (offset + 7) / 8; | |
440 | if (fwrite(buf, 1, offset, fp) != offset) | |
441 | return (-1); | |
442 | bytes_out += offset; | |
443 | } | |
444 | offset = 0; | |
445 | } | |
446 | return (0); | |
447 | } | |
448 | ||
449 | /* | |
450 | * Decompress read. This routine adapts to the codes in the file building | |
451 | * the "string" table on-the-fly; requiring no table to be stored in the | |
452 | * compressed file. The tables used herein are shared with those of the | |
453 | * compress() routine. See the definitions above. | |
454 | */ | |
455 | static int | |
c59d3020 | 456 | zread(void *cookie, char *rbp, int num) |
44a7a5ab A |
457 | { |
458 | u_int count; | |
459 | struct s_zstate *zs; | |
460 | u_char *bp, header[3]; | |
461 | ||
462 | if (num == 0) | |
463 | return (0); | |
464 | ||
465 | zs = cookie; | |
466 | count = num; | |
467 | bp = (u_char *)rbp; | |
468 | switch (state) { | |
469 | case S_START: | |
470 | state = S_MIDDLE; | |
471 | break; | |
472 | case S_MIDDLE: | |
473 | goto middle; | |
474 | case S_EOF: | |
475 | goto eof; | |
476 | } | |
477 | ||
478 | /* Check the magic number */ | |
479 | if (fread(header, | |
480 | sizeof(char), sizeof(header), fp) != sizeof(header) || | |
481 | memcmp(header, magic_header, sizeof(magic_header)) != 0) { | |
482 | errno = EFTYPE; | |
483 | return (-1); | |
484 | } | |
485 | maxbits = header[2]; /* Set -b from file. */ | |
486 | block_compress = maxbits & BLOCK_MASK; | |
487 | maxbits &= BIT_MASK; | |
488 | maxmaxcode = 1L << maxbits; | |
e19e38b2 | 489 | if (maxbits > BITS || maxbits < 12) { |
44a7a5ab A |
490 | errno = EFTYPE; |
491 | return (-1); | |
492 | } | |
493 | /* As above, initialize the first 256 entries in the table. */ | |
494 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
495 | for (code = 255; code >= 0; code--) { | |
496 | tab_prefixof(code) = 0; | |
497 | tab_suffixof(code) = (char_type) code; | |
498 | } | |
499 | free_ent = block_compress ? FIRST : 256; | |
500 | ||
501 | finchar = oldcode = getcode(zs); | |
502 | if (oldcode == -1) /* EOF already? */ | |
503 | return (0); /* Get out of here */ | |
504 | ||
505 | /* First code must be 8 bits = char. */ | |
506 | *bp++ = (u_char)finchar; | |
507 | count--; | |
508 | stackp = de_stack; | |
509 | ||
510 | while ((code = getcode(zs)) > -1) { | |
511 | ||
512 | if ((code == CLEAR) && block_compress) { | |
513 | for (code = 255; code >= 0; code--) | |
514 | tab_prefixof(code) = 0; | |
515 | clear_flg = 1; | |
e19e38b2 A |
516 | free_ent = FIRST; |
517 | oldcode = -1; | |
518 | continue; | |
44a7a5ab A |
519 | } |
520 | incode = code; | |
521 | ||
e19e38b2 | 522 | /* Special case for kWkWk string. */ |
44a7a5ab | 523 | if (code >= free_ent) { |
e19e38b2 A |
524 | if (code > free_ent || oldcode == -1) { |
525 | /* Bad stream. */ | |
526 | errno = EINVAL; | |
527 | return (-1); | |
528 | } | |
44a7a5ab A |
529 | *stackp++ = finchar; |
530 | code = oldcode; | |
531 | } | |
e19e38b2 A |
532 | /* |
533 | * The above condition ensures that code < free_ent. | |
534 | * The construction of tab_prefixof in turn guarantees that | |
535 | * each iteration decreases code and therefore stack usage is | |
536 | * bound by 1 << BITS - 256. | |
537 | */ | |
44a7a5ab A |
538 | |
539 | /* Generate output characters in reverse order. */ | |
540 | while (code >= 256) { | |
541 | *stackp++ = tab_suffixof(code); | |
542 | code = tab_prefixof(code); | |
543 | } | |
544 | *stackp++ = finchar = tab_suffixof(code); | |
545 | ||
546 | /* And put them out in forward order. */ | |
547 | middle: do { | |
548 | if (count-- == 0) | |
549 | return (num); | |
550 | *bp++ = *--stackp; | |
551 | } while (stackp > de_stack); | |
552 | ||
553 | /* Generate the new entry. */ | |
e19e38b2 | 554 | if ((code = free_ent) < maxmaxcode && oldcode != -1) { |
44a7a5ab A |
555 | tab_prefixof(code) = (u_short) oldcode; |
556 | tab_suffixof(code) = finchar; | |
557 | free_ent = code + 1; | |
558 | } | |
559 | ||
560 | /* Remember previous code. */ | |
561 | oldcode = incode; | |
562 | } | |
563 | state = S_EOF; | |
564 | eof: return (num - count); | |
565 | } | |
566 | ||
567 | /*- | |
568 | * Read one code from the standard input. If EOF, return -1. | |
569 | * Inputs: | |
570 | * stdin | |
571 | * Outputs: | |
572 | * code or -1 is returned. | |
573 | */ | |
574 | static code_int | |
c59d3020 | 575 | getcode(struct s_zstate *zs) |
44a7a5ab A |
576 | { |
577 | code_int gcode; | |
578 | int r_off, bits; | |
579 | char_type *bp; | |
580 | ||
581 | bp = gbuf; | |
582 | if (clear_flg > 0 || roffset >= size || free_ent > maxcode) { | |
583 | /* | |
584 | * If the next entry will be too big for the current gcode | |
585 | * size, then we must increase the size. This implies reading | |
586 | * a new buffer full, too. | |
587 | */ | |
588 | if (free_ent > maxcode) { | |
589 | n_bits++; | |
590 | if (n_bits == maxbits) /* Won't get any bigger now. */ | |
591 | maxcode = maxmaxcode; | |
592 | else | |
593 | maxcode = MAXCODE(n_bits); | |
594 | } | |
595 | if (clear_flg > 0) { | |
596 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
597 | clear_flg = 0; | |
598 | } | |
599 | size = fread(gbuf, 1, n_bits, fp); | |
600 | if (size <= 0) /* End of file. */ | |
601 | return (-1); | |
602 | roffset = 0; | |
603 | /* Round size down to integral number of codes. */ | |
604 | size = (size << 3) - (n_bits - 1); | |
605 | } | |
606 | r_off = roffset; | |
607 | bits = n_bits; | |
608 | ||
609 | /* Get to the first byte. */ | |
610 | bp += (r_off >> 3); | |
611 | r_off &= 7; | |
612 | ||
613 | /* Get first part (low order bits). */ | |
614 | gcode = (*bp++ >> r_off); | |
615 | bits -= (8 - r_off); | |
616 | r_off = 8 - r_off; /* Now, roffset into gcode word. */ | |
617 | ||
618 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
619 | if (bits >= 8) { | |
620 | gcode |= *bp++ << r_off; | |
621 | r_off += 8; | |
622 | bits -= 8; | |
623 | } | |
624 | ||
625 | /* High order bits. */ | |
626 | gcode |= (*bp & rmask[bits]) << r_off; | |
627 | roffset += n_bits; | |
628 | ||
629 | return (gcode); | |
630 | } | |
631 | ||
632 | static int | |
c59d3020 | 633 | cl_block(struct s_zstate *zs) /* Table clear for block compress. */ |
44a7a5ab A |
634 | { |
635 | long rat; | |
636 | ||
637 | checkpoint = in_count + CHECK_GAP; | |
638 | ||
639 | if (in_count > 0x007fffff) { /* Shift will overflow. */ | |
640 | rat = bytes_out >> 8; | |
641 | if (rat == 0) /* Don't divide by zero. */ | |
642 | rat = 0x7fffffff; | |
643 | else | |
644 | rat = in_count / rat; | |
645 | } else | |
646 | rat = (in_count << 8) / bytes_out; /* 8 fractional bits. */ | |
647 | if (rat > ratio) | |
648 | ratio = rat; | |
649 | else { | |
650 | ratio = 0; | |
651 | cl_hash(zs, (count_int) hsize); | |
652 | free_ent = FIRST; | |
653 | clear_flg = 1; | |
654 | if (output(zs, (code_int) CLEAR) == -1) | |
655 | return (-1); | |
656 | } | |
657 | return (0); | |
658 | } | |
659 | ||
660 | static void | |
c59d3020 | 661 | cl_hash(struct s_zstate *zs, count_int cl_hsize) /* Reset code table. */ |
44a7a5ab A |
662 | { |
663 | count_int *htab_p; | |
664 | long i, m1; | |
665 | ||
666 | m1 = -1; | |
667 | htab_p = htab + cl_hsize; | |
668 | i = cl_hsize - 16; | |
669 | do { /* Might use Sys V memset(3) here. */ | |
670 | *(htab_p - 16) = m1; | |
671 | *(htab_p - 15) = m1; | |
672 | *(htab_p - 14) = m1; | |
673 | *(htab_p - 13) = m1; | |
674 | *(htab_p - 12) = m1; | |
675 | *(htab_p - 11) = m1; | |
676 | *(htab_p - 10) = m1; | |
677 | *(htab_p - 9) = m1; | |
678 | *(htab_p - 8) = m1; | |
679 | *(htab_p - 7) = m1; | |
680 | *(htab_p - 6) = m1; | |
681 | *(htab_p - 5) = m1; | |
682 | *(htab_p - 4) = m1; | |
683 | *(htab_p - 3) = m1; | |
684 | *(htab_p - 2) = m1; | |
685 | *(htab_p - 1) = m1; | |
686 | htab_p -= 16; | |
687 | } while ((i -= 16) >= 0); | |
688 | for (i += 16; i > 0; i--) | |
689 | *--htab_p = m1; | |
690 | } | |
691 | ||
692 | FILE * | |
c59d3020 | 693 | zopen(const char *fname, const char *mode, int bits) |
44a7a5ab A |
694 | { |
695 | struct s_zstate *zs; | |
696 | ||
697 | if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' || | |
698 | bits < 0 || bits > BITS) { | |
699 | errno = EINVAL; | |
700 | return (NULL); | |
701 | } | |
702 | ||
703 | if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) | |
704 | return (NULL); | |
705 | ||
706 | maxbits = bits ? bits : BITS; /* User settable max # bits/code. */ | |
c59d3020 | 707 | maxmaxcode = 1L << maxbits; /* Should NEVER generate this code. */ |
44a7a5ab A |
708 | hsize = HSIZE; /* For dynamic table sizing. */ |
709 | free_ent = 0; /* First unused entry. */ | |
710 | block_compress = BLOCK_MASK; | |
711 | clear_flg = 0; | |
712 | ratio = 0; | |
713 | checkpoint = CHECK_GAP; | |
714 | in_count = 1; /* Length of input. */ | |
715 | out_count = 0; /* # of codes output (for debugging). */ | |
716 | state = S_START; | |
717 | roffset = 0; | |
718 | size = 0; | |
719 | ||
720 | /* | |
721 | * Layering compress on top of stdio in order to provide buffering, | |
722 | * and ensure that reads and write work with the data specified. | |
723 | */ | |
724 | if ((fp = fopen(fname, mode)) == NULL) { | |
725 | free(zs); | |
726 | return (NULL); | |
727 | } | |
728 | switch (*mode) { | |
729 | case 'r': | |
730 | zmode = 'r'; | |
731 | return (funopen(zs, zread, NULL, NULL, zclose)); | |
732 | case 'w': | |
733 | zmode = 'w'; | |
734 | return (funopen(zs, NULL, zwrite, NULL, zclose)); | |
735 | } | |
736 | /* NOTREACHED */ | |
737 | return (NULL); | |
738 | } |