]>
Commit | Line | Data |
---|---|---|
44a7a5ab A |
1 | /* $NetBSD: zopen.c,v 1.6 1997/09/15 10:58:39 lukem Exp $ */ |
2 | ||
3 | /*- | |
4 | * Copyright (c) 1985, 1986, 1992, 1993 | |
5 | * The Regents of the University of California. All rights reserved. | |
6 | * | |
7 | * This code is derived from software contributed to Berkeley by | |
8 | * Diomidis Spinellis and James A. Woods, derived from original | |
9 | * work by Spencer Thomas and Joseph Orost. | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * 1. Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | |
16 | * 2. Redistributions in binary form must reproduce the above copyright | |
17 | * notice, this list of conditions and the following disclaimer in the | |
18 | * documentation and/or other materials provided with the distribution. | |
19 | * 3. All advertising materials mentioning features or use of this software | |
20 | * must display the following acknowledgement: | |
21 | * This product includes software developed by the University of | |
22 | * California, Berkeley and its contributors. | |
23 | * 4. Neither the name of the University nor the names of its contributors | |
24 | * may be used to endorse or promote products derived from this software | |
25 | * without specific prior written permission. | |
26 | * | |
27 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
28 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
32 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
33 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
34 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
36 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
37 | * SUCH DAMAGE. | |
38 | */ | |
39 | ||
40 | #if defined(LIBC_SCCS) && !defined(lint) | |
41 | #if 0 | |
42 | static char sccsid[] = "@(#)zopen.c 8.1 (Berkeley) 6/27/93"; | |
43 | #else | |
44 | static char rcsid[] = "$NetBSD: zopen.c,v 1.6 1997/09/15 10:58:39 lukem Exp $"; | |
45 | #endif | |
46 | #endif /* LIBC_SCCS and not lint */ | |
47 | ||
48 | /*- | |
49 | * fcompress.c - File compression ala IEEE Computer, June 1984. | |
50 | * | |
51 | * Compress authors: | |
52 | * Spencer W. Thomas (decvax!utah-cs!thomas) | |
53 | * Jim McKie (decvax!mcvax!jim) | |
54 | * Steve Davies (decvax!vax135!petsd!peora!srd) | |
55 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) | |
56 | * James A. Woods (decvax!ihnp4!ames!jaw) | |
57 | * Joe Orost (decvax!vax135!petsd!joe) | |
58 | * | |
59 | * Cleaned up and converted to library returning I/O streams by | |
60 | * Diomidis Spinellis <dds@doc.ic.ac.uk>. | |
61 | * | |
62 | * zopen(filename, mode, bits) | |
63 | * Returns a FILE * that can be used for read or write. The modes | |
64 | * supported are only "r" and "w". Seeking is not allowed. On | |
65 | * reading the file is decompressed, on writing it is compressed. | |
66 | * The output is compatible with compress(1) with 16 bit tables. | |
67 | * Any file produced by compress(1) can be read. | |
68 | */ | |
69 | ||
70 | #include <sys/param.h> | |
71 | #include <sys/stat.h> | |
72 | ||
73 | #include <ctype.h> | |
74 | #include <errno.h> | |
75 | #include <signal.h> | |
76 | #include <stdio.h> | |
77 | #include <stdlib.h> | |
78 | #include <string.h> | |
79 | #include <unistd.h> | |
80 | ||
81 | #define BITS 16 /* Default bits. */ | |
82 | #define HSIZE 69001 /* 95% occupancy */ | |
83 | ||
84 | /* A code_int must be able to hold 2**BITS values of type int, and also -1. */ | |
85 | typedef long code_int; | |
86 | typedef long count_int; | |
87 | ||
88 | typedef u_char char_type; | |
89 | static char_type magic_header[] = | |
90 | {'\037', '\235'}; /* 1F 9D */ | |
91 | ||
92 | #define BIT_MASK 0x1f /* Defines for third byte of header. */ | |
93 | #define BLOCK_MASK 0x80 | |
94 | ||
95 | /* | |
96 | * Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is | |
97 | * a fourth header byte (for expansion). | |
98 | */ | |
99 | #define INIT_BITS 9 /* Initial number of bits/code. */ | |
100 | ||
101 | #define MAXCODE(n_bits) ((1 << (n_bits)) - 1) | |
102 | ||
103 | struct s_zstate { | |
104 | FILE *zs_fp; /* File stream for I/O */ | |
105 | char zs_mode; /* r or w */ | |
106 | enum { | |
107 | S_START, S_MIDDLE, S_EOF | |
108 | } zs_state; /* State of computation */ | |
109 | int zs_n_bits; /* Number of bits/code. */ | |
110 | int zs_maxbits; /* User settable max # bits/code. */ | |
111 | code_int zs_maxcode; /* Maximum code, given n_bits. */ | |
112 | code_int zs_maxmaxcode; /* Should NEVER generate this code. */ | |
113 | count_int zs_htab [HSIZE]; | |
114 | u_short zs_codetab [HSIZE]; | |
115 | code_int zs_hsize; /* For dynamic table sizing. */ | |
116 | code_int zs_free_ent; /* First unused entry. */ | |
117 | /* | |
118 | * Block compression parameters -- after all codes are used up, | |
119 | * and compression rate changes, start over. | |
120 | */ | |
121 | int zs_block_compress; | |
122 | int zs_clear_flg; | |
123 | long zs_ratio; | |
124 | count_int zs_checkpoint; | |
125 | int zs_offset; | |
126 | long zs_in_count; /* Length of input. */ | |
127 | long zs_bytes_out; /* Length of compressed output. */ | |
128 | long zs_out_count; /* # of codes output (for debugging). */ | |
129 | char_type zs_buf[BITS]; | |
130 | union { | |
131 | struct { | |
132 | long zs_fcode; | |
133 | code_int zs_ent; | |
134 | code_int zs_hsize_reg; | |
135 | int zs_hshift; | |
136 | } w; /* Write paramenters */ | |
137 | struct { | |
138 | char_type *zs_stackp; | |
139 | int zs_finchar; | |
140 | code_int zs_code, zs_oldcode, zs_incode; | |
141 | int zs_roffset, zs_size; | |
142 | char_type zs_gbuf[BITS]; | |
143 | } r; /* Read parameters */ | |
144 | } u; | |
145 | }; | |
146 | ||
147 | /* Definitions to retain old variable names */ | |
148 | #define fp zs->zs_fp | |
149 | #define zmode zs->zs_mode | |
150 | #define state zs->zs_state | |
151 | #define n_bits zs->zs_n_bits | |
152 | #define maxbits zs->zs_maxbits | |
153 | #define maxcode zs->zs_maxcode | |
154 | #define maxmaxcode zs->zs_maxmaxcode | |
155 | #define htab zs->zs_htab | |
156 | #define codetab zs->zs_codetab | |
157 | #define hsize zs->zs_hsize | |
158 | #define free_ent zs->zs_free_ent | |
159 | #define block_compress zs->zs_block_compress | |
160 | #define clear_flg zs->zs_clear_flg | |
161 | #define ratio zs->zs_ratio | |
162 | #define checkpoint zs->zs_checkpoint | |
163 | #define offset zs->zs_offset | |
164 | #define in_count zs->zs_in_count | |
165 | #define bytes_out zs->zs_bytes_out | |
166 | #define out_count zs->zs_out_count | |
167 | #define buf zs->zs_buf | |
168 | #define fcode zs->u.w.zs_fcode | |
169 | #define hsize_reg zs->u.w.zs_hsize_reg | |
170 | #define ent zs->u.w.zs_ent | |
171 | #define hshift zs->u.w.zs_hshift | |
172 | #define stackp zs->u.r.zs_stackp | |
173 | #define finchar zs->u.r.zs_finchar | |
174 | #define code zs->u.r.zs_code | |
175 | #define oldcode zs->u.r.zs_oldcode | |
176 | #define incode zs->u.r.zs_incode | |
177 | #define roffset zs->u.r.zs_roffset | |
178 | #define size zs->u.r.zs_size | |
179 | #define gbuf zs->u.r.zs_gbuf | |
180 | ||
181 | /* | |
182 | * To save much memory, we overlay the table used by compress() with those | |
183 | * used by decompress(). The tab_prefix table is the same size and type as | |
184 | * the codetab. The tab_suffix table needs 2**BITS characters. We get this | |
185 | * from the beginning of htab. The output stack uses the rest of htab, and | |
186 | * contains characters. There is plenty of room for any possible stack | |
187 | * (stack used to be 8000 characters). | |
188 | */ | |
189 | ||
190 | #define htabof(i) htab[i] | |
191 | #define codetabof(i) codetab[i] | |
192 | ||
193 | #define tab_prefixof(i) codetabof(i) | |
194 | #define tab_suffixof(i) ((char_type *)(htab))[i] | |
195 | #define de_stack ((char_type *)&tab_suffixof(1 << BITS)) | |
196 | ||
197 | #define CHECK_GAP 10000 /* Ratio check interval. */ | |
198 | ||
199 | /* | |
200 | * the next two codes should not be changed lightly, as they must not | |
201 | * lie within the contiguous general code space. | |
202 | */ | |
203 | #define FIRST 257 /* First free entry. */ | |
204 | #define CLEAR 256 /* Table clear output code. */ | |
205 | ||
206 | static int cl_block __P((struct s_zstate *)); | |
207 | static void cl_hash __P((struct s_zstate *, count_int)); | |
208 | static code_int getcode __P((struct s_zstate *)); | |
209 | static int output __P((struct s_zstate *, code_int)); | |
210 | static int zclose __P((void *)); | |
211 | FILE *zopen __P((const char *, const char *, int)); | |
212 | static int zread __P((void *, char *, int)); | |
213 | static int zwrite __P((void *, const char *, int)); | |
214 | ||
215 | /*- | |
216 | * Algorithm from "A Technique for High Performance Data Compression", | |
217 | * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. | |
218 | * | |
219 | * Algorithm: | |
220 | * Modified Lempel-Ziv method (LZW). Basically finds common | |
221 | * substrings and replaces them with a variable size code. This is | |
222 | * deterministic, and can be done on the fly. Thus, the decompression | |
223 | * procedure needs no input table, but tracks the way the table was built. | |
224 | */ | |
225 | ||
226 | /*- | |
227 | * compress write | |
228 | * | |
229 | * Algorithm: use open addressing double hashing (no chaining) on the | |
230 | * prefix code / next character combination. We do a variant of Knuth's | |
231 | * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime | |
232 | * secondary probe. Here, the modular division first probe is gives way | |
233 | * to a faster exclusive-or manipulation. Also do block compression with | |
234 | * an adaptive reset, whereby the code table is cleared when the compression | |
235 | * ratio decreases, but after the table fills. The variable-length output | |
236 | * codes are re-sized at this point, and a special CLEAR code is generated | |
237 | * for the decompressor. Late addition: construct the table according to | |
238 | * file size for noticeable speed improvement on small files. Please direct | |
239 | * questions about this implementation to ames!jaw. | |
240 | */ | |
241 | static int | |
242 | zwrite(cookie, wbp, num) | |
243 | void *cookie; | |
244 | const char *wbp; | |
245 | int num; | |
246 | { | |
247 | code_int i; | |
248 | int c, disp; | |
249 | struct s_zstate *zs; | |
250 | const u_char *bp; | |
251 | u_char tmp; | |
252 | int count; | |
253 | ||
254 | if (num == 0) | |
255 | return (0); | |
256 | ||
257 | zs = cookie; | |
258 | count = num; | |
259 | bp = (u_char *)wbp; | |
260 | if (state == S_MIDDLE) | |
261 | goto middle; | |
262 | state = S_MIDDLE; | |
263 | ||
264 | maxmaxcode = 1L << maxbits; | |
265 | if (fwrite(magic_header, | |
266 | sizeof(char), sizeof(magic_header), fp) != sizeof(magic_header)) | |
267 | return (-1); | |
268 | tmp = (u_char)(maxbits | block_compress); | |
269 | if (fwrite(&tmp, sizeof(char), sizeof(tmp), fp) != sizeof(tmp)) | |
270 | return (-1); | |
271 | ||
272 | offset = 0; | |
273 | bytes_out = 3; /* Includes 3-byte header mojo. */ | |
274 | out_count = 0; | |
275 | clear_flg = 0; | |
276 | ratio = 0; | |
277 | in_count = 1; | |
278 | checkpoint = CHECK_GAP; | |
279 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
280 | free_ent = ((block_compress) ? FIRST : 256); | |
281 | ||
282 | ent = *bp++; | |
283 | --count; | |
284 | ||
285 | hshift = 0; | |
286 | for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) | |
287 | hshift++; | |
288 | hshift = 8 - hshift; /* Set hash code range bound. */ | |
289 | ||
290 | hsize_reg = hsize; | |
291 | cl_hash(zs, (count_int)hsize_reg); /* Clear hash table. */ | |
292 | ||
293 | middle: for (i = 0; count--;) { | |
294 | c = *bp++; | |
295 | in_count++; | |
296 | fcode = (long)(((long)c << maxbits) + ent); | |
297 | i = ((c << hshift) ^ ent); /* Xor hashing. */ | |
298 | ||
299 | if (htabof(i) == fcode) { | |
300 | ent = codetabof(i); | |
301 | continue; | |
302 | } else if ((long)htabof(i) < 0) /* Empty slot. */ | |
303 | goto nomatch; | |
304 | disp = hsize_reg - i; /* Secondary hash (after G. Knott). */ | |
305 | if (i == 0) | |
306 | disp = 1; | |
307 | probe: if ((i -= disp) < 0) | |
308 | i += hsize_reg; | |
309 | ||
310 | if (htabof(i) == fcode) { | |
311 | ent = codetabof(i); | |
312 | continue; | |
313 | } | |
314 | if ((long)htabof(i) >= 0) | |
315 | goto probe; | |
316 | nomatch: if (output(zs, (code_int) ent) == -1) | |
317 | return (-1); | |
318 | out_count++; | |
319 | ent = c; | |
320 | if (free_ent < maxmaxcode) { | |
321 | codetabof(i) = free_ent++; /* code -> hashtable */ | |
322 | htabof(i) = fcode; | |
323 | } else if ((count_int)in_count >= | |
324 | checkpoint && block_compress) { | |
325 | if (cl_block(zs) == -1) | |
326 | return (-1); | |
327 | } | |
328 | } | |
329 | return (num); | |
330 | } | |
331 | ||
332 | static int | |
333 | zclose(cookie) | |
334 | void *cookie; | |
335 | { | |
336 | struct s_zstate *zs; | |
337 | int rval; | |
338 | ||
339 | zs = cookie; | |
340 | if (zmode == 'w') { /* Put out the final code. */ | |
341 | if (output(zs, (code_int) ent) == -1) { | |
342 | (void)fclose(fp); | |
343 | free(zs); | |
344 | return (-1); | |
345 | } | |
346 | out_count++; | |
347 | if (output(zs, (code_int) - 1) == -1) { | |
348 | (void)fclose(fp); | |
349 | free(zs); | |
350 | return (-1); | |
351 | } | |
352 | } | |
353 | rval = fclose(fp) == EOF ? -1 : 0; | |
354 | free(zs); | |
355 | return (rval); | |
356 | } | |
357 | ||
358 | /*- | |
359 | * Output the given code. | |
360 | * Inputs: | |
361 | * code: A n_bits-bit integer. If == -1, then EOF. This assumes | |
362 | * that n_bits =< (long)wordsize - 1. | |
363 | * Outputs: | |
364 | * Outputs code to the file. | |
365 | * Assumptions: | |
366 | * Chars are 8 bits long. | |
367 | * Algorithm: | |
368 | * Maintain a BITS character long buffer (so that 8 codes will | |
369 | * fit in it exactly). Use the VAX insv instruction to insert each | |
370 | * code in turn. When the buffer fills up empty it and start over. | |
371 | */ | |
372 | ||
373 | static char_type lmask[9] = | |
374 | {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; | |
375 | static char_type rmask[9] = | |
376 | {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; | |
377 | ||
378 | static int | |
379 | output(zs, ocode) | |
380 | struct s_zstate *zs; | |
381 | code_int ocode; | |
382 | { | |
383 | int bits, r_off; | |
384 | char_type *bp; | |
385 | ||
386 | r_off = offset; | |
387 | bits = n_bits; | |
388 | bp = buf; | |
389 | if (ocode >= 0) { | |
390 | /* Get to the first byte. */ | |
391 | bp += (r_off >> 3); | |
392 | r_off &= 7; | |
393 | /* | |
394 | * Since ocode is always >= 8 bits, only need to mask the first | |
395 | * hunk on the left. | |
396 | */ | |
397 | *bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]); | |
398 | bp++; | |
399 | bits -= (8 - r_off); | |
400 | ocode >>= 8 - r_off; | |
401 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
402 | if (bits >= 8) { | |
403 | *bp++ = ocode; | |
404 | ocode >>= 8; | |
405 | bits -= 8; | |
406 | } | |
407 | /* Last bits. */ | |
408 | if (bits) | |
409 | *bp = ocode; | |
410 | offset += n_bits; | |
411 | if (offset == (n_bits << 3)) { | |
412 | bp = buf; | |
413 | bits = n_bits; | |
414 | bytes_out += bits; | |
415 | if (fwrite(bp, sizeof(char), bits, fp) != bits) | |
416 | return (-1); | |
417 | bp += bits; | |
418 | bits = 0; | |
419 | offset = 0; | |
420 | } | |
421 | /* | |
422 | * If the next entry is going to be too big for the ocode size, | |
423 | * then increase it, if possible. | |
424 | */ | |
425 | if (free_ent > maxcode || (clear_flg > 0)) { | |
426 | /* | |
427 | * Write the whole buffer, because the input side won't | |
428 | * discover the size increase until after it has read it. | |
429 | */ | |
430 | if (offset > 0) { | |
431 | if (fwrite(buf, 1, n_bits, fp) != n_bits) | |
432 | return (-1); | |
433 | bytes_out += n_bits; | |
434 | } | |
435 | offset = 0; | |
436 | ||
437 | if (clear_flg) { | |
438 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
439 | clear_flg = 0; | |
440 | } else { | |
441 | n_bits++; | |
442 | if (n_bits == maxbits) | |
443 | maxcode = maxmaxcode; | |
444 | else | |
445 | maxcode = MAXCODE(n_bits); | |
446 | } | |
447 | } | |
448 | } else { | |
449 | /* At EOF, write the rest of the buffer. */ | |
450 | if (offset > 0) { | |
451 | offset = (offset + 7) / 8; | |
452 | if (fwrite(buf, 1, offset, fp) != offset) | |
453 | return (-1); | |
454 | bytes_out += offset; | |
455 | } | |
456 | offset = 0; | |
457 | } | |
458 | return (0); | |
459 | } | |
460 | ||
461 | /* | |
462 | * Decompress read. This routine adapts to the codes in the file building | |
463 | * the "string" table on-the-fly; requiring no table to be stored in the | |
464 | * compressed file. The tables used herein are shared with those of the | |
465 | * compress() routine. See the definitions above. | |
466 | */ | |
467 | static int | |
468 | zread(cookie, rbp, num) | |
469 | void *cookie; | |
470 | char *rbp; | |
471 | int num; | |
472 | { | |
473 | u_int count; | |
474 | struct s_zstate *zs; | |
475 | u_char *bp, header[3]; | |
476 | ||
477 | if (num == 0) | |
478 | return (0); | |
479 | ||
480 | zs = cookie; | |
481 | count = num; | |
482 | bp = (u_char *)rbp; | |
483 | switch (state) { | |
484 | case S_START: | |
485 | state = S_MIDDLE; | |
486 | break; | |
487 | case S_MIDDLE: | |
488 | goto middle; | |
489 | case S_EOF: | |
490 | goto eof; | |
491 | } | |
492 | ||
493 | /* Check the magic number */ | |
494 | if (fread(header, | |
495 | sizeof(char), sizeof(header), fp) != sizeof(header) || | |
496 | memcmp(header, magic_header, sizeof(magic_header)) != 0) { | |
497 | errno = EFTYPE; | |
498 | return (-1); | |
499 | } | |
500 | maxbits = header[2]; /* Set -b from file. */ | |
501 | block_compress = maxbits & BLOCK_MASK; | |
502 | maxbits &= BIT_MASK; | |
503 | maxmaxcode = 1L << maxbits; | |
504 | if (maxbits > BITS) { | |
505 | errno = EFTYPE; | |
506 | return (-1); | |
507 | } | |
508 | /* As above, initialize the first 256 entries in the table. */ | |
509 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
510 | for (code = 255; code >= 0; code--) { | |
511 | tab_prefixof(code) = 0; | |
512 | tab_suffixof(code) = (char_type) code; | |
513 | } | |
514 | free_ent = block_compress ? FIRST : 256; | |
515 | ||
516 | finchar = oldcode = getcode(zs); | |
517 | if (oldcode == -1) /* EOF already? */ | |
518 | return (0); /* Get out of here */ | |
519 | ||
520 | /* First code must be 8 bits = char. */ | |
521 | *bp++ = (u_char)finchar; | |
522 | count--; | |
523 | stackp = de_stack; | |
524 | ||
525 | while ((code = getcode(zs)) > -1) { | |
526 | ||
527 | if ((code == CLEAR) && block_compress) { | |
528 | for (code = 255; code >= 0; code--) | |
529 | tab_prefixof(code) = 0; | |
530 | clear_flg = 1; | |
531 | free_ent = FIRST - 1; | |
532 | if ((code = getcode(zs)) == -1) /* O, untimely death! */ | |
533 | break; | |
534 | } | |
535 | incode = code; | |
536 | ||
537 | /* Special case for KwKwK string. */ | |
538 | if (code >= free_ent) { | |
539 | *stackp++ = finchar; | |
540 | code = oldcode; | |
541 | } | |
542 | ||
543 | /* Generate output characters in reverse order. */ | |
544 | while (code >= 256) { | |
545 | *stackp++ = tab_suffixof(code); | |
546 | code = tab_prefixof(code); | |
547 | } | |
548 | *stackp++ = finchar = tab_suffixof(code); | |
549 | ||
550 | /* And put them out in forward order. */ | |
551 | middle: do { | |
552 | if (count-- == 0) | |
553 | return (num); | |
554 | *bp++ = *--stackp; | |
555 | } while (stackp > de_stack); | |
556 | ||
557 | /* Generate the new entry. */ | |
558 | if ((code = free_ent) < maxmaxcode) { | |
559 | tab_prefixof(code) = (u_short) oldcode; | |
560 | tab_suffixof(code) = finchar; | |
561 | free_ent = code + 1; | |
562 | } | |
563 | ||
564 | /* Remember previous code. */ | |
565 | oldcode = incode; | |
566 | } | |
567 | state = S_EOF; | |
568 | eof: return (num - count); | |
569 | } | |
570 | ||
571 | /*- | |
572 | * Read one code from the standard input. If EOF, return -1. | |
573 | * Inputs: | |
574 | * stdin | |
575 | * Outputs: | |
576 | * code or -1 is returned. | |
577 | */ | |
578 | static code_int | |
579 | getcode(zs) | |
580 | struct s_zstate *zs; | |
581 | { | |
582 | code_int gcode; | |
583 | int r_off, bits; | |
584 | char_type *bp; | |
585 | ||
586 | bp = gbuf; | |
587 | if (clear_flg > 0 || roffset >= size || free_ent > maxcode) { | |
588 | /* | |
589 | * If the next entry will be too big for the current gcode | |
590 | * size, then we must increase the size. This implies reading | |
591 | * a new buffer full, too. | |
592 | */ | |
593 | if (free_ent > maxcode) { | |
594 | n_bits++; | |
595 | if (n_bits == maxbits) /* Won't get any bigger now. */ | |
596 | maxcode = maxmaxcode; | |
597 | else | |
598 | maxcode = MAXCODE(n_bits); | |
599 | } | |
600 | if (clear_flg > 0) { | |
601 | maxcode = MAXCODE(n_bits = INIT_BITS); | |
602 | clear_flg = 0; | |
603 | } | |
604 | size = fread(gbuf, 1, n_bits, fp); | |
605 | if (size <= 0) /* End of file. */ | |
606 | return (-1); | |
607 | roffset = 0; | |
608 | /* Round size down to integral number of codes. */ | |
609 | size = (size << 3) - (n_bits - 1); | |
610 | } | |
611 | r_off = roffset; | |
612 | bits = n_bits; | |
613 | ||
614 | /* Get to the first byte. */ | |
615 | bp += (r_off >> 3); | |
616 | r_off &= 7; | |
617 | ||
618 | /* Get first part (low order bits). */ | |
619 | gcode = (*bp++ >> r_off); | |
620 | bits -= (8 - r_off); | |
621 | r_off = 8 - r_off; /* Now, roffset into gcode word. */ | |
622 | ||
623 | /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ | |
624 | if (bits >= 8) { | |
625 | gcode |= *bp++ << r_off; | |
626 | r_off += 8; | |
627 | bits -= 8; | |
628 | } | |
629 | ||
630 | /* High order bits. */ | |
631 | gcode |= (*bp & rmask[bits]) << r_off; | |
632 | roffset += n_bits; | |
633 | ||
634 | return (gcode); | |
635 | } | |
636 | ||
637 | static int | |
638 | cl_block(zs) /* Table clear for block compress. */ | |
639 | struct s_zstate *zs; | |
640 | { | |
641 | long rat; | |
642 | ||
643 | checkpoint = in_count + CHECK_GAP; | |
644 | ||
645 | if (in_count > 0x007fffff) { /* Shift will overflow. */ | |
646 | rat = bytes_out >> 8; | |
647 | if (rat == 0) /* Don't divide by zero. */ | |
648 | rat = 0x7fffffff; | |
649 | else | |
650 | rat = in_count / rat; | |
651 | } else | |
652 | rat = (in_count << 8) / bytes_out; /* 8 fractional bits. */ | |
653 | if (rat > ratio) | |
654 | ratio = rat; | |
655 | else { | |
656 | ratio = 0; | |
657 | cl_hash(zs, (count_int) hsize); | |
658 | free_ent = FIRST; | |
659 | clear_flg = 1; | |
660 | if (output(zs, (code_int) CLEAR) == -1) | |
661 | return (-1); | |
662 | } | |
663 | return (0); | |
664 | } | |
665 | ||
666 | static void | |
667 | cl_hash(zs, cl_hsize) /* Reset code table. */ | |
668 | struct s_zstate *zs; | |
669 | count_int cl_hsize; | |
670 | { | |
671 | count_int *htab_p; | |
672 | long i, m1; | |
673 | ||
674 | m1 = -1; | |
675 | htab_p = htab + cl_hsize; | |
676 | i = cl_hsize - 16; | |
677 | do { /* Might use Sys V memset(3) here. */ | |
678 | *(htab_p - 16) = m1; | |
679 | *(htab_p - 15) = m1; | |
680 | *(htab_p - 14) = m1; | |
681 | *(htab_p - 13) = m1; | |
682 | *(htab_p - 12) = m1; | |
683 | *(htab_p - 11) = m1; | |
684 | *(htab_p - 10) = m1; | |
685 | *(htab_p - 9) = m1; | |
686 | *(htab_p - 8) = m1; | |
687 | *(htab_p - 7) = m1; | |
688 | *(htab_p - 6) = m1; | |
689 | *(htab_p - 5) = m1; | |
690 | *(htab_p - 4) = m1; | |
691 | *(htab_p - 3) = m1; | |
692 | *(htab_p - 2) = m1; | |
693 | *(htab_p - 1) = m1; | |
694 | htab_p -= 16; | |
695 | } while ((i -= 16) >= 0); | |
696 | for (i += 16; i > 0; i--) | |
697 | *--htab_p = m1; | |
698 | } | |
699 | ||
700 | FILE * | |
701 | zopen(fname, mode, bits) | |
702 | const char *fname, *mode; | |
703 | int bits; | |
704 | { | |
705 | struct s_zstate *zs; | |
706 | ||
707 | if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' || | |
708 | bits < 0 || bits > BITS) { | |
709 | errno = EINVAL; | |
710 | return (NULL); | |
711 | } | |
712 | ||
713 | if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL) | |
714 | return (NULL); | |
715 | ||
716 | maxbits = bits ? bits : BITS; /* User settable max # bits/code. */ | |
717 | maxmaxcode = 1 << maxbits; /* Should NEVER generate this code. */ | |
718 | hsize = HSIZE; /* For dynamic table sizing. */ | |
719 | free_ent = 0; /* First unused entry. */ | |
720 | block_compress = BLOCK_MASK; | |
721 | clear_flg = 0; | |
722 | ratio = 0; | |
723 | checkpoint = CHECK_GAP; | |
724 | in_count = 1; /* Length of input. */ | |
725 | out_count = 0; /* # of codes output (for debugging). */ | |
726 | state = S_START; | |
727 | roffset = 0; | |
728 | size = 0; | |
729 | ||
730 | /* | |
731 | * Layering compress on top of stdio in order to provide buffering, | |
732 | * and ensure that reads and write work with the data specified. | |
733 | */ | |
734 | if ((fp = fopen(fname, mode)) == NULL) { | |
735 | free(zs); | |
736 | return (NULL); | |
737 | } | |
738 | switch (*mode) { | |
739 | case 'r': | |
740 | zmode = 'r'; | |
741 | return (funopen(zs, zread, NULL, NULL, zclose)); | |
742 | case 'w': | |
743 | zmode = 'w'; | |
744 | return (funopen(zs, NULL, zwrite, NULL, zclose)); | |
745 | } | |
746 | /* NOTREACHED */ | |
747 | return (NULL); | |
748 | } |