]>
Commit | Line | Data |
---|---|---|
f6bcfd97 BP |
1 | /*--------------------------------------------------------------------------- |
2 | ||
3 | unshrink.c version 1.21 23 Nov 95 | |
4 | ||
5 | ||
6 | NOTE: This code may or may not infringe on the so-called "Welch | |
7 | patent" owned by Unisys. (From reading the patent, it appears | |
8 | that a pure LZW decompressor is *not* covered, but this claim has | |
9 | not been tested in court, and Unisys is reported to believe other- | |
10 | wise.) It is therefore the responsibility of the user to acquire | |
11 | whatever license(s) may be required for legal use of this code. | |
12 | ||
13 | THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE | |
14 | IN VIOLATION OF APPLICABLE PATENT LAW. | |
15 | ||
16 | ||
17 | Shrinking is basically a dynamic LZW algorithm with allowed code sizes of | |
18 | up to 13 bits; in addition, there is provision for partial clearing of | |
19 | leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a | |
20 | change in code size or a partial clear of the code tree: 256,1 for the | |
21 | former and 256,2 for the latter. [Note that partial clearing can "orphan" | |
22 | nodes: the parent-to-be can be cleared before its new child is added, | |
23 | but the child is added anyway (as an orphan, as though the parent still | |
24 | existed). When the tree fills up to the point where the parent node is | |
25 | reused, the orphan is effectively "adopted." Versions prior to 1.05 were | |
26 | affected more due to greater use of pointers (to children and siblings | |
27 | as well as parents).] | |
28 | ||
29 | This replacement version of unshrink.c was written from scratch. It is | |
30 | based only on the algorithms described in Mark Nelson's _The Data Compres- | |
31 | sion Book_ and in Terry Welch's original paper in the June 1984 issue of | |
32 | IEEE _Computer_; no existing source code, including any in Nelson's book, | |
33 | was used. | |
34 | ||
35 | Memory requirements have been reduced in this version and are now no more | |
36 | than the original Sam Smith code. This is still larger than any of the | |
37 | other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming | |
38 | 16-bit short ints, and this does not even include the output buffer (the | |
39 | other algorithms leave the uncompressed data in the work area, typically | |
40 | called slide[]). For machines with a 64KB data space this is a problem, | |
41 | particularly when text conversion is required and line endings have more | |
42 | than one character. UnZip's solution is to use two roughly equal halves | |
43 | of outbuf for the ASCII conversion in such a case; the "unshrink" argument | |
44 | to flush() signals that this is the case. | |
45 | ||
46 | For large-memory machines, a second outbuf is allocated for translations, | |
47 | but only if unshrinking and only if translations are required. | |
48 | ||
49 | | binary mode | text mode | |
50 | --------------------------------------------------- | |
51 | big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here | |
52 | small mem | small outbuf | half + half small outbuf | |
53 | ||
54 | Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING" | |
55 | in UnZip 5.20 (or later) source or binary distributions. | |
56 | ||
57 | ---------------------------------------------------------------------------*/ | |
58 | ||
59 | ||
60 | #define UNZIP_INTERNAL | |
61 | #include "unzip.h" /* defines LZW_CLEAN by default */ | |
62 | ||
63 | ||
64 | #ifndef LZW_CLEAN | |
65 | ||
66 | static void partial_clear OF((__GPRO)); | |
67 | ||
68 | #ifdef DEBUG | |
69 | # define OUTDBG(c) \ | |
70 | if ((c)<32 || (c)>=127) pipeit("\\x%02x",(c)); else { } | |
71 | #else | |
72 | # define OUTDBG(c) | |
73 | #endif | |
74 | ||
75 | /* HSIZE is defined as 2^13 (8192) in unzip.h */ | |
76 | #define BOGUSCODE 256 | |
77 | #define FLAG_BITS parent /* upper bits of parent[] used as flag bits */ | |
78 | #define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */ | |
79 | #define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */ | |
80 | #define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */ | |
81 | ||
82 | #define parent G.area.shrink.Parent | |
83 | #define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */ | |
84 | #define stack G.area.shrink.Stack | |
85 | ||
86 | ||
87 | /***********************/ | |
88 | /* Function unshrink() */ | |
89 | /***********************/ | |
90 | ||
91 | int unshrink(__G) | |
92 | __GDEF | |
93 | { | |
94 | int offset = (HSIZE - 1); | |
95 | uch *stacktop = stack + offset; | |
96 | register uch *newstr; | |
97 | int codesize=9, len, KwKwK, error; | |
98 | shrint code, oldcode, freecode, curcode; | |
99 | shrint lastfreecode; | |
100 | unsigned int outbufsiz; | |
101 | #if (defined(DLL) && !defined(NO_SLIDE_REDIR)) | |
102 | /* Normally realbuf and outbuf will be the same. However, if the data | |
103 | * are redirected to a large memory buffer, realbuf will point to the | |
104 | * new location while outbuf will remain pointing to the malloc'd | |
105 | * memory buffer. */ | |
106 | uch *realbuf = G.outbuf; | |
107 | #else | |
108 | # define realbuf G.outbuf | |
109 | #endif | |
110 | ||
111 | ||
112 | /*--------------------------------------------------------------------------- | |
113 | Initialize various variables. | |
114 | ---------------------------------------------------------------------------*/ | |
115 | ||
116 | lastfreecode = BOGUSCODE; | |
117 | ||
118 | #ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */ | |
119 | #ifndef SMALL_MEM | |
120 | /* non-memory-limited machines: allocate second (large) buffer for | |
121 | * textmode conversion in flush(), but only if needed */ | |
122 | if (G.pInfo->textmode && !G.outbuf2 && | |
123 | (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL) | |
124 | return PK_MEM3; | |
125 | #endif | |
126 | #endif /* !VMS */ | |
127 | ||
128 | for (code = 0; code < BOGUSCODE; ++code) { | |
129 | Value[code] = (uch)code; | |
130 | parent[code] = BOGUSCODE; | |
131 | } | |
132 | for (code = BOGUSCODE+1; code < HSIZE; ++code) | |
133 | parent[code] = FREE_CODE; | |
134 | ||
135 | #if (defined(DLL) && !defined(NO_SLIDE_REDIR)) | |
136 | if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */ | |
137 | realbuf = G.redirect_buffer; | |
138 | outbufsiz = G.redirect_size; | |
139 | } else | |
140 | #endif | |
141 | #ifdef DLL | |
142 | if (G.pInfo->textmode && !G.redirect_data) | |
143 | #else | |
144 | if (G.pInfo->textmode) | |
145 | #endif | |
146 | outbufsiz = RAWBUFSIZ; | |
147 | else | |
148 | outbufsiz = OUTBUFSIZ; | |
149 | G.outptr = realbuf; | |
150 | G.outcnt = 0L; | |
151 | ||
152 | /*--------------------------------------------------------------------------- | |
153 | Get and output first code, then loop over remaining ones. | |
154 | ---------------------------------------------------------------------------*/ | |
155 | ||
156 | READBITS(codesize, oldcode) | |
157 | if (!G.zipeof) { | |
158 | *G.outptr++ = (uch)oldcode; | |
159 | OUTDBG((uch)oldcode) | |
160 | ++G.outcnt; | |
161 | } | |
162 | ||
163 | do { | |
164 | READBITS(codesize, code) | |
165 | if (G.zipeof) | |
166 | break; | |
167 | if (code == BOGUSCODE) { /* possible to have consecutive escapes? */ | |
168 | READBITS(codesize, code) | |
169 | if (code == 1) { | |
170 | ++codesize; | |
171 | Trace((stderr, " (codesize now %d bits)\n", codesize)); | |
172 | } else if (code == 2) { | |
173 | Trace((stderr, " (partial clear code)\n")); | |
174 | partial_clear(__G); /* clear leafs (nodes with no children) */ | |
175 | Trace((stderr, " (done with partial clear)\n")); | |
176 | lastfreecode = BOGUSCODE; /* reset start of free-node search */ | |
177 | } | |
178 | continue; | |
179 | } | |
180 | ||
181 | /*----------------------------------------------------------------------- | |
182 | Translate code: traverse tree from leaf back to root. | |
183 | -----------------------------------------------------------------------*/ | |
184 | ||
185 | newstr = stacktop; | |
186 | curcode = code; | |
187 | ||
188 | if (parent[curcode] == FREE_CODE) { | |
189 | /* or (FLAG_BITS[curcode] & FREE_CODE)? */ | |
190 | KwKwK = TRUE; | |
191 | Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code, | |
192 | oldcode)); | |
193 | --newstr; /* last character will be same as first character */ | |
194 | curcode = oldcode; | |
195 | } else | |
196 | KwKwK = FALSE; | |
197 | ||
198 | do { | |
199 | *newstr-- = Value[curcode]; | |
200 | curcode = (shrint)(parent[curcode] & CODE_MASK); | |
201 | } while (curcode != BOGUSCODE); | |
202 | ||
203 | len = (int)(stacktop - newstr++); | |
204 | if (KwKwK) | |
205 | *stacktop = *newstr; | |
206 | ||
207 | /*----------------------------------------------------------------------- | |
208 | Write expanded string in reverse order to output buffer. | |
209 | -----------------------------------------------------------------------*/ | |
210 | ||
211 | Trace((stderr, "code %4d; oldcode %4d; char %3d (%c); string [", code, | |
212 | oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr)); | |
213 | ||
214 | { | |
215 | register uch *p; | |
216 | ||
217 | for (p = newstr; p < newstr+len; ++p) { | |
218 | *G.outptr++ = *p; | |
219 | OUTDBG(*p) | |
220 | if (++G.outcnt == outbufsiz) { | |
221 | Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt)); | |
222 | if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) | |
223 | pipeit("unshrink: flush() error (%d)\n", | |
224 | error); | |
225 | Trace((stderr, "done with flush()\n")); | |
226 | G.outptr = realbuf; | |
227 | G.outcnt = 0L; | |
228 | } | |
229 | } | |
230 | } | |
231 | ||
232 | /*----------------------------------------------------------------------- | |
233 | Add new leaf (first character of newstr) to tree as child of oldcode. | |
234 | -----------------------------------------------------------------------*/ | |
235 | ||
236 | /* search for freecode */ | |
237 | freecode = (shrint)(lastfreecode + 1); | |
238 | /* add if-test before loop for speed? */ | |
239 | while (parent[freecode] != FREE_CODE) | |
240 | ++freecode; | |
241 | lastfreecode = freecode; | |
242 | Trace((stderr, "]; newcode %d\n", freecode)); | |
243 | ||
244 | Value[freecode] = *newstr; | |
245 | parent[freecode] = oldcode; | |
246 | oldcode = code; | |
247 | ||
248 | } while (!G.zipeof); | |
249 | ||
250 | /*--------------------------------------------------------------------------- | |
251 | Flush any remaining data and return to sender... | |
252 | ---------------------------------------------------------------------------*/ | |
253 | ||
254 | if (G.outcnt > 0L) { | |
255 | Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt)); | |
256 | if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0) | |
257 | pipeit("unshrink: flush() error (%d)\n", error); | |
258 | Trace((stderr, "done with flush()\n")); | |
259 | } | |
260 | ||
261 | return PK_OK; | |
262 | ||
263 | } /* end function unshrink() */ | |
264 | ||
265 | ||
266 | ||
267 | ||
268 | ||
269 | /****************************/ | |
270 | /* Function partial_clear() */ /* no longer recursive... */ | |
271 | /****************************/ | |
272 | ||
273 | static void partial_clear(__G) | |
274 | __GDEF | |
275 | { | |
276 | register shrint code; | |
277 | ||
278 | /* clear all nodes which have no children (i.e., leaf nodes only) */ | |
279 | ||
280 | /* first loop: mark each parent as such */ | |
281 | for (code = BOGUSCODE+1; code < HSIZE; ++code) { | |
282 | register shrint cparent = (shrint)(parent[code] & CODE_MASK); | |
283 | ||
284 | if (cparent > BOGUSCODE && cparent != FREE_CODE) | |
285 | FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */ | |
286 | } | |
287 | ||
288 | /* second loop: clear all nodes *not* marked as parents; reset flag bits */ | |
289 | for (code = BOGUSCODE+1; code < HSIZE; ++code) { | |
290 | if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */ | |
291 | FLAG_BITS[code] &= ~HAS_CHILD; | |
292 | else { /* leaf: lose it */ | |
293 | Trace((stderr, "%d\n", code)); | |
294 | parent[code] = FREE_CODE; | |
295 | } | |
296 | } | |
297 | ||
298 | return; | |
299 | } | |
300 | ||
301 | #endif /* !LZW_CLEAN */ |