]> git.saurik.com Git - wxWidgets.git/blob - utils/Install/sfxzip/unshrink.c
Updated PyCrust from Patrick O'Brien
[wxWidgets.git] / utils / Install / sfxzip / unshrink.c
1 /*---------------------------------------------------------------------------
2
3 unshrink.c version 1.21 23 Nov 95
4
5
6 NOTE: This code may or may not infringe on the so-called "Welch
7 patent" owned by Unisys. (From reading the patent, it appears
8 that a pure LZW decompressor is *not* covered, but this claim has
9 not been tested in court, and Unisys is reported to believe other-
10 wise.) It is therefore the responsibility of the user to acquire
11 whatever license(s) may be required for legal use of this code.
12
13 THE INFO-ZIP GROUP DISCLAIMS ALL LIABILITY FOR USE OF THIS CODE
14 IN VIOLATION OF APPLICABLE PATENT LAW.
15
16
17 Shrinking is basically a dynamic LZW algorithm with allowed code sizes of
18 up to 13 bits; in addition, there is provision for partial clearing of
19 leaf nodes. PKWARE uses the special code 256 (decimal) to indicate a
20 change in code size or a partial clear of the code tree: 256,1 for the
21 former and 256,2 for the latter. [Note that partial clearing can "orphan"
22 nodes: the parent-to-be can be cleared before its new child is added,
23 but the child is added anyway (as an orphan, as though the parent still
24 existed). When the tree fills up to the point where the parent node is
25 reused, the orphan is effectively "adopted." Versions prior to 1.05 were
26 affected more due to greater use of pointers (to children and siblings
27 as well as parents).]
28
29 This replacement version of unshrink.c was written from scratch. It is
30 based only on the algorithms described in Mark Nelson's _The Data Compres-
31 sion Book_ and in Terry Welch's original paper in the June 1984 issue of
32 IEEE _Computer_; no existing source code, including any in Nelson's book,
33 was used.
34
35 Memory requirements have been reduced in this version and are now no more
36 than the original Sam Smith code. This is still larger than any of the
37 other algorithms: at a minimum, 8K+8K+16K (stack+values+parents) assuming
38 16-bit short ints, and this does not even include the output buffer (the
39 other algorithms leave the uncompressed data in the work area, typically
40 called slide[]). For machines with a 64KB data space this is a problem,
41 particularly when text conversion is required and line endings have more
42 than one character. UnZip's solution is to use two roughly equal halves
43 of outbuf for the ASCII conversion in such a case; the "unshrink" argument
44 to flush() signals that this is the case.
45
46 For large-memory machines, a second outbuf is allocated for translations,
47 but only if unshrinking and only if translations are required.
48
49 | binary mode | text mode
50 ---------------------------------------------------
51 big mem | big outbuf | big outbuf + big outbuf2 <- malloc'd here
52 small mem | small outbuf | half + half small outbuf
53
54 Copyright 1994, 1995 Greg Roelofs. See the accompanying file "COPYING"
55 in UnZip 5.20 (or later) source or binary distributions.
56
57 ---------------------------------------------------------------------------*/
58
59
60 #define UNZIP_INTERNAL
61 #include "unzip.h" /* defines LZW_CLEAN by default */
62
63
64 #ifndef LZW_CLEAN
65
66 static void partial_clear OF((__GPRO));
67
68 #ifdef DEBUG
69 # define OUTDBG(c) \
70 if ((c)<32 || (c)>=127) pipeit("\\x%02x",(c)); else { }
71 #else
72 # define OUTDBG(c)
73 #endif
74
75 /* HSIZE is defined as 2^13 (8192) in unzip.h */
76 #define BOGUSCODE 256
77 #define FLAG_BITS parent /* upper bits of parent[] used as flag bits */
78 #define CODE_MASK (HSIZE - 1) /* 0x1fff (lower bits are parent's index) */
79 #define FREE_CODE HSIZE /* 0x2000 (code is unused or was cleared) */
80 #define HAS_CHILD (HSIZE << 1) /* 0x4000 (code has a child--do not clear) */
81
82 #define parent G.area.shrink.Parent
83 #define Value G.area.shrink.value /* "value" conflicts with Pyramid ioctl.h */
84 #define stack G.area.shrink.Stack
85
86
87 /***********************/
88 /* Function unshrink() */
89 /***********************/
90
91 int unshrink(__G)
92 __GDEF
93 {
94 int offset = (HSIZE - 1);
95 uch *stacktop = stack + offset;
96 register uch *newstr;
97 int codesize=9, len, KwKwK, error;
98 shrint code, oldcode, freecode, curcode;
99 shrint lastfreecode;
100 unsigned int outbufsiz;
101 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
102 /* Normally realbuf and outbuf will be the same. However, if the data
103 * are redirected to a large memory buffer, realbuf will point to the
104 * new location while outbuf will remain pointing to the malloc'd
105 * memory buffer. */
106 uch *realbuf = G.outbuf;
107 #else
108 # define realbuf G.outbuf
109 #endif
110
111
112 /*---------------------------------------------------------------------------
113 Initialize various variables.
114 ---------------------------------------------------------------------------*/
115
116 lastfreecode = BOGUSCODE;
117
118 #ifndef VMS /* VMS uses its own buffer scheme for textmode flush(). */
119 #ifndef SMALL_MEM
120 /* non-memory-limited machines: allocate second (large) buffer for
121 * textmode conversion in flush(), but only if needed */
122 if (G.pInfo->textmode && !G.outbuf2 &&
123 (G.outbuf2 = (uch *)malloc(TRANSBUFSIZ)) == (uch *)NULL)
124 return PK_MEM3;
125 #endif
126 #endif /* !VMS */
127
128 for (code = 0; code < BOGUSCODE; ++code) {
129 Value[code] = (uch)code;
130 parent[code] = BOGUSCODE;
131 }
132 for (code = BOGUSCODE+1; code < HSIZE; ++code)
133 parent[code] = FREE_CODE;
134
135 #if (defined(DLL) && !defined(NO_SLIDE_REDIR))
136 if (G.redirect_slide) { /* use normal outbuf unless we're a DLL routine */
137 realbuf = G.redirect_buffer;
138 outbufsiz = G.redirect_size;
139 } else
140 #endif
141 #ifdef DLL
142 if (G.pInfo->textmode && !G.redirect_data)
143 #else
144 if (G.pInfo->textmode)
145 #endif
146 outbufsiz = RAWBUFSIZ;
147 else
148 outbufsiz = OUTBUFSIZ;
149 G.outptr = realbuf;
150 G.outcnt = 0L;
151
152 /*---------------------------------------------------------------------------
153 Get and output first code, then loop over remaining ones.
154 ---------------------------------------------------------------------------*/
155
156 READBITS(codesize, oldcode)
157 if (!G.zipeof) {
158 *G.outptr++ = (uch)oldcode;
159 OUTDBG((uch)oldcode)
160 ++G.outcnt;
161 }
162
163 do {
164 READBITS(codesize, code)
165 if (G.zipeof)
166 break;
167 if (code == BOGUSCODE) { /* possible to have consecutive escapes? */
168 READBITS(codesize, code)
169 if (code == 1) {
170 ++codesize;
171 Trace((stderr, " (codesize now %d bits)\n", codesize));
172 } else if (code == 2) {
173 Trace((stderr, " (partial clear code)\n"));
174 partial_clear(__G); /* clear leafs (nodes with no children) */
175 Trace((stderr, " (done with partial clear)\n"));
176 lastfreecode = BOGUSCODE; /* reset start of free-node search */
177 }
178 continue;
179 }
180
181 /*-----------------------------------------------------------------------
182 Translate code: traverse tree from leaf back to root.
183 -----------------------------------------------------------------------*/
184
185 newstr = stacktop;
186 curcode = code;
187
188 if (parent[curcode] == FREE_CODE) {
189 /* or (FLAG_BITS[curcode] & FREE_CODE)? */
190 KwKwK = TRUE;
191 Trace((stderr, " (found a KwKwK code %d; oldcode = %d)\n", code,
192 oldcode));
193 --newstr; /* last character will be same as first character */
194 curcode = oldcode;
195 } else
196 KwKwK = FALSE;
197
198 do {
199 *newstr-- = Value[curcode];
200 curcode = (shrint)(parent[curcode] & CODE_MASK);
201 } while (curcode != BOGUSCODE);
202
203 len = (int)(stacktop - newstr++);
204 if (KwKwK)
205 *stacktop = *newstr;
206
207 /*-----------------------------------------------------------------------
208 Write expanded string in reverse order to output buffer.
209 -----------------------------------------------------------------------*/
210
211 Trace((stderr, "code %4d; oldcode %4d; char %3d (%c); string [", code,
212 oldcode, (int)(*newstr), (*newstr<32 || *newstr>=127)? ' ':*newstr));
213
214 {
215 register uch *p;
216
217 for (p = newstr; p < newstr+len; ++p) {
218 *G.outptr++ = *p;
219 OUTDBG(*p)
220 if (++G.outcnt == outbufsiz) {
221 Trace((stderr, "doing flush(), outcnt = %lu\n", G.outcnt));
222 if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0)
223 pipeit("unshrink: flush() error (%d)\n",
224 error);
225 Trace((stderr, "done with flush()\n"));
226 G.outptr = realbuf;
227 G.outcnt = 0L;
228 }
229 }
230 }
231
232 /*-----------------------------------------------------------------------
233 Add new leaf (first character of newstr) to tree as child of oldcode.
234 -----------------------------------------------------------------------*/
235
236 /* search for freecode */
237 freecode = (shrint)(lastfreecode + 1);
238 /* add if-test before loop for speed? */
239 while (parent[freecode] != FREE_CODE)
240 ++freecode;
241 lastfreecode = freecode;
242 Trace((stderr, "]; newcode %d\n", freecode));
243
244 Value[freecode] = *newstr;
245 parent[freecode] = oldcode;
246 oldcode = code;
247
248 } while (!G.zipeof);
249
250 /*---------------------------------------------------------------------------
251 Flush any remaining data and return to sender...
252 ---------------------------------------------------------------------------*/
253
254 if (G.outcnt > 0L) {
255 Trace((stderr, "doing final flush(), outcnt = %lu\n", G.outcnt));
256 if ((error = flush(__G__ realbuf, G.outcnt, TRUE)) != 0)
257 pipeit("unshrink: flush() error (%d)\n", error);
258 Trace((stderr, "done with flush()\n"));
259 }
260
261 return PK_OK;
262
263 } /* end function unshrink() */
264
265
266
267
268
269 /****************************/
270 /* Function partial_clear() */ /* no longer recursive... */
271 /****************************/
272
273 static void partial_clear(__G)
274 __GDEF
275 {
276 register shrint code;
277
278 /* clear all nodes which have no children (i.e., leaf nodes only) */
279
280 /* first loop: mark each parent as such */
281 for (code = BOGUSCODE+1; code < HSIZE; ++code) {
282 register shrint cparent = (shrint)(parent[code] & CODE_MASK);
283
284 if (cparent > BOGUSCODE && cparent != FREE_CODE)
285 FLAG_BITS[cparent] |= HAS_CHILD; /* set parent's child-bit */
286 }
287
288 /* second loop: clear all nodes *not* marked as parents; reset flag bits */
289 for (code = BOGUSCODE+1; code < HSIZE; ++code) {
290 if (FLAG_BITS[code] & HAS_CHILD) /* just clear child-bit */
291 FLAG_BITS[code] &= ~HAS_CHILD;
292 else { /* leaf: lose it */
293 Trace((stderr, "%d\n", code));
294 parent[code] = FREE_CODE;
295 }
296 }
297
298 return;
299 }
300
301 #endif /* !LZW_CLEAN */