]>
Commit | Line | Data |
---|---|---|
6d2010ae A |
// 32-bit x86 build (AT&T syntax, run through the C preprocessor). Everything up to the
// matching #endif is assembled only when __i386__ is defined.
1 | #if (defined __i386__) |
2 | ||
3 | /* This assembly was first compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand optimized. */ | |
4 | ||
// Error message strings. The error exits further down store the address of one of these
// at offset 24 of strm (presumably strm->msg -- confirm against the z_stream layout).
5 | .cstring | |
6 | LC0: | |
7 | .ascii "invalid distance too far back\0" | |
8 | LC1: | |
9 | .ascii "invalid distance code\0" | |
10 | LC2: | |
11 | .ascii "invalid literal/length code\0" | |
12 | .text | |
13 | .align 4,0x90 | |
14 | ||
15 | ||
// NOTE(review): ten zero bytes are emitted in front of the function only for INFLATE_STRICT
// builds. The purpose is not stated here; presumably it shifts the code to retune the
// alignment of hot labels when the extra STRICT instructions are present -- confirm before changing.
16 | #ifdef INFLATE_STRICT | |
17 | .byte 0 | |
18 | .byte 0 | |
19 | .byte 0 | |
20 | .byte 0 | |
21 | .byte 0 | |
22 | .byte 0 | |
23 | .byte 0 | |
24 | .byte 0 | |
25 | .byte 0 | |
26 | .byte 0 | |
27 | #endif | |
// _inflate_fast (i386): entry, frame setup and loading of the working state.
// Arguments are read from the stack frame: 8(%ebp) = strm, 12(%ebp) = start.
// edi/esi/ebx are saved here and restored before ret; 92 bytes of locals are reserved.
// After this block: ebx = strm, edi = bits, and every other working variable lives in the
// %ebp-relative slot named by the #defines below.
28 | .globl _inflate_fast | |
29 | _inflate_fast: | |
30 | ||
31 | // set up ebp as the frame pointer so the arguments strm and start can be addressed | |
32 | pushl %ebp | |
33 | movl %esp, %ebp | |
34 | ||
35 | // save edi/esi/ebx on the stack (popped in reverse order in the epilogue) | |
36 | pushl %edi | |
37 | pushl %esi | |
38 | pushl %ebx | |
39 | ||
40 | // allocate for local variables 92-12=80, + 12 to align %esp to 16-byte boundary | |
41 | subl $92, %esp | |
// ebx = strm (first argument); ebx is not written again until the epilogue
42 | movl 8(%ebp), %ebx | |
43 | ||
44 | /* definitions to help code readability */ | |
45 | ||
// Only bits and strm are registers; every other name below is a 4-byte stack slot, so an
// instruction may use at most one of them as a memory operand.
46 | #define bits %edi | |
47 | #define strm %ebx | |
48 | #define state 28(strm) // state = (struct inflate_state FAR *)strm->state; | |
49 | #define in -84(%ebp) // in = strm->next_in - OFF; OFF=0 | |
50 | #define last -80(%ebp) // last = in + (strm->avail_in - 5); | |
51 | #define out -28(%ebp) // out = strm->next_out - OFF; | |
52 | #define beg -76(%ebp) // beg = out - (start - strm->avail_out); | |
53 | #define end -72(%ebp) // end = out + (strm->avail_out - 257); | |
54 | #define wsize -68(%ebp) // wsize = state->wsize; | |
55 | #define whave -64(%ebp) // whave = state->whave; | |
56 | #define write -60(%ebp) // write = state->write; | |
57 | #define window -56(%ebp) // window = state->window; | |
58 | #define hold -52(%ebp) // hold = state->hold; | |
59 | #define lcode -48(%ebp) // lcode = state->lencode; | |
60 | #define dcode -44(%ebp) // dcode = state->distcode; | |
61 | #define lmask -40(%ebp) // lmask = (1U << state->lenbits) - 1; | |
62 | #define dmask -36(%ebp) // dmask = (1U << state->distbits) - 1; | |
63 | #define len -32(%ebp) | |
64 | #define dmax -20(%ebp) | |
65 | #define dist -16(%ebp) // dist | |
66 | #define write_wsize -24(%ebp) // write+wsize | |
67 | #define write_1 -88(%ebp) // write-1 | |
68 | #define op -92(%ebp) // op | |
69 | ||
70 | movl (strm), %eax // strm->next_in | |
71 | movl %eax, in // in = strm->next_in - OFF; OFF=0 | |
72 | ||
73 | subl $5, %eax // in - 5; | |
74 | movl 4(strm), %ecx // strm->avail_in | |
75 | addl %ecx, %eax // in + (strm->avail_in - 5); | |
76 | movl %eax, last // last = in + (strm->avail_in - 5); | |
77 | ||
78 | movl 12(strm), %esi // strm->next_out | |
79 | movl %esi, out // out = strm->next_out - OFF; | |
80 | ||
81 | movl 16(strm), %ecx // strm->avail_out | |
82 | movl %esi, %eax // out | |
83 | subl 12(%ebp), %eax // out - start (start is the second argument) | |
84 | addl %ecx, %eax // out - (start - strm->avail_out); | |
85 | movl %eax, beg // beg = out - (start - strm->avail_out); | |
86 | ||
87 | leal -257(%esi,%ecx), %ecx // out + (strm->avail_out - 257); | |
88 | movl %ecx, end // end = out + (strm->avail_out - 257); | |
89 | ||
// edx = state pointer for the field loads below
90 | movl state, %edx | |
91 | ||
92 | #ifdef INFLATE_STRICT | |
93 | movl 20(%edx), %ecx // state->dmax | |
94 | movl %ecx, dmax // dmax = state->dmax; | |
95 | #endif | |
96 | ||
97 | movl 40(%edx), %ecx // state->wsize | |
98 | movl %ecx, wsize // wsize = state->wsize; | |
99 | ||
100 | movl 44(%edx), %ecx // state->whave | |
101 | movl %ecx, whave // whave = state->whave; | |
102 | ||
103 | movl 48(%edx), %esi // state->write | |
104 | movl %esi, write // write = state->write; | |
105 | ||
106 | movl 52(%edx), %eax // state->window | |
107 | movl %eax, window // window = state->window; | |
108 | ||
109 | ||
110 | movl 56(%edx), %ecx // state->hold | |
111 | movl %ecx, hold // hold = state->hold | |
112 | ||
113 | movl 60(%edx), bits // bits = state->bits; | |
114 | ||
115 | movl 76(%edx), %esi // state->lencode | |
116 | movl %esi, lcode // lcode = state->lencode; | |
117 | ||
118 | movl 80(%edx), %eax // state->distcode | |
119 | movl %eax, dcode // dcode = state->distcode; | |
120 | ||
121 | movl 84(%edx), %ecx // state->lenbits | |
122 | movl $1, %eax | |
123 | movl %eax, %esi // a copy of 1 | |
124 | sall %cl, %esi // 1 << state->lenbits | |
125 | decl %esi // (1U << state->lenbits) - 1; | |
126 | movl %esi, lmask // lmask = (1U << state->lenbits) - 1; | |
127 | ||
128 | movl 88(%edx), %ecx // state->distbits | |
129 | sall %cl, %eax // 1 << state->distbits (eax still holds 1 from above) | |
130 | decl %eax // (1U << state->distbits) - 1; | |
131 | movl %eax, dmask // dmask = (1U << state->distbits) - 1; | |
132 | ||
133 | ||
134 | // these 2 might be used often, precomputed and saved in stack | |
135 | movl write, %eax | |
136 | addl wsize, %eax | |
137 | movl %eax, write_wsize // write+wsize | |
138 | ||
139 | movl write, %edx | |
140 | decl %edx | |
141 | movl %edx, write_1 // write-1 | |
142 | ||
143 | ||
// Main decode loop: refill hold, look up a literal/length entry, emit a literal or dispatch.
// A table entry ("this") is handled as one 32-bit word in eax: al = op, ah = bits,
// upper 16 bits = val. dolen consumes this.bits from hold and leaves this.val in esi.
// The loop continues while in < last and out < end.
144 | L_do_while_loop: // do { | |
145 | ||
146 | cmpl $15, bits | |
147 | jae bits_ge_15 // if (bits < 15) { | |
// The "#if 0" variant (disabled) loads the two input bytes one at a time; the "#else"
// variant (assembled) loads them with a single 16-bit read.
148 | #if 0 | |
149 | leal 8(bits), %esi // esi = bits+8 | |
150 | movl in, %eax // eax = in | |
151 | movzbl (%eax), %edx // edx = *in++ | |
152 | movl bits, %ecx // cl = bits | |
153 | sall %cl, %edx // 1st *in << bits | |
154 | addl hold, %edx // hold += 1st *in << bits | |
155 | movzbl 1(%eax), %eax // 2nd *in | |
156 | movl %esi, %ecx // cl = bits+8 | |
157 | sall %cl, %eax // 2nd *in << (bits+8) | |
158 | addl %eax, %edx // hold += 2nd *in << (bits+8) | |
159 | movl %edx, hold // update hold | |
160 | addl $2, in // in += 2 | |
161 | addl $16, bits // bits += 16; | |
162 | #else | |
163 | /* In simulation this code segment performs better than the byte-wise variant above, | |
164 | possibly because the memory access is more often aligned. */ | |
165 | movl in, %ecx // unsigned short *inp = (unsigned short *) (in+OFF); | |
166 | movzwl (%ecx), %eax // *((unsigned short *) in); | |
167 | movl bits, %ecx // bits | |
168 | sall %cl, %eax // *((unsigned short *) in) << bits | |
169 | addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits; | |
170 | addl $2, in // in += 2; | |
171 | addl $16, bits // bits += 16; | |
172 | #endif | |
173 | ||
174 | bits_ge_15: // } /* bits < 15 */ | |
175 | ||
176 | movl hold, %eax // hold | |
177 | andl lmask, %eax // hold & lmask; | |
178 | movl lcode, %esi // lcode[] : 4-byte aligned | |
179 | movl (%esi,%eax,4), %eax // this = lcode[hold&lmask]; | |
180 | jmp dolen | |
181 | .align 4,0x90 | |
// op_nonzero: reached from dolen with al = this.op (non-zero) and esi = this.val
182 | op_nonzero: | |
183 | movzbl %al, %ecx // a copy of op to cl | |
184 | testb $16, %cl // if op&16 | |
185 | jne Llength_base // branch to length_base | |
186 | ||
187 | testb $64, %cl // elif op&64 | |
188 | jne length_2nd_level_else // branch to 2nd level length code else conditions | |
189 | ||
190 | // 2nd level length code | |
191 | ||
192 | movl $1, %eax | |
193 | sall %cl, %eax // 1 << op | |
194 | decl %eax // ((1<<op) - 1) | |
195 | andl hold, %eax // hold & ((1U << op) - 1) | |
196 | movzwl %si, %ecx // this.val | |
197 | addl %ecx, %eax // this.val + (hold & ((1U << op) - 1)) | |
198 | ||
199 | movl lcode, %ecx // lcode[] : 4-byte aligned | |
200 | movl (%ecx,%eax,4), %eax // this = lcode[this.val + (hold & ((1U << op) - 1))]; | |
201 | // goto dolen (compiler rearranged the order of code, so this is a fall-through) | |
202 | dolen: | |
203 | movl %eax, %esi // make a copy of this (val 16-bit, bits 8-bit, op 8-bit) | |
204 | shrl $16, %esi // %esi = this.val; | |
205 | movzbl %ah, %ecx // op = (unsigned)(this.bits); | |
206 | shrl %cl, hold // hold >>= op; | |
207 | subl %ecx, bits // bits -= op; | |
208 | testb %al, %al // op = (unsigned)(this.op); | |
209 | jne op_nonzero // if op!=0, branch to op_nonzero | |
210 | ||
// op == 0: this.val is a literal byte; store it at out
211 | movl %esi, %ecx // this.val; | |
212 | movl out, %eax // out | |
213 | movb %cl, (%eax) // PUP(out) = (unsigned char)(this.val); | |
214 | incl %eax // out++; | |
215 | movl %eax, out // save out | |
216 | ||
// Loop condition; the copy paths also jump here after finishing a match.
217 | L_tst_do_while_loop_end: | |
218 | movl last, %eax // last | |
219 | cmpl %eax, in // in vs last | |
220 | jae return_unused_bytes // branch to return_unused_bytes if in >= last | |
221 | movl end, %edx // end | |
222 | cmpl %edx, out // out vs end | |
223 | jb L_do_while_loop // branch to do loop if out < end | |
224 | ||
// Common exit: give whole unused bytes in hold back to the input, write next_in/next_out,
// avail_in/avail_out, hold and bits back to strm/state, then restore registers and return.
// Also the target of the end-of-block and error exits.
225 | return_unused_bytes: | |
226 | ||
227 | movl bits, %eax // bits | |
228 | shrl $3, %eax // len = bits >> 3 | |
229 | movl in, %edx // in | |
230 | subl %eax, %edx // in -= len | |
231 | sall $3, %eax // len << 3 | |
232 | movl bits, %ecx // bits | |
233 | subl %eax, %ecx // bits -= len << 3 (ecx keeps the final bit count until the state update below) | |
234 | ||
235 | movl %edx, (strm) // strm->next_in = in + OFF; | |
236 | movl out, %eax | |
237 | movl %eax, 12(strm) // strm->next_out = out + OFF; | |
238 | ||
239 | cmpl %edx, last // last vs in | |
240 | jbe L67 // if (last <= in) branch to L67, which computes 5 - (in - last) and jumps back to L69 | |
241 | movl last, %eax // last | |
242 | addl $5, %eax // 5 + last | |
243 | subl %edx, %eax // 5 + last - in | |
244 | L69: | |
245 | movl %eax, 4(strm) // update strm->avail_in | |
246 | ||
247 | movl end, %eax | |
248 | cmpl %eax, out // out vs end | |
249 | jae L70 // if (out>=end) branch to L70, which computes 257 - (out - end) and jumps back to L72 | |
250 | addl $257, %eax // 257 + end | |
251 | subl out, %eax // 257 + end - out | |
252 | L72: | |
253 | movl %eax, 16(strm) // update strm->avail_out | |
254 | ||
255 | movl $1, %eax | |
256 | sall %cl, %eax // 1 << bits | |
257 | decl %eax // (1 << bits) -1 | |
258 | andl hold, %eax // hold &= (1U << bits) - 1; | |
259 | movl state, %esi | |
260 | movl %eax, 56(%esi) // state->hold = hold; | |
261 | movl %ecx, 60(%esi) // state->bits = bits; | |
262 | ||
263 | addl $92, %esp // release the local variable area | |
264 | ||
265 | // restore saved registers (reverse of the push order in the prologue) and return | |
266 | popl %ebx | |
267 | popl %esi | |
268 | popl %edi | |
269 | leave | |
270 | ret | |
271 | ||
// Length base: store this.val in len, add the (op & 15) extra bits taken from hold,
// make sure at least 15 bits are available, then load the first distance table entry.
272 | // this code segment is branched in from op_nonzero, with op in cl and this.val in esi | |
273 | Llength_base: | |
274 | movzwl %si, %esi // this instruction might not be needed, pad here to give better performance | |
275 | movl %esi, len // len = (unsigned)(this.val); | |
276 | ||
277 | movl %ecx, %esi // esi = op (ecx still holds the original copy) | |
278 | andl $15, %esi // op&=15; | |
279 | je Lop_is_zero // if (op) { | |
280 | cmpl bits, %esi // op vs bits | |
281 | jbe Lop_be_bits // if (bits < op) { | |
282 | movl in, %edx // in | |
283 | movzbl (%edx), %eax // *in | |
284 | movl bits, %ecx // bits | |
285 | sall %cl, %eax // *in << bits | |
286 | addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; | |
287 | incl %edx // in++ | |
288 | movl %edx, in // update in | |
289 | addl $8, bits // bits += 8 | |
290 | Lop_be_bits: // } | |
291 | movl $1, %eax // 1 | |
292 | movl %esi, %ecx // op | |
293 | sall %cl, %eax // 1 << op | |
294 | decl %eax // (1<<op)-1 | |
295 | andl hold, %eax // hold & ((1U << op) - 1) | |
296 | addl %eax, len // len += (unsigned)hold & ((1U << op) - 1); | |
297 | shrl %cl, hold // hold >>= op; | |
298 | subl %esi, bits // bits -= op; | |
299 | Lop_is_zero: // } | |
300 | cmpl $14, bits // if (bits < 15) { | |
301 | jbe bits_le_14 // branch to refill 16-bit into hold, and branch back to L19 | |
302 | L19: // } | |
303 | movl hold, %eax // hold | |
304 | andl dmask, %eax // hold&dmask | |
305 | movl dcode, %esi // dcode[] : 4-byte aligned | |
306 | movl (%esi,%eax,4), %eax // this = dcode[hold & dmask]; | |
307 | jmp dodist | |
308 | ||
// Distance decoding. dodist is entered with a distance table entry in eax
// (al = op, ah = bits, upper 16 bits = val). Lop_16_zero handles entries without the
// distance-base flag: either a second-level table lookup or the invalid-code error.
// Ldistance_base computes dist and chooses between copying from the output buffer
// and copying from the window.
309 | Lop_16_zero: | |
310 | testb $64, %cl // op&64 | |
311 | jne Linvalid_distance_code // if (op&64)!=0, branch to invalid distance code | |
312 | movl $1, %eax // 1 | |
313 | sall %cl, %eax // (1<<op) | |
314 | decl %eax // (1<<op)-1 | |
315 | andl hold, %eax // (hold & ((1U << op) - 1)) | |
316 | movzwl %dx, %edx // this.val | |
317 | addl %edx, %eax // this.val + (hold & ((1U << op) - 1)) | |
318 | movl dcode, %edx // dcode[] : 4 byte aligned | |
319 | movl (%edx,%eax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))]; | |
320 | dodist: | |
321 | movl %eax, %edx // this : (val 16-bit, bits 8-bit, op 8-bit) | |
322 | shrl $16, %edx // edx = this.val | |
323 | movzbl %ah, %ecx // op = (unsigned)(this.bits); | |
324 | shrl %cl, hold // hold >>= op; | |
325 | subl %ecx, bits // bits -= op; | |
326 | movzbl %al, %ecx // op = (unsigned)(this.op); | |
327 | testb $16, %cl // op & 16 | |
328 | je Lop_16_zero // if (op&16)==0 goto test op&64 | |
329 | ||
330 | Ldistance_base: // if (op&16) { /* distance base */ | |
331 | andl $15, %ecx // op &= 15; edx = dist = this.val; | |
332 | movl %ecx, op // save a copy of op | |
333 | cmpl bits, %ecx // op vs bits | |
334 | jbe 0f // if (bits < op) { | |
335 | movl in, %ecx // in | |
336 | movzbl (%ecx), %eax // *in | |
337 | movl bits, %ecx // bits | |
338 | sall %cl, %eax // *in << bits | |
339 | addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; | |
340 | incl in // in++ | |
341 | addl $8, bits // bits += 8 | |
342 | cmpl bits, op // op vs bits | |
343 | jbe 0f // if (bits < op) { | |
344 | movl in, %esi // in | |
345 | movzbl (%esi), %eax // *in | |
346 | movl bits, %ecx // cl = bits | |
347 | sall %cl, %eax // *in << bits | |
348 | addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits; | |
349 | incl %esi // in++ | |
350 | movl %esi, in // update in | |
351 | addl $8, bits // bits += 8 | |
352 | 0: // } } | |
353 | ||
354 | movzwl %dx, %edx // dist = (unsigned)(this.val); | |
355 | movl $1, %eax // 1 | |
356 | movzbl op, %ecx // cl = op (reloaded from its stack slot; ecx was reused as scratch above) | |
357 | sall %cl, %eax // 1 << op | |
358 | decl %eax // ((1U << op) - 1) | |
359 | andl hold, %eax // (unsigned)hold & ((1U << op) - 1) | |
360 | addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1); | |
361 | ||
362 | #ifdef INFLATE_STRICT | |
363 | ||
364 | cmpl dmax, %eax // dist vs dmax | |
365 | ja Linvalid_distance_too_far_back // if (dist > dmax) break for invalid distance too far back | |
366 | ||
367 | #endif | |
368 | ||
369 | movl %eax, dist // save a copy of dist in stack | |
370 | shrl %cl, hold // hold >>= op; | |
371 | subl %ecx, bits // bits -= op; | |
372 | ||
373 | movl out, %eax | |
374 | subl beg, %eax // eax = op = out - beg | |
375 | cmpl %eax, dist // dist vs op | |
376 | jbe Lcopy_direct_from_output // if (dist <= op) branch to copy direct from output | |
377 | ||
// Match reaches back beyond the bytes produced in this call (dist > out - beg).
// ecx = op = number of bytes that must come from the window. If op > whave the stream is
// invalid. The write == 0 case is handled inline here; write != 0 goes to Lwrite_non_zero.
// All window paths finish at L38, which copies the remaining len bytes from esi (from) to out.
378 | // if (dist > op) { | |
379 | movl dist, %ecx // dist | |
380 | subl %eax, %ecx // ecx = op = dist - op; | |
381 | cmpl %ecx, whave // whave vs op | |
382 | jb Linvalid_distance_too_far_back // if (op > whave) break for error; | |
383 | ||
384 | movl write, %edx | |
385 | testl %edx, %edx | |
386 | jne Lwrite_non_zero // if (write==0) { | |
387 | movl wsize, %eax // wsize | |
388 | subl %ecx, %eax // wsize-op | |
389 | movl window, %esi // from=window-OFF | |
390 | addl %eax, %esi // from += wsize-op | |
391 | movl out, %edx // out | |
392 | cmpl %ecx, len // len vs op | |
393 | jbe L38 // if !(op < len) skip | |
394 | subl %ecx, len // len - op | |
395 | 0: // do { | |
396 | movzbl (%esi), %eax // | |
397 | movb %al, (%edx) // | |
398 | incl %edx // | |
399 | incl %esi // PUP(out) = PUP(from); | |
400 | decl %ecx // --op; | |
401 | jne 0b // } while (op); | |
402 | ||
403 | movl %edx, out // update out | |
404 | movl %edx, %esi // out | |
405 | subl dist, %esi // esi = from = out - dist; | |
406 | ||
// L38: on entry esi = from; len and out are reloaded from their stack slots.
407 | L38: /* copy from output */ | |
408 | ||
409 | // while (len > 2) { | |
410 | // PUP(out) = PUP(from); | |
411 | // PUP(out) = PUP(from); | |
412 | // PUP(out) = PUP(from); | |
413 | // len -= 3; | |
414 | // } | |
415 | // if (len) { | |
416 | // PUP(out) = PUP(from); | |
417 | // if (len > 1) | |
418 | // PUP(out) = PUP(from); | |
419 | // } | |
420 | ||
421 | movl len, %ecx // len | |
422 | movl out, %edx // out | |
423 | subl $3, %ecx // pre-decrement len by 3 | |
424 | jl 1f // if len < 3, branch to 1f for remaining processing | |
425 | 0: // while (len>2) { | |
426 | movzbl (%esi), %eax | |
427 | movb %al, (%edx) // PUP(out) = PUP(from); | |
428 | movzbl 1(%esi), %eax | |
429 | movb %al, 1(%edx) // PUP(out) = PUP(from); | |
430 | movzbl 2(%esi), %eax | |
431 | movb %al, 2(%edx) // PUP(out) = PUP(from); | |
432 | addl $3, %esi // from += 3; | |
433 | addl $3, %edx // out += 3; | |
434 | subl $3, %ecx // len -= 3; | |
435 | jge 0b // } | |
436 | movl %edx, out // update out, in case len == 0 | |
437 | 1: | |
438 | addl $3, %ecx // post-increment len by 3 | |
439 | je L_tst_do_while_loop_end // if (len) { | |
440 | movzbl (%esi), %eax // | |
441 | movb %al, (%edx) // PUP(out) = PUP(from); | |
442 | incl %edx // out++ | |
443 | movl %edx, out // update out, in case len == 1 | |
444 | cmpl $2, %ecx // | |
445 | jne L_tst_do_while_loop_end // if len==1, break | |
446 | movzbl 1(%esi), %eax // second byte: esi was not advanced, hence offset 1 | |
447 | movb %al, (%edx) // PUP(out) = PUP(from); | |
448 | incl %edx // out++ | |
449 | movl %edx, out // update out | |
450 | jmp L_tst_do_while_loop_end // } | |
451 | ||
// Out-of-line pieces:
//  - length_2nd_level_else: a length entry with op&64 set is either end-of-block (op&32)
//    or an invalid literal/length code. Entered from op_nonzero with op in ecx.
//  - L70 / L67: the "else" arms of the avail_out / avail_in computations in
//    return_unused_bytes. Both overwrite the end / last stack slots, which are not read
//    again before the function returns.
452 | .align 4,0x90 | |
453 | length_2nd_level_else: | |
454 | andl $32, %ecx // test end-of-block | |
455 | je invalid_literal_length_code // if (op&32)==0, branch for invalid literal/length code break | |
456 | movl state, %edx // if (op&32), end-of-block is detected | |
457 | movl $11, (%edx) // state->mode = TYPE | |
458 | jmp return_unused_bytes | |
459 | ||
460 | L70: | |
461 | movl out, %edx // out | |
462 | subl %edx, end // (end-out) | |
463 | movl end, %esi // %esi = (end-out) = -(out - end); | |
464 | leal 257(%esi), %eax // %eax = 257 + %esi = 257 - (out -end) | |
465 | jmp L72 // return to update state and return | |
466 | ||
467 | L67: // %edx = in, to return 5 - (in - last) in %eax | |
468 | subl %edx, last // last - in | |
469 | movl last, %edx // %edx = last - in = - (in - last); | |
470 | leal 5(%edx), %eax // %eax = 5 + %edx = 5 - (in - last); | |
471 | jmp L69 // return to update state and return | |
472 | ||
// Refill before distance decoding: reached from Lop_is_zero when bits <= 14, adds 16 bits
// to hold and jumps back to L19. The "#if 1" variant (assembled) reads the two input bytes
// one at a time; the "#else" variant (disabled) uses a single 16-bit read.
473 | bits_le_14: | |
474 | #if 1 | |
475 | leal 8(bits), %esi // esi = bits+8 | |
476 | movl in, %eax // eax = in | |
477 | movzbl (%eax), %edx // edx = *in++ | |
478 | movl bits, %ecx // cl = bits | |
479 | sall %cl, %edx // 1st *in << bits | |
480 | addl hold, %edx // hold += 1st *in << bits | |
481 | movzbl 1(%eax), %eax // 2nd *in | |
482 | movl %esi, %ecx // cl = bits+8 | |
483 | sall %cl, %eax // 2nd *in << (bits+8) | |
484 | addl %eax, %edx // hold += 2nd *in << (bits+8) | |
485 | movl %edx, hold // update hold | |
486 | addl $2, in // in += 2 | |
487 | addl $16, bits // bits += 16; | |
488 | jmp L19 | |
489 | #else | |
490 | /* This code segment does not run as fast as the byte-wise segment above; possibly the processor | |
491 | needs extra time to handle unaligned short accesses. */ | |
492 | movl in, %edx // unsigned short *inp = (unsigned short *) (in+OFF); | |
493 | movzwl (%edx), %eax // *((unsigned short *) in); | |
494 | movl bits, %ecx // bits | |
495 | sall %cl, %eax // *((unsigned short *) in) << bits | |
496 | addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits; | |
497 | addl $2, %edx // in += 2; | |
498 | addl $16, %ecx // bits += 16; | |
499 | movl %edx, in | |
500 | movl %ecx, bits | |
501 | jmp L19 | |
502 | #endif | |
// Error exits for invalid codes. Each one obtains its own address with a call/pop pair
// (position-independent addressing), forms the address of the message string relative to
// it, stores that pointer at 24(strm), writes 27 to the first field of state
// (presumably state->mode = BAD -- confirm against the mode enum) and leaves through
// return_unused_bytes.
503 | invalid_literal_length_code: | |
504 | call 0f | |
505 | 0: popl %eax | |
506 | leal LC2-0b(%eax), %eax | |
507 | movl %eax, 24(strm) | |
508 | movl state, %esi | |
509 | movl $27, (%esi) | |
510 | jmp return_unused_bytes | |
511 | Linvalid_distance_code: | |
512 | call 0f | |
513 | 0: popl %eax | |
514 | leal LC1-0b(%eax), %eax | |
515 | movl %eax, 24(strm) | |
516 | movl state, %eax | |
517 | movl $27, (%eax) | |
518 | jmp return_unused_bytes | |
519 | ||
// Copy a match whose source lies entirely in the output written so far (dist <= out - beg).
// Uses edx = from, ecx = out, esi = len; copies three bytes per iteration, then 0-2 trailing bytes.
// NOTE(review): the three-byte loop body runs once before len is tested, so this path
// assumes len >= 3 on entry -- presumably guaranteed by the length table; confirm.
// NOTE(review): the zero bytes under INFLATE_STRICT are padding whose purpose is not stated
// here (presumably alignment tuning) -- confirm before changing.
520 | #ifdef INFLATE_STRICT | |
521 | .align 4,0x90 | |
522 | .byte 0 | |
523 | .byte 0 | |
524 | .byte 0 | |
525 | .byte 0 | |
526 | .byte 0 | |
527 | .byte 0 | |
528 | .byte 0 | |
529 | .byte 0 | |
530 | .byte 0 | |
531 | #endif | |
532 | Lcopy_direct_from_output: | |
533 | movl out, %edx // out | |
534 | subl dist, %edx // from = out - dist | |
535 | movl out, %ecx // out | |
536 | movl len, %esi // len | |
537 | subl $3, %esi // pre-decrement len by 3 | |
538 | 0: // do { | |
539 | movzbl (%edx), %eax | |
540 | movb %al, (%ecx) // PUP(out) = PUP(from); | |
541 | movzbl 1(%edx), %eax | |
542 | movb %al, 1(%ecx) // PUP(out) = PUP(from); | |
543 | movzbl 2(%edx), %eax | |
544 | movb %al, 2(%ecx) // PUP(out) = PUP(from); | |
545 | addl $3, %edx // from += 3 | |
546 | addl $3, %ecx // out += 3 | |
547 | subl $3, %esi // len -= 3 | |
548 | jge 0b // } while (len > 2); | |
549 | movl %ecx, out // update out in case len == 0 | |
550 | addl $3, %esi // post-increment len by 3 | |
551 | je L_tst_do_while_loop_end // if (len) { | |
552 | movzbl (%edx), %eax | |
553 | movb %al, (%ecx) // PUP(out) = PUP(from); | |
554 | incl %ecx | |
555 | movl %ecx, out // out++ | |
556 | cmpl $2, %esi // | |
557 | jne L_tst_do_while_loop_end // if (len > 1) | |
558 | movzbl 1(%edx), %eax // second byte: edx was not advanced, hence offset 1 | |
559 | movb %al, (%ecx) // PUP(out) = PUP(from); | |
560 | incl %ecx | |
561 | movl %ecx, out // out++ | |
562 | jmp L_tst_do_while_loop_end // } | |
563 | ||
// Window copy when write != 0. Entered with edx = write and ecx = op (bytes to take from
// the window). If write < op the source wraps around the end of the window
// (Lwrap_around_window); otherwise it is one contiguous run (Lcontiguous_in_window).
// Every path ends at L38 with esi = from and the out / len stack slots up to date.
564 | .align 4,0x90 | |
565 | Lwrite_non_zero: // %edx = write, %ecx = op | |
566 | movl window, %esi // from = window - OFF; | |
567 | cmp %ecx, %edx // write vs op, test for wrap around window or contiguous in window | |
568 | jae Lcontiguous_in_window // if (write >= op) branch to contiguous in window | |
569 | ||
570 | Lwrap_around_window: // wrap around window | |
571 | addl write_wsize, %esi // from += write+wsize | |
572 | subl %ecx, %esi // from += wsize + write - op; | |
573 | subl %edx, %ecx // op -= write | |
574 | cmpl %ecx, len // len vs op | |
575 | jbe L38 // if (len <= op) break to copy from output | |
576 | subl %ecx, len // len -= op; | |
577 | movl out, %edx // out | |
578 | 0: // do { | |
579 | movzbl (%esi), %eax // *from | |
580 | movb %al, (%edx) // *out | |
581 | incl %esi // from++ | |
582 | incl %edx // out++ | |
583 | decl %ecx // --op | |
584 | jne 0b // } while (op); | |
585 | ||
586 | movl %edx, out // save out in case we need to break to L38 | |
587 | movl window, %esi // from = window - OFF; | |
588 | movl len, %eax // len | |
589 | cmpl %eax, write // write vs len | |
590 | jae L38 // if (write >= len) break to L38 | |
591 | ||
592 | movl write, %ecx // op = write | |
593 | subl %ecx, len // len -= op; | |
594 | 0: // do { | |
595 | movzbl (%esi), %eax // *from | |
596 | movb %al, (%edx) // *out | |
597 | incl %esi // from++ | |
598 | incl %edx // out++ | |
599 | decl %ecx // --op | |
600 | jne 0b // } while (op); | |
601 | ||
602 | movl %edx, %esi // from = out | |
603 | movl %edx, out // save a copy of out | |
604 | subl dist, %esi // from = out - dist; | |
605 | jmp L38 // break to copy from output | |
606 | ||
607 | Lcontiguous_in_window: // contiguous in window, edx = write, %ecx = op | |
608 | subl %ecx, %edx // write - op | |
609 | addl %edx, %esi // from += write - op; | |
610 | cmpl %ecx, len // len vs op | |
611 | jbe L38 // if (len <= op) break to copy from output | |
612 | movl out, %edx // out | |
613 | subl %ecx, len // len -= op; | |
614 | ||
615 | 0: // do { | |
616 | movzbl (%esi), %eax // *from | |
617 | movb %al, (%edx) // *out | |
618 | incl %esi // from++ | |
619 | incl %edx // out++ | |
620 | decl %ecx // op-- | |
621 | jne 0b // } while (op); | |
622 | ||
623 | movl %edx, out // update out | |
624 | movl %edx, %esi // from = out | |
625 | subl dist, %esi // from = out - dist; | |
626 | jmp L38 | |
627 | ||
// Error exit for a distance that reaches further back than the data available: taken when
// op > whave, and under INFLATE_STRICT also when dist > dmax. Stores the address of the LC0
// message at 24(strm) using the call/pop position-independent idiom, writes 27 to the
// first field of state (presumably state->mode = BAD -- confirm) and leaves through
// return_unused_bytes.
628 | Linvalid_distance_too_far_back: | |
629 | call 0f | |
630 | 0: popl %eax | |
631 | leal LC0-0b(%eax), %eax | |
632 | movl %eax, 24(strm) | |
633 | movl state, %ecx | |
634 | movl $27, (%ecx) | |
635 | jmp return_unused_bytes | |
636 | ||
// closes "#if (defined __i386__)"
637 | #endif | |
638 | ||
639 | #if (defined __x86_64__) | |
640 | .cstring | |
641 | LC0: | |
642 | .ascii "invalid distance too far back\0" | |
643 | LC1: | |
644 | .ascii "invalid distance code\0" | |
645 | LC2: | |
646 | .ascii "invalid literal/length code\0" | |
647 | .text | |
648 | .align 4,0x90 | |
649 | ||
650 | #ifdef INFLATE_STRICT | |
651 | .byte 0 | |
652 | .byte 0 | |
653 | .byte 0 | |
654 | .byte 0 | |
655 | .byte 0 | |
656 | .byte 0 | |
657 | .byte 0 | |
658 | .byte 0 | |
659 | .byte 0 | |
660 | .byte 0 | |
661 | .byte 0 | |
662 | .byte 0 | |
663 | #endif | |
664 | ||
665 | .globl _inflate_fast | |
666 | _inflate_fast: | |
667 | ||
668 | // set up rbp | |
669 | pushq %rbp | |
670 | movq %rsp, %rbp | |
671 | ||
672 | // save registers in stack | |
673 | pushq %r15 | |
674 | pushq %r14 | |
675 | pushq %r13 | |
676 | pushq %r12 | |
677 | pushq %rbx | |
678 | ||
679 | #define strm %r13 | |
680 | #define state %rdi | |
681 | #define in %r12 | |
682 | #define in_d %r12d | |
683 | #define out %r10 | |
684 | #define out_d %r10d | |
685 | #define write %r15d | |
686 | #define hold %r9 | |
687 | #define holdd %r9d | |
688 | #define bits %r8d | |
689 | #define lcode %r14 | |
690 | #define len %ebx | |
691 | #define from %rcx | |
692 | #define dmax %r11d | |
693 | ||
694 | #define last -104(%rbp) | |
695 | #define beg -96(%rbp) | |
696 | #define end -88(%rbp) | |
697 | #define wsize -80(%rbp) | |
698 | #define whave -76(%rbp) | |
699 | #define window -72(%rbp) | |
700 | #define dcode -64(%rbp) | |
701 | #define lmask -56(%rbp) | |
702 | #define dmask -112(%rbp) | |
703 | #define wsize_write -116(%rbp) | |
704 | #define write_1 -128(%rbp) | |
705 | #define dist -44(%rbp) | |
706 | ||
707 | // reserve stack memory for local variables 128-40=88 | |
708 | subq $88, %rsp | |
709 | ||
710 | movq %rdi, strm | |
711 | movq 56(%rdi), state // state = (struct inflate_state FAR *)strm->state; | |
712 | movq (strm), in // in = strm->next_in - OFF; | |
713 | movl 8(strm), %eax // strm->avail_in | |
714 | subl $5, %eax // (strm->avail_in - 5) | |
715 | addq in, %rax // in + (strm->avail_in - 5) | |
716 | movq %rax, last // last = in + (strm->avail_in - 5) | |
717 | movq 24(strm), out // out = strm->next_out | |
718 | movl 32(strm), %eax // strm->avail_out | |
719 | subl %eax, %esi // (start - strm->avail_out); | |
720 | movq out, %rdx // strm->next_out | |
721 | subq %rsi, %rdx // out - (start - strm->avail_out); | |
722 | movq %rdx, beg // beg = out - (start - strm->avail_out); | |
723 | subl $257, %eax // (strm->avail_out - 257) | |
724 | addq out, %rax // out + (strm->avail_out - 257); | |
725 | movq %rax, end // end = out + (strm->avail_out - 257); | |
726 | ||
727 | #ifdef INFLATE_STRICT | |
728 | movl 20(state), dmax // dmax = state->dmax; | |
729 | #endif | |
730 | ||
731 | movl 52(state), %ecx // state->wsize | |
732 | movl %ecx, wsize // wsize = state->wsize; | |
733 | movl 56(state), %ebx // state->whave; | |
734 | movl %ebx, whave // whave = state->whave; | |
735 | movl 60(state), write // write = state->write; | |
736 | movq 64(state), %rax // state->window | |
737 | movq %rax, window // window = state->window; | |
738 | movq 72(state), hold // hold = state->hold; | |
739 | movl 80(state), bits // bits = state->bits; | |
740 | ||
741 | movq 96(state), lcode // lcode = state->lencode; | |
742 | movq 104(state), %rdx // state->distcode; | |
743 | movq %rdx, dcode // dcode = state->distcode; | |
744 | ||
745 | movl 116(state), %ecx // state->distbits | |
746 | movl $1, %eax | |
747 | movl %eax, %edx // 1 | |
748 | sall %cl, %edx // (1U << state->distbits) | |
749 | movl 112(state), %ecx // state->lenbits | |
750 | sall %cl, %eax // (1U << state->lenbits) | |
751 | decl %eax // (1U << state->lenbits) - 1 | |
752 | movq %rax, lmask // lmask = (1U << state->lenbits) - 1 | |
753 | decl %edx // (1U << state->distbits) - 1 | |
754 | movq %rdx, dmask // dmask = (1U << state->distbits) - 1 | |
755 | ||
756 | movl wsize, %ecx // wsize | |
757 | addl write, %ecx // wsize + write | |
758 | movl %ecx, wsize_write // wsize_write = wsize + write | |
759 | ||
760 | leal -1(%r15), %ebx // write - 1 | |
761 | movq %rbx, write_1 // write_1 = write - 1 | |
762 | ||
763 | L_do_while_loop: | |
764 | cmpl $14, bits // bits vs 14 | |
765 | ja 0f // if (bits < 15) { | |
766 | movzwl (in), %eax // read 2 bytes from in | |
767 | movl bits, %ecx // set up cl = bits | |
768 | salq %cl, %rax // (*in) << bits | |
769 | addq %rax, hold // hold += (*in) << bits | |
770 | addq $2, in // in += 2 | |
771 | addl $16, bits // bits += 16 | |
772 | 0: // } | |
773 | movq lmask, %rax // lmask | |
774 | andq hold, %rax // hold & lmask | |
775 | jmp 1f | |
776 | .align 4,0x90 | |
777 | Lop_nonzero: | |
778 | movzbl %al, %ecx // op in al and cl | |
779 | testb $16, %cl // check for length base processing (op&16) | |
780 | jne L_length_base // if (op&16) branch to length base processing | |
781 | testb $64, %cl // check for 2nd level length code (op&64==0) | |
782 | jne L_end_of_block // if (op&64)!=0, branch for end-of-block processing | |
783 | ||
784 | /* 2nd level length code : (op&64) == 0*/ | |
785 | L_2nd_level_length_code: | |
786 | movl $1, %eax // 1 | |
787 | sall %cl, %eax // 1 << op | |
788 | decl %eax // ((1U << op) - 1) | |
789 | andq hold, %rax // (hold & ((1U << op) - 1)) | |
790 | movzwl %dx, %edx | |
791 | addq %rdx, %rax // this = lcode[this.val + (hold & ((1U << op) - 1))]; | |
792 | 1: | |
793 | movl (lcode,%rax,4), %eax // this = lcode[hold & lmask]; | |
794 | Ldolen: | |
795 | movl %eax, %edx // a copy of this | |
796 | shrl $16, %edx // edx = this.val; | |
797 | movzbl %ah, %ecx // op = this.bits | |
798 | shrq %cl, hold // hold >>= op; | |
799 | subl %ecx, bits // bits -= op; | |
800 | testb %al, %al // op = (unsigned)(this.op); | |
801 | jne Lop_nonzero // if (op!-0) branch for copy operation | |
802 | L_literal: | |
803 | movb %dl, (out) // *out = this.val | |
804 | incq out // out ++ | |
805 | L_do_while_loop_check: | |
806 | cmpq last, in // in vs last | |
807 | jae L_return_unused_byte // if in >= last, break to return unused byte processing | |
808 | cmpq end, out // out vs end | |
809 | jb L_do_while_loop // back to do_while_loop if out < end | |
810 | ||
811 | /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ | |
812 | ||
813 | L_return_unused_byte: | |
814 | movl out_d, %esi | |
815 | jmp L34 | |
816 | ||
817 | L_length_base: /* al = cl = op, edx = this.val, op&16 = 16 */ | |
818 | movzwl %dx, len // len = (unsigned)(this.val); | |
819 | movl %ecx, %edx // op | |
820 | andl $15, %edx // op &= 15; | |
821 | je 1f // if (op) { | |
822 | cmpl bits, %edx // op vs bits | |
823 | jbe 0f // if (bits < op) { | |
824 | movzbl (in), %eax // *in | |
825 | movl bits, %ecx // cl = bits | |
826 | salq %cl, %rax // *in << bits | |
827 | addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; | |
828 | incq in // in++ | |
829 | addl $8, bits // bits += 8 | |
830 | 0: // } | |
831 | movl $1, %eax // 1 | |
832 | movl %edx, %ecx // cl = op | |
833 | sall %cl, %eax // 1 << op | |
834 | decl %eax // (1 << op) - 1 | |
835 | andl holdd, %eax // (unsigned)hold & ((1U << op) - 1); | |
836 | addl %eax, len // len += (unsigned)hold & ((1U << op) - 1); | |
837 | shrq %cl, hold // hold >>= op; | |
838 | subl %edx, bits // bits -= op; | |
839 | 1: // } | |
840 | cmpl $14, bits // bits vs 14 | |
841 | jbe L99 // if (bits < 15) go to loading to hold and return to L19 | |
842 | L19: // } | |
843 | movq dmask, %rax // dmask | |
844 | andq hold, %rax // hold & dmask | |
845 | movq dcode, %rdx // dcode[] | |
846 | movl (%rdx,%rax,4), %eax // this = dcode[hold & dmask]; | |
847 | jmp L_dodist | |
848 | .align 4,0x90 | |
849 | 0: // op&16 == 0, test (op&64)==0 for 2nd level distance code | |
850 | testb $64, %cl // op&64 | |
851 | jne L_invalid_distance_code // op&64 set: invalid distance code; otherwise if ((op&64)==0) { /* 2nd level distance code */ | |
852 | movl $1, %eax // 1 | |
853 | sall %cl, %eax // 1 << op | |
854 | decl %eax // (1 << op) - 1 (32-bit write, so the upper half of rax is zero) | |
855 | andq hold, %rax // (hold & ((1U << op) - 1)) | |
856 | movzwl %dx, %edx // this.val (edx was set to this >> 16 in L_dodist) | |
857 | addq %rdx, %rax // this.val + (hold & ((1U << op) - 1)) | |
858 | movq dcode, %rcx // dcode[] | |
859 | movl (%rcx,%rax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))]; | |
860 | L_dodist: // eax = this: al = op, ah = bits, upper 16 bits = val | |
861 | movl %eax, %edx // this | |
862 | shrl $16, %edx // dist = (unsigned)(this.val); | |
863 | movzbl %ah, %ecx // cl = op = this.bits | |
864 | shrq %cl, hold // hold >>= op; | |
865 | subl %ecx, bits // bits -= op; | |
866 | movzbl %al, %ecx // op = (unsigned)(this.op); | |
867 | testb $16, %cl // (op & 16) test for distance base | |
868 | je 0b // if (op&16) == 0, branch to check for 2nd level distance code | |
869 | ||
870 | L_distance_base: /* distance base: cl = op, edx = this.val */ | |
871 | ||
872 | movl %ecx, %esi // esi = op | |
873 | andl $15, %esi // op&=15; number of bits taken from hold and added to dist below | |
874 | cmpl bits, %esi // op vs bits | |
875 | jbe 1f // if (bits < op) { -- skipped when op <= bits | |
876 | movzbl (in), %eax // *in | |
877 | movl bits, %ecx // cl = bits | |
878 | salq %cl, %rax // *in << bits | |
879 | addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; | |
880 | incq in // in++ | |
881 | addl $8, bits // bits += 8 | |
882 | cmpl bits, %esi // op vs bits | |
883 | jbe 1f // if (bits < op) { -- load a second byte only if still short | |
884 | movzbl (in), %eax // *in | |
885 | movl bits, %ecx // cl = bits | |
886 | salq %cl, %rax // *in << bits | |
887 | addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits; | |
888 | incq in // in++ | |
889 | addl $8, bits // bits += 8 | |
890 | 1: // } } | |
891 | ||
892 | movzwl %dx, %edx // dist | |
893 | movl $1, %eax // 1 | |
894 | movl %esi, %ecx // cl = op | |
895 | sall %cl, %eax // (1 << op) | |
896 | decl %eax // (1 << op) - 1 | |
897 | andl holdd, %eax // (unsigned)hold & ((1U << op) - 1) | |
898 | addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1); | |
899 | movl %eax, dist // save a copy of dist in stack | |
900 | ||
901 | #ifdef INFLATE_STRICT | |
902 | cmp %eax, dmax // dmax vs dist | |
903 | jb L_invalid_distance_too_far_back // if (dmax < dist) break for invalid distance too far back; NOTE(review): esi still holds op here, not out | |
904 | #endif | |
905 | ||
906 | shrq %cl, hold // hold >>= op; (cl still holds op) | |
907 | subl %esi, bits // bits -= op; | |
908 | movl out_d, %esi // esi = out; the exit code at L34 reads out from esi | |
909 | movl out_d, %eax // eax = out | |
910 | subl beg, %eax // op = out - beg | |
911 | cmpl %eax, dist // dist vs op, /* see if copy from window */ | |
912 | jbe L_copy_direct_from_output // if (dist <= op) branch to copy direct from output | |
913 | ||
914 | L_distance_back_in_window: // dist > out - beg: part of the match comes from the window | |
915 | ||
916 | movl dist, %edx // dist | |
917 | subl %eax, %edx // op = dist - op; /* distance back in window */ | |
918 | ||
919 | cmpl %edx, whave // whave vs op | |
920 | jb L_invalid_distance_too_far_back // if (op > whave), break for invalid distance too far back | |
921 | ||
922 | testl write, write // if (write!=0) | |
923 | jne L_wrap_around_window // branch to wrap around window | |
924 | ||
925 | L_very_common_case: // write == 0 (fell through the jne above); edx = op | |
926 | ||
927 | movl wsize, %eax // wsize | |
928 | subl %edx, %eax // wsize - op | |
929 | movq window, from // from = window - OFF; | |
930 | addq %rax, from // from += wsize - op; | |
931 | ||
932 | movl %edx, %esi // esi = op, loop counter for L_some_from_window | |
933 | cmpl %edx, len // len vs op | |
934 | ja L_some_from_window // if (len > op), branch for aligned code block L_some_from_window | |
935 | L38: // copy len bytes from (from) to (out), 3 per iteration plus a 0..2 byte tail | |
936 | subl $3, len // pre-decrement len by 3 | |
937 | jge 0f // if len >= 3, branch to the aligned code block | |
938 | 1: addl $3, len // post-increment len by 3 | |
939 | je L_do_while_loop_check // if (len==0) break to L_do_while_loop_check | |
940 | movzbl (from), %eax // *from | |
941 | movb %al, (out) // *out | |
942 | incq out // out++ | |
943 | cmpl $2, len // len vs 2 | |
944 | jne L_do_while_loop_check // if len!=2 break to L_do_while_loop_check | |
945 | movzbl 1(from), %eax // *from (second tail byte; from itself was not advanced) | |
946 | movb %al, (out) // *out | |
947 | incq out // out++ | |
948 | jmp L_do_while_loop_check // break to L_do_while_loop_check | |
949 | ||
950 | .align 4,0x90 | |
951 | 0: // do { | |
952 | movzbl (from), %eax // *from | |
953 | movb %al, (out) // *out | |
954 | movzbl 1(from), %eax // *from | |
955 | movb %al, 1(out) // *out | |
956 | movzbl 2(from), %eax // *from | |
957 | movb %al, 2(out) // *out | |
958 | addq $3, out // out += 3 | |
959 | addq $3, from // from += 3 | |
960 | subl $3, len // len -= 3 | |
961 | jge 0b // } while (len>=0); (signed test on the pre-decremented len) | |
962 | jmp 1b // branch back to the possibly unaligned code | |
963 | ||
964 | .align 4,0x90 | |
965 | L_end_of_block: // falls through into L34 when the code is invalid | |
966 | andl $32, %ecx // op & 32 | |
967 | jne L101 // if (op&32) branch to end-of-block break | |
968 | leaq LC2(%rip), from // from = address of "invalid literal/length code" | |
969 | movq from, 48(strm) // store the error message pointer at offset 48 of strm (same slot as the other error paths; presumably strm->msg) | |
970 | movl $27, (state) // state->mode = BAD; | |
971 | movl out_d, %esi // esi = out, read by L34 when computing strm->avail_out | |
972 | ||
973 | L34: // shared exit path; expects esi = out (32-bit) for the avail_out computation below | |
974 | movl bits, %eax // bits | |
975 | shrl $3, %eax // len = bits >> 3; whole unused bytes still in hold | |
976 | mov %eax, %edx // len | |
977 | subq %rdx, in // in -= len | |
978 | sall $3, %eax // len << 3 | |
979 | movl bits, %ecx // bits | |
980 | subl %eax, %ecx // bits -= len << 3 (result kept in ecx) | |
981 | movq in, (strm) // strm->next_in = in + OFF; | |
982 | movq out, 24(strm) // strm->next_out = out + OFF; | |
983 | cmpq in, last // last vs in | |
984 | jbe L67 // if (last <= in) branch to L67 and return to L69 | |
985 | movl last, %eax // last | |
986 | addl $5, %eax // last + 5 | |
987 | subl in_d, %eax // 5 + last - in | |
988 | L69: | |
989 | movl %eax, 8(strm) // update strm->avail_in | |
990 | ||
991 | cmpq end, out // out vs end | |
992 | jae L70 // if (out >= end) branch to L70 and return to L72 | |
993 | movl end, %eax // end | |
994 | addl $257, %eax // 257 + end | |
995 | subl %esi, %eax // 257 + end - out; (esi = out) | |
996 | L72: | |
997 | movl %eax, 32(strm) // update strm->avail_out | |
998 | ||
999 | movl $1, %eax // 1 | |
1000 | sall %cl, %eax // 1 << bits (cl = remaining bits computed above) | |
1001 | decl %eax // (1U << bits) - 1 | |
1002 | andq hold, %rax // hold &= (1U << bits) - 1; | |
1003 | movq %rax, 72(state) // state->hold = hold; | |
1004 | movl %ecx, 80(state) // state->bits = bits; | |
1005 | ||
1006 | // release the stack space used for local variables | |
1007 | addq $88, %rsp | |
1008 | ||
1009 | // restore registers from stack | |
1010 | popq %rbx | |
1011 | popq %r12 | |
1012 | popq %r13 | |
1013 | popq %r14 | |
1014 | popq %r15 | |
1015 | ||
1016 | // return to caller | |
1017 | leave | |
1018 | ret | |
1019 | ||
1020 | .align 4,0x90 | |
1021 | L99: // out-of-line refill reached from L_length_base when bits < 15: append 2 input bytes to hold | |
1022 | leal 8(bits), %esi // esi = bits+8 | |
1023 | movzbl (in), %edx // 1st *in | |
1024 | movl bits, %ecx // cl = bits | |
1025 | salq %cl, %rdx // 1st *in << bits | |
1026 | addq %rdx, hold // 1st hold += (unsigned long)(PUP(in)) << bits; | |
1027 | movzbl 1(in), %eax // 2nd *in | |
1028 | movl %esi, %ecx // cl = bits + 8 | |
1029 | salq %cl, %rax // 2nd *in << bits+8 | |
1030 | addq %rax, hold // 2nd hold += (unsigned long)(PUP(in)) << (bits+8); | |
1031 | addq $2, in // in += 2 | |
1032 | addl $16, bits // bits += 16 | |
1033 | jmp L19 // resume with the distance table lookup | |
1034 | ||
1035 | L101: // end-of-block: reached from L_end_of_block when op&32 is set | |
1036 | movl $11, (state) // state->mode = 11 (presumably TYPE -- confirm against the inflate_mode enum) | |
1037 | movl out_d, %esi // esi = out, read by L34 when computing strm->avail_out | |
1038 | jmp L34 // shared exit path | |
1039 | .align 4,0x90 | |
1040 | L70: // out-of-line arm of the avail_out computation, taken when out >= end | |
1041 | movl end, %eax // end | |
1042 | subl %esi, %eax // end - out (esi = out) | |
1043 | addl $257, %eax // 257 + end - out | |
1044 | jmp L72 // back to store strm->avail_out | |
1045 | .align 4,0x90 | |
1046 | L67: // out-of-line arm of the avail_in computation, taken when last <= in | |
1047 | movl last, %eax // last | |
1048 | subl in_d, %eax // last - in | |
1049 | addl $5, %eax // 5 + last - in | |
1050 | jmp L69 // back to store strm->avail_in | |
1051 | ||
1052 | ||
1053 | .align 4,0x90 | |
1054 | ||
1055 | // pad with the following 4 bytes so that the major loop is aligned to a 16-byte boundary, which gives better performance | |
1056 | .byte 0 | |
1057 | .byte 0 | |
1058 | .byte 0 | |
1059 | .byte 0 | |
1060 | L_copy_direct_from_output: // dist <= out - beg: copy len bytes from out - dist to out | |
1061 | mov dist, %eax // dist (32-bit load, zero-extended into rax) | |
1062 | movq out, %rdx // out | |
1063 | subq %rax, %rdx // from = out - dist; | |
1064 | subl $3, len // pre-decrement len by 3 | |
1065 | // do { -- first pass is unconditional; NOTE(review): assumes len >= 3 on entry, confirm | |
1066 | 0: movzbl (%rdx), %eax // *from | |
1067 | movb %al, (out) // *out | |
1068 | movzbl 1(%rdx), %eax // *from | |
1069 | movb %al, 1(out) // *out | |
1070 | movzbl 2(%rdx), %eax // *from | |
1071 | movb %al, 2(out) // *out | |
1072 | addq $3, out // out+=3 | |
1073 | addq $3, %rdx // from+=3 | |
1074 | subl $3, len // len-=3 | |
1075 | jge 0b // } while (len>=0); | |
1076 | 1: addl $3, len // post-increment len by 3; len is now the 0..2 byte tail | |
1077 | je L_do_while_loop_check // if len==0, branch to do_while_loop_check | |
1078 | ||
1079 | movzbl (%rdx), %eax // *from | |
1080 | movb %al, (out) // *out | |
1081 | incq out // out++ | |
1082 | cmpl $2, len // len == 2 ? | |
1083 | jne L_do_while_loop_check // if len==1, branch to do_while_loop_check | |
1084 | ||
1085 | movzbl 1(%rdx), %eax // *from (second tail byte; rdx was not advanced) | |
1086 | movb %al, (out) // *out | |
1087 | incq out // out++ | |
1088 | jmp L_do_while_loop_check // branch to do_while_loop_check | |
1089 | ||
1090 | .align 4,0x90 | |
1091 | L_some_from_window: // on entry: from, out, %esi/%edx = op; len > op | |
1092 | // do { -- copy op bytes, one at a time | |
1093 | movzbl (from), %eax // *from | |
1094 | movb %al, (out) // *out | |
1095 | incq from // from++ | |
1096 | incq out // out++ | |
1097 | decl %esi // --op | |
1098 | jne L_some_from_window // } while (op); | |
1099 | subl %edx, len // len -= op; (edx kept the original op) | |
1100 | mov dist, %eax // dist | |
1101 | movq out, from // out | |
1102 | subq %rax, from // from = out - dist; | |
1103 | jmp L38 // copy the remaining len bytes from output | |
1104 | ||
1105 | .align 4,0x90 | |
1106 | L_wrap_around_window: // write != 0; edx = op | |
1107 | cmpl %edx, write // write vs op | |
1108 | jae L_contiguous_in_window // if (write >= op) branch to contiguous in window | |
1109 | movl wsize_write, %eax // wsize+write | |
1110 | subl %edx, %eax // wsize+write-op | |
1111 | movq window, from // from = window - OFF | |
1112 | addq %rax, from // from += wsize+write-op | |
1113 | subl write, %edx // op -= write | |
1114 | cmpl %edx, len // len vs op | |
1115 | jbe L38 // if (len<=op) branch to L38 to copy len bytes from (from), which still points into the window | |
1116 | ||
1117 | subl %edx, len // len -= op; | |
1118 | 0: // do { | |
1119 | movzbl (from), %eax // *from | |
1120 | movb %al, (out) // *out | |
1121 | incq from // from++ | |
1122 | incq out // out++ | |
1123 | decl %edx // op-- | |
1124 | jne 0b // } while (op); | |
1125 | movq window, from // from = window; continue from the start of the window | |
1126 | ||
1127 | cmpl len, write // write vs len | |
1128 | jae L38 // if (write >= len) branch to L38 to copy the remaining len bytes from (from) | |
1129 | movl write, %esi // op = write | |
1130 | subl write, len // len -= op | |
1131 | 1: // do { | |
1132 | movzbl (from), %eax // *from | |
1133 | movb %al, (out) // *out | |
1134 | incq from // from++ | |
1135 | incq out // out++ | |
1136 | decl %esi // op-- | |
1137 | jne 1b // } while (op); | |
1138 | mov dist, %eax // dist | |
1139 | movq out, from // out | |
1140 | subq %rax, from // from = out - dist; | |
1141 | jmp L38 // copy the remaining len bytes from output | |
1142 | ||
1143 | .align 4,0x90 | |
1144 | L_contiguous_in_window: // write >= op; edx = op | |
1145 | movl write, %eax // write | |
1146 | subl %edx, %eax // write - op | |
1147 | movq window, from // from = window - OFF | |
1148 | addq %rax, from // from += write - op | |
1149 | cmpl %edx, len // len vs op | |
1150 | jbe L38 // if (len <= op) branch to L38 to copy len bytes from (from), which still points into the window | |
1151 | subl %edx, len // len -= op; | |
1152 | 2: // do { | |
1153 | movzbl (from), %eax // *from | |
1154 | movb %al, (out) // *out | |
1155 | incq from // from++ | |
1156 | incq out // out++ | |
1157 | decl %edx // op-- | |
1158 | jne 2b // } while (op); | |
1159 | ||
1160 | mov dist, %eax // dist | |
1161 | movq out, from // out | |
1162 | subq %rax, from // from = out - dist; | |
1163 | jmp L38 // copy the remaining len bytes from output | |
1164 | ||
1165 | .align 4,0x90 | |
1166 | L_invalid_distance_code: // reached when op&64 is set on a distance code | |
1167 | leaq LC1(%rip), %rdx // rdx = address of "invalid distance code" | |
1168 | movq %rdx, 48(strm) // error message pointer stored at offset 48 of strm | |
1169 | movl $27, (state) // state->mode = BAD | |
1170 | movl out_d, %esi // esi = out, read by L34 when computing strm->avail_out | |
1171 | jmp L34 // shared exit path | |
1172 | ||
1173 | L_invalid_distance_too_far_back: // reached when dist > dmax (INFLATE_STRICT only) or when op > whave | |
1174 | leaq LC0(%rip), %rbx // rbx = address of "invalid distance too far back" | |
1175 | movq %rbx, 48(strm) // error message | |
1176 | movl $27, (state) // state->mode = BAD | |
1177 | movl out_d, %esi // esi = out: L34 subtracts esi to compute strm->avail_out, and the INFLATE_STRICT branch arrives with esi = op | |
1178 | jmp L34 // shared exit path | |
1179 | #endif |