]> git.saurik.com Git - apple/xnu.git/blame - libkern/zlib/intel/inffastS.s
xnu-1699.24.23.tar.gz
[apple/xnu.git] / libkern / zlib / intel / inffastS.s
CommitLineData
6d2010ae
A
1#if (defined __i386__)
2
3/* This assembly was first compiled from inffast.c (assuming POSTINC defined, OFF=0) and then hand optimized. */
4
5 .cstring
// Error message strings. Their addresses are stored at 24(strm) by the three error exits of this function.
6LC0:
7 .ascii "invalid distance too far back\0"
8LC1:
9 .ascii "invalid distance code\0"
10LC2:
11 .ascii "invalid literal/length code\0"
12 .text
13 .align 4,0x90
14
15
16#ifdef INFLATE_STRICT
// Ten zero bytes are emitted ahead of the entry point in the INFLATE_STRICT build only.
// NOTE(review): presumably padding that keeps the hot loop aligned once the extra STRICT instructions are assembled in -- confirm before changing.
17 .byte 0
18 .byte 0
19 .byte 0
20 .byte 0
21 .byte 0
22 .byte 0
23 .byte 0
24 .byte 0
25 .byte 0
26 .byte 0
27#endif
28.globl _inflate_fast
//
// _inflate_fast, i386 variant (AT&T syntax).
//
// Arguments, read from the caller stack after the prologue:
//   8(%ebp)  = strm  (copied to %ebx and kept there for the whole function)
//   12(%ebp) = start
//
// Register roles:
//   %ebx = strm, %edi = bits. %eax, %ecx, %edx and %esi are scratch.
//   Every other working variable lives in the stack frame and is accessed through the #define names below.
//
// Exits:
//   Every path leaves through return_unused_bytes, which writes back
//     strm  : next_in (offset 0), avail_in (4), next_out (12), avail_out (16)
//     state : hold (56), bits (60)
//   The three error paths also store a message address at 24(strm) and the value 27 in state->mode.
//   The end-of-block path stores 11 (TYPE) in state->mode.
//   %eax is not loaded with a dedicated return value before ret.
//
29_inflate_fast:
30
31 // set up ebp to refer to arguments strm and start
32 pushl %ebp
33 movl %esp, %ebp
34
35 // push edi/esi/ebx into stack (restored in reverse order before ret)
36 pushl %edi
37 pushl %esi
38 pushl %ebx
39
40 // reserve 92 bytes: 80 bytes of locals at -16(%ebp) .. -92(%ebp), + 12 to align %esp to 16-byte boundary
41 subl $92, %esp
42 movl 8(%ebp), %ebx // ebx = strm (first argument)
43
44 /* definitions to help code readability */
45
46 #define bits %edi // bits = state->bits, kept in a register
47 #define strm %ebx // strm, first argument
48 #define state 28(strm) // state = (struct inflate_state FAR *)strm->state;
49 #define in -84(%ebp) // in = strm->next_in - OFF; OFF=0
50 #define last -80(%ebp) // last = in + (strm->avail_in - 5);
51 #define out -28(%ebp) // out = strm->next_out - OFF;
52 #define beg -76(%ebp) // beg = out - (start - strm->avail_out);
53 #define end -72(%ebp) // end = out + (strm->avail_out - 257);
54 #define wsize -68(%ebp) // wsize = state->wsize;
55 #define whave -64(%ebp) // whave = state->whave;
56 #define write -60(%ebp) // write = state->write;
57 #define window -56(%ebp) // window = state->window;
58 #define hold -52(%ebp) // hold = state->hold;
59 #define lcode -48(%ebp) // lcode = state->lencode;
60 #define dcode -44(%ebp) // dcode = state->distcode;
61 #define lmask -40(%ebp) // lmask = (1U << state->lenbits) - 1;
62 #define dmask -36(%ebp) // dmask = (1U << state->distbits) - 1;
63 #define len -32(%ebp) // len : length of the match being copied
64 #define dmax -20(%ebp) // dmax = state->dmax; only loaded and tested under INFLATE_STRICT
65 #define dist -16(%ebp) // dist
66 #define write_wsize -24(%ebp) // write+wsize
67 #define write_1 -88(%ebp) // write-1
68 #define op -92(%ebp) // op
69
// ---- load the working copies of the stream and state fields into the frame ----
70 movl (strm), %eax // strm->next_in
71 movl %eax, in // in = strm->next_in - OFF; OFF=0
72
73 subl $5, %eax // in - 5;
74 movl 4(strm), %ecx // strm->avail_in
75 addl %ecx, %eax // in + (strm->avail_in - 5);
76 movl %eax, last // last = in + (strm->avail_in - 5);
77
78 movl 12(strm), %esi // strm->next_out
79 movl %esi, out // out = strm->next_out - OFF;
80
81 movl 16(strm), %ecx // strm->avail_out
82 movl %esi, %eax // out
83 subl 12(%ebp), %eax // out - start (start is the second argument)
84 addl %ecx, %eax // out - (start - strm->avail_out);
85 movl %eax, beg // beg = out - (start - strm->avail_out);
86
87 leal -257(%esi,%ecx), %ecx // out + (strm->avail_out - 257);
88 movl %ecx, end // end = out + (strm->avail_out - 257);
89
90 movl state, %edx // edx = state pointer, used for the field loads below
91
92#ifdef INFLATE_STRICT
93 movl 20(%edx), %ecx // state->dmax
94 movl %ecx, dmax // dmax = state->dmax;
95#endif
96
97 movl 40(%edx), %ecx // state->wsize
98 movl %ecx, wsize // wsize = state->wsize;
99
100 movl 44(%edx), %ecx // state->whave
101 movl %ecx, whave // whave = state->whave;
102
103 movl 48(%edx), %esi // state->write
104 movl %esi, write // write = state->write;
105
106 movl 52(%edx), %eax // state->window
107 movl %eax, window // window = state->window;
108
109
110 movl 56(%edx), %ecx // state->hold
111 movl %ecx, hold // hold = state->hold
112
113 movl 60(%edx), bits // bits = state->bits;
114
115 movl 76(%edx), %esi // state->lencode
116 movl %esi, lcode // lcode = state->lencode;
117
118 movl 80(%edx), %eax // state->distcode
119 movl %eax, dcode // dcode = state->distcode;
120
121 movl 84(%edx), %ecx // state->lenbits
122 movl $1, %eax
123 movl %eax, %esi // a copy of 1
124 sall %cl, %esi // 1 << state->lenbits
125 decl %esi // (1U << state->lenbits) - 1;
126 movl %esi, lmask // lmask = (1U << state->lenbits) - 1;
127
128 movl 88(%edx), %ecx // state->distbits
129 sall %cl, %eax // 1 << state->distbits
130 decl %eax // (1U << state->distbits) - 1;
131 movl %eax, dmask // dmask = (1U << state->distbits) - 1;
132
133
134 // these 2 might be used often, precomputed and saved in stack
135 movl write, %eax
136 addl wsize, %eax
137 movl %eax, write_wsize // write+wsize (read by Lwrap_around_window)
138
139 movl write, %edx
140 decl %edx
141 movl %edx, write_1 // write-1
// NOTE(review): write_1 is stored here but is not read anywhere else in the i386 code of this section.
142
143
// ---- main decode loop: each pass emits one literal or one length/distance match ----
144L_do_while_loop: // do {
145
146 cmpl $15, bits
147 jae bits_ge_15 // if (bits < 15) {
148#if 0
149 leal 8(bits), %esi // esi = bits+8
150 movl in, %eax // eax = in
151 movzbl (%eax), %edx // edx = *in++
152 movl bits, %ecx // cl = bits
153 sall %cl, %edx // 1st *in << bits
154 addl hold, %edx // hold += 1st *in << bits
155 movzbl 1(%eax), %eax // 2nd *in
156 movl %esi, %ecx // cl = bits+8
157 sall %cl, %eax // 2nd *in << (bits+8)
158 addl %eax, %edx // hold += 2nd *in << (bits+8)
159 movl %edx, hold // update hold
160 addl $2, in // in += 2
161 addl $16, bits // bits += 16;
162#else
163 /* from simulation, this code segment performs better than the other case
164 possibly, we are more often hit with aligned memory access */
165 movl in, %ecx // unsigned short *inp = (unsigned short *) (in+OFF);
166 movzwl (%ecx), %eax // *((unsigned short *) in); one 16-bit load instead of two byte loads
167 movl bits, %ecx // bits
168 sall %cl, %eax // *((unsigned short *) in) << bits
169 addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits;
170 addl $2, in // in += 2;
171 addl $16, bits // bits += 16;
172#endif
173
174bits_ge_15: // } /* bits < 15 */
175
176 movl hold, %eax // hold
177 andl lmask, %eax // hold & lmask;
178 movl lcode, %esi // lcode[] : 4-byte aligned
179 movl (%esi,%eax,4), %eax // this = lcode[hold&lmask];
180 jmp dolen // decode the entry held in eax
181 .align 4,0x90
// op_nonzero: reached from dolen when this.op != 0. On entry al = op and esi = this.val.
182op_nonzero:
183 movzbl %al, %ecx // a copy of op to cl
184 testb $16, %cl // if op&16
185 jne Llength_base // branch to length_base
186
187 testb $64, %cl // elif op&64
188 jne length_2nd_level_else // branch to 2nd level length code else conditions
189
190 // 2nd level length code
191
192 movl $1, %eax
193 sall %cl, %eax // 1 << op
194 decl %eax // ((1<<op) - 1)
195 andl hold, %eax // hold & ((1U << op) - 1)
196 movzwl %si, %ecx // this.val
197 addl %ecx, %eax // this.val + (hold & ((1U << op) - 1))
198
199 movl lcode, %ecx // lcode[] : 4-byte aligned
200 movl (%ecx,%eax,4), %eax // this = lcode[this.val + (hold & ((1U << op) - 1))];
201 // goto dolen (compiler rearranged the order of code)
// dolen: eax holds a table entry laid out as op in bits 0-7, bits in bits 8-15, val in bits 16-31.
202dolen:
203 movl %eax, %esi // make a copy of this (val 16-bit, bits 8-bit, op 8-bit)
204 shrl $16, %esi // %esi = this.val;
205 movzbl %ah, %ecx // op = (unsigned)(this.bits);
206 shrl %cl, hold // hold >>= op;
207 subl %ecx, bits // bits -= op;
208 testb %al, %al // op = (unsigned)(this.op);
209 jne op_nonzero // if op!=0, branch to op_nonzero
210
// op == 0 : the entry is a literal, store its low byte to the output
211 movl %esi, %ecx // this.val;
212 movl out, %eax // out
213 movb %cl, (%eax) // PUP(out) = (unsigned char)(this.val);
214 incl %eax // out++;
215 movl %eax, out // save out
216
// loop condition: continue while (in < last && out < end)
217L_tst_do_while_loop_end:
218 movl last, %eax // last
219 cmpl %eax, in // in vs last
220 jae return_unused_bytes // branch to return_unused_bytes if in >= last
221 movl end, %edx // end
222 cmpl %edx, out // out vs end
223 jb L_do_while_loop // branch to do loop if out < end
224
// ---- common exit: give back whole unused bytes, then write results to strm and state ----
225return_unused_bytes:
226
227 movl bits, %eax // bits
228 shrl $3, %eax // len = bits >> 3
229 movl in, %edx // in
230 subl %eax, %edx // in -= len (the adjusted value lives in edx only)
231 sall $3, %eax // len << 3
232 movl bits, %ecx // bits
233 subl %eax, %ecx // bits -= len << 3 (ecx must survive until the sall %cl below)
234
235 movl %edx, (strm) // strm->next_in = in + OFF;
236 movl out, %eax
237 movl %eax, 12(strm) // strm->next_out = out + OFF;
238
239 cmpl %edx, last // last vs in
240 jbe L67 // if (last <= in) branch to L67 and return to L69
241 movl last, %eax // last
242 addl $5, %eax // 5 + last
243 subl %edx, %eax // 5 + last - in
244L69:
245 movl %eax, 4(strm) // update strm->avail_in
246
247 movl end, %eax
248 cmpl %eax, out // out vs end
249 jae L70 // if (out>=end) branch to L70, and return to L72
250 addl $257, %eax // 257 + end
251 subl out, %eax // 257 + end - out
252L72:
253 movl %eax, 16(strm) // update strm->avail_out
254
255 movl $1, %eax
256 sall %cl, %eax // 1 << bits
257 decl %eax // (1 << bits) -1
258 andl hold, %eax // hold &= (1U << bits) - 1;
259 movl state, %esi
260 movl %eax, 56(%esi) // state->hold = hold;
261 movl %ecx, 60(%esi) // state->bits = bits;
262
263 addl $92, %esp // pop out local from stack
264
265 // restore saved registers and return
266 popl %ebx
267 popl %esi
268 popl %edi
269 leave
270 ret
271
272 // this code segment is branched in from op_nonzero, with op in cl and this.value in esi
273Llength_base:
274 movzwl %si, %esi // this instruction might not be needed, pad here to give better performance
275 movl %esi, len // len = (unsigned)(this.val);
276
277 movl %ecx, %esi // esi = op (ecx still holds op as well at this point)
278 andl $15, %esi // op&=15; number of extra length bits
279 je Lop_is_zero // if (op) {
280 cmpl bits, %esi // op vs bits
281 jbe Lop_be_bits // if (bits < op) {
282 movl in, %edx // in
283 movzbl (%edx), %eax // *in
284 movl bits, %ecx // bits
285 sall %cl, %eax // *in << bits
286 addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits;
287 incl %edx // in++
288 movl %edx, in // update in
289 addl $8, bits // bits += 8
290Lop_be_bits: // }
291 movl $1, %eax // 1
292 movl %esi, %ecx // op
293 sall %cl, %eax // 1 << op
294 decl %eax // (1<<op)-1
295 andl hold, %eax // hold & ((1U << op) - 1)
296 addl %eax, len // len += (unsigned)hold & ((1U << op) - 1);
297 shrl %cl, hold // hold >>= op;
298 subl %esi, bits // bits -= op;
299Lop_is_zero: // }
300 cmpl $14, bits // if (bits < 15) {
301 jbe bits_le_14 // branch to refill 16-bit into hold, and branch back to next
302L19: // }
303 movl hold, %eax // hold
304 andl dmask, %eax // hold&dmask
305 movl dcode, %esi // dcode[] : 4-byte aligned
306 movl (%esi,%eax,4), %eax // this = dcode[hold & dmask];
307 jmp dodist
308
// Lop_16_zero: reached from dodist when (op & 16) == 0. On entry cl = op and edx = this.val.
309Lop_16_zero:
310 testb $64, %cl // op&64
311 jne Linvalid_distance_code // if (op&64)!=0, branch to invalid distance code
312 movl $1, %eax // 1
313 sall %cl, %eax // (1<<op)
314 decl %eax // (1<<op)-1
315 andl hold, %eax // (hold & ((1U << op) - 1))
316 movzwl %dx, %edx // this.val
317 addl %edx, %eax // this.val + (hold & ((1U << op) - 1))
318 movl dcode, %edx // dcode[] : 4 byte aligned
319 movl (%edx,%eax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))];
// dodist: eax holds a distance table entry with the same layout as in dolen
320dodist:
321 movl %eax, %edx // this : (val 16-bit, bits 8-bit, op 8-bit)
322 shrl $16, %edx // edx = this.val
323 movzbl %ah, %ecx // op = (unsigned)(this.bits);
324 shrl %cl, hold // hold >>= op;
325 subl %ecx, bits // bits -= op;
326 movzbl %al, %ecx // op = (unsigned)(this.op);
327 testb $16, %cl // op & 16
328 je Lop_16_zero // if (op&16)==0 goto test op&64
329
330Ldistance_base: // if (op&16) { /* distance base */
331 andl $15, %ecx // op &= 15; edx = dist = this.val;
332 movl %ecx, op // save a copy of op
333 cmpl bits, %ecx // op vs bits
334 jbe 0f // if (bits < op) {
335 movl in, %ecx // in
336 movzbl (%ecx), %eax // *in
337 movl bits, %ecx // bits
338 sall %cl, %eax // *in << bits
339 addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits;
340 incl in // in++
341 addl $8, bits // bits += 8
342 cmpl bits, op // op vs bits
343 jbe 0f // if (bits < op) {
344 movl in, %esi // in
345 movzbl (%esi), %eax // *in
346 movl bits, %ecx // cl = bits
347 sall %cl, %eax // *in << bits
348 addl %eax, hold // hold += (unsigned long)(PUP(in)) << bits;
349 incl %esi // in++
350 movl %esi, in // update in
351 addl $8, bits // bits += 8
3520: // } }
353
354 movzwl %dx, %edx // dist = (unsigned)(this.val);
355 movl $1, %eax // 1
356 movzbl op, %ecx // cl = op (low byte of the saved op)
357 sall %cl, %eax // 1 << op
358 decl %eax // ((1U << op) - 1)
359 andl hold, %eax // (unsigned)hold & ((1U << op) - 1)
360 addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1);
361
362#ifdef INFLATE_STRICT
363
364 cmpl dmax, %eax // dist vs dmax
365 ja Linvalid_distance_too_far_back // if (dist > dmax) break for invalid distance too far back
366
367#endif
368
369 movl %eax, dist // save a copy of dist in stack
370 shrl %cl, hold // hold >>= op;
371 subl %ecx, bits // bits -= op;
372
373 movl out, %eax
374 subl beg, %eax // eax = op = out - beg (bytes of output produced so far)
375 cmpl %eax, dist // dist vs op
376 jbe Lcopy_direct_from_output // if (dist <= op) branch to copy direct from output
377
378 // if (dist > op) {   the match starts inside the sliding window
379 movl dist, %ecx // dist
380 subl %eax, %ecx // ecx = op = dist - op;
381 cmpl %ecx, whave // whave vs op
382 jb Linvalid_distance_too_far_back // if (op > whave) break for error;
383
384 movl write, %edx
385 testl %edx, %edx
386 jne Lwrite_non_zero // if (write==0) {
387 movl wsize, %eax // wsize
388 subl %ecx, %eax // wsize-op
389 movl window, %esi // from=window-OFF
390 addl %eax, %esi // from += wsize-op
391 movl out, %edx // out
392 cmpl %ecx, len // len vs op
393 jbe L38 // if !(op < len) skip
394 subl %ecx, len // len - op
3950: // do {
396 movzbl (%esi), %eax //
397 movb %al, (%edx) //
398 incl %edx //
399 incl %esi // PUP(out) = PUP(from);
400 decl %ecx // --op;
401 jne 0b // } while (op);
402
403 movl %edx, out // update out
404 movl %edx, %esi // out
405 subl dist, %esi // esi = from = out - dist;
406
// L38: shared tail copy. On entry esi = from, and the len and out slots hold the remaining length and the destination.
407L38: /* copy from output */
408
409 // while (len > 2) {
410 // PUP(out) = PUP(from);
411 // PUP(out) = PUP(from);
412 // PUP(out) = PUP(from);
413 // len -= 3;
414 // }
415 // if (len) {
416 // PUP(out) = PUP(from);
417 // if (len > 1)
418 // PUP(out) = PUP(from);
419 // }
420
421 movl len, %ecx // len
422 movl out, %edx // out
423 subl $3, %ecx // pre-decrement len by 3
424 jl 1f // if len < 3, branch to 1f for remaining processing
4250: // while (len>2) {
426 movzbl (%esi), %eax
427 movb %al, (%edx) // PUP(out) = PUP(from);
428 movzbl 1(%esi), %eax
429 movb %al, 1(%edx) // PUP(out) = PUP(from);
430 movzbl 2(%esi), %eax
431 movb %al, 2(%edx) // PUP(out) = PUP(from);
432 addl $3, %esi // from += 3;
433 addl $3, %edx // out += 3;
434 subl $3, %ecx // len -= 3;
435 jge 0b // }
436 movl %edx, out // update out, in case len == 0
4371:
438 addl $3, %ecx // post-increment len by 3
439 je L_tst_do_while_loop_end // if (len) {
440 movzbl (%esi), %eax //
441 movb %al, (%edx) // PUP(out) = PUP(from);
442 incl %edx // out++
443 movl %edx, out // update out, in case len == 1
444 cmpl $2, %ecx //
445 jne L_tst_do_while_loop_end // if len==1, break
446 movzbl 1(%esi), %eax
447 movb %al, (%edx) // PUP(out) = PUP(from);
448 incl %edx // out++
449 movl %edx, out // update out
450 jmp L_tst_do_while_loop_end // }
451
452 .align 4,0x90
// length_2nd_level_else: reached from op_nonzero when (op & 64) != 0, with op in ecx
453length_2nd_level_else:
454 andl $32, %ecx // test end-of-block
455 je invalid_literal_length_code // if (op&32)==0, branch for invalid literal/length code break
456 movl state, %edx // if (op&32), end-of-block is detected
457 movl $11, (%edx) // state->mode = TYPE
458 jmp return_unused_bytes
459
// L70: out-of-line half of the avail_out computation, taken when out >= end. Overwrites the end slot, which is not read again.
460L70:
461 movl out, %edx // out
462 subl %edx, end // (end-out)
463 movl end, %esi // %esi = (end-out) = -(out - end);
464 leal 257(%esi), %eax // %eax = 257 + %esi = 257 - (out -end)
465 jmp L72 // return to update state and return
466
// L67: out-of-line half of the avail_in computation, taken when last <= in. Overwrites the last slot, which is not read again.
467L67: // %edx = in, to return 5 - (in - last) in %eax
468 subl %edx, last // last - in
469 movl last, %edx // %edx = last - in = - (in - last);
470 leal 5(%edx), %eax // %eax = 5 + %edx = 5 - (in - last);
471 jmp L69 // return to update state and return
472
// bits_le_14: refill 16 bits before the distance lookup, then resume at L19
473bits_le_14:
474#if 1
475 leal 8(bits), %esi // esi = bits+8
476 movl in, %eax // eax = in
477 movzbl (%eax), %edx // edx = *in++
478 movl bits, %ecx // cl = bits
479 sall %cl, %edx // 1st *in << bits
480 addl hold, %edx // hold += 1st *in << bits
481 movzbl 1(%eax), %eax // 2nd *in
482 movl %esi, %ecx // cl = bits+8
483 sall %cl, %eax // 2nd *in << (bits+8)
484 addl %eax, %edx // hold += 2nd *in << (bits+8)
485 movl %edx, hold // update hold
486 addl $2, in // in += 2
487 addl $16, bits // bits += 16;
488 jmp L19
489#else
490 /* this code segment does not run as fast as the other original code segment, possibly the processor
491 need extra time to handle unaligned short access */
492 movl in, %edx // unsigned short *inp = (unsigned short *) (in+OFF);
493 movzwl (%edx), %eax // *((unsigned short *) in);
494 movl bits, %ecx // bits
495 sall %cl, %eax // *((unsigned short *) in) << bits
496 addl %eax, hold // hold += (unsigned long) *((unsigned short *) in) << bits;
497 addl $2, %edx // in += 2;
498 addl $16, %ecx // bits += 16;
499 movl %edx, in
500 movl %ecx, bits
501 jmp L19
502#endif
// Error exits. The call/popl pair leaves the address of local label 0 in eax, so the
// message address can be formed as a label difference without an absolute reference.
503invalid_literal_length_code:
504 call 0f
5050: popl %eax // eax = address of this instruction
506 leal LC2-0b(%eax), %eax // eax = address of the LC2 string
507 movl %eax, 24(strm) // store the message address at offset 24 of strm (presumably strm->msg -- confirm)
508 movl state, %esi
509 movl $27, (%esi) // state->mode = 27 (presumably BAD -- confirm)
510 jmp return_unused_bytes
511Linvalid_distance_code:
512 call 0f
5130: popl %eax // eax = address of this instruction
514 leal LC1-0b(%eax), %eax // eax = address of the LC1 string
515 movl %eax, 24(strm) // store the message address at offset 24 of strm (presumably strm->msg -- confirm)
516 movl state, %eax
517 movl $27, (%eax) // state->mode = 27 (presumably BAD -- confirm)
518 jmp return_unused_bytes
519
520#ifdef INFLATE_STRICT
// INFLATE_STRICT build only: align, then nine zero bytes ahead of the copy loop.
// NOTE(review): presumably alignment tuning for the loop below -- confirm before changing.
521 .align 4,0x90
522 .byte 0
523 .byte 0
524 .byte 0
525 .byte 0
526 .byte 0
527 .byte 0
528 .byte 0
529 .byte 0
530 .byte 0
531#endif
// Lcopy_direct_from_output: dist <= out - beg, so the whole match is copied from earlier output.
// The loop body runs before len is tested, so at least three bytes are always copied here.
532Lcopy_direct_from_output:
533 movl out, %edx // out
534 subl dist, %edx // from = out - dist
535 movl out, %ecx // out
536 movl len, %esi // len
537 subl $3, %esi // pre-decrement len by 3
5380: // do {
539 movzbl (%edx), %eax
540 movb %al, (%ecx) // PUP(out) = PUP(from);
541 movzbl 1(%edx), %eax
542 movb %al, 1(%ecx) // PUP(out) = PUP(from);
543 movzbl 2(%edx), %eax
544 movb %al, 2(%ecx) // PUP(out) = PUP(from);
545 addl $3, %edx // from += 3
546 addl $3, %ecx // out += 3
547 subl $3, %esi // len -= 3
548 jge 0b // } while (len > 2);
549 movl %ecx, out // update out in case len == 0
550 addl $3, %esi // post-increment len by 3
551 je L_tst_do_while_loop_end // if (len) {
552 movzbl (%edx), %eax
553 movb %al, (%ecx) // PUP(out) = PUP(from);
554 incl %ecx
555 movl %ecx, out // out++
556 cmpl $2, %esi //
557 jne L_tst_do_while_loop_end // if len != 2 (that is, len == 1), done
558 movzbl 1(%edx), %eax
559 movb %al, (%ecx) // PUP(out) = PUP(from);
560 incl %ecx
561 movl %ecx, out // out++
562 jmp L_tst_do_while_loop_end // }
563
564 .align 4,0x90
565Lwrite_non_zero: // %edx = write, %ecx = op
566 movl window, %esi // from = window - OFF;
567 cmp %ecx, %edx // write vs op, test for wrap around window or contiguous in window
568 jae Lcontiguous_in_window // if (write >= op) branch to contiguous in window
569
570Lwrap_around_window: // wrap around window
571 addl write_wsize, %esi // from += write+wsize
572 subl %ecx, %esi // from += wsize + write - op;
573 subl %edx, %ecx // op -= write
574 cmpl %ecx, len // len vs op
575 jbe L38 // if (len <= op) break to copy from output
576 subl %ecx, len // len -= op;
577 movl out, %edx // out
5780: // do {   copy the part at the end of the window
579 movzbl (%esi), %eax // *from
580 movb %al, (%edx) // *out
581 incl %esi // from++
582 incl %edx // out++
583 decl %ecx // --op
584 jne 0b // } while (op);
585
586 movl %edx, out // save out in case we need to break to L38
587 movl window, %esi // from = window - OFF;
588 movl len, %eax // len
589 cmpl %eax, write // write vs len
590 jae L38 // if (write >= len) break to L38
591
592 movl write, %ecx // op = write
593 subl %ecx, len // len -= op;
5940: // do {   copy the part at the start of the window
595 movzbl (%esi), %eax // *from
596 movb %al, (%edx) // *out
597 incl %esi // from++
598 incl %edx // out++
599 decl %ecx // --op
600 jne 0b // } while (op);
601
602 movl %edx, %esi // from = out
603 movl %edx, out // save a copy of out
604 subl dist, %esi // from = out - dist;
605 jmp L38 // break to copy from output
606
607Lcontiguous_in_window: // contiguous in window, edx = write, %ecx = op
608 subl %ecx, %edx // write - op
609 addl %edx, %esi // from += write - op;
610 cmpl %ecx, len // len vs op
611 jbe L38 // if (len <= op) break to copy from output
612 movl out, %edx // out
613 subl %ecx, len // len -= op;
614
6150: // do {
616 movzbl (%esi), %eax // *from
617 movb %al, (%edx) // *out
618 incl %esi // from++
619 incl %edx // out++
620 decl %ecx // op--
621 jne 0b // } while (op);
622
623 movl %edx, out // update out
624 movl %edx, %esi // from = out
625 movl_placeholder_removed
626 jmp L38
627
628Linvalid_distance_too_far_back:
629 call 0f
6300: popl %eax // eax = address of this instruction
631 leal LC0-0b(%eax), %eax // eax = address of the LC0 string
632 movl %eax, 24(strm) // store the message address at offset 24 of strm (presumably strm->msg -- confirm)
633 movl state, %ecx
634 movl $27, (%ecx) // state->mode = 27 (presumably BAD -- confirm)
635 jmp return_unused_bytes
636
637#endif
638
639#if (defined __x86_64__)
640 .cstring
641LC0:
642 .ascii "invalid distance too far back\0"
643LC1:
644 .ascii "invalid distance code\0"
645LC2:
646 .ascii "invalid literal/length code\0"
647 .text
648 .align 4,0x90
649
650#ifdef INFLATE_STRICT
651 .byte 0
652 .byte 0
653 .byte 0
654 .byte 0
655 .byte 0
656 .byte 0
657 .byte 0
658 .byte 0
659 .byte 0
660 .byte 0
661 .byte 0
662 .byte 0
663#endif
664
665.globl _inflate_fast
666_inflate_fast:
667
668 // set up rbp
669 pushq %rbp
670 movq %rsp, %rbp
671
672 // save registers in stack
673 pushq %r15
674 pushq %r14
675 pushq %r13
676 pushq %r12
677 pushq %rbx
678
679 #define strm %r13
680 #define state %rdi
681 #define in %r12
682 #define in_d %r12d
683 #define out %r10
684 #define out_d %r10d
685 #define write %r15d
686 #define hold %r9
687 #define holdd %r9d
688 #define bits %r8d
689 #define lcode %r14
690 #define len %ebx
691 #define from %rcx
692 #define dmax %r11d
693
694 #define last -104(%rbp)
695 #define beg -96(%rbp)
696 #define end -88(%rbp)
697 #define wsize -80(%rbp)
698 #define whave -76(%rbp)
699 #define window -72(%rbp)
700 #define dcode -64(%rbp)
701 #define lmask -56(%rbp)
702 #define dmask -112(%rbp)
703 #define wsize_write -116(%rbp)
704 #define write_1 -128(%rbp)
705 #define dist -44(%rbp)
706
707 // reserve stack memory for local variables 128-40=88
708 subq $88, %rsp
709
710 movq %rdi, strm
711 movq 56(%rdi), state // state = (struct inflate_state FAR *)strm->state;
712 movq (strm), in // in = strm->next_in - OFF;
713 movl 8(strm), %eax // strm->avail_in
714 subl $5, %eax // (strm->avail_in - 5)
715 addq in, %rax // in + (strm->avail_in - 5)
716 movq %rax, last // last = in + (strm->avail_in - 5)
717 movq 24(strm), out // out = strm->next_out
718 movl 32(strm), %eax // strm->avail_out
719 subl %eax, %esi // (start - strm->avail_out);
720 movq out, %rdx // strm->next_out
721 subq %rsi, %rdx // out - (start - strm->avail_out);
722 movq %rdx, beg // beg = out - (start - strm->avail_out);
723 subl $257, %eax // (strm->avail_out - 257)
724 addq out, %rax // out + (strm->avail_out - 257);
725 movq %rax, end // end = out + (strm->avail_out - 257);
726
727#ifdef INFLATE_STRICT
728 movl 20(state), dmax // dmax = state->dmax;
729#endif
730
731 movl 52(state), %ecx // state->wsize
732 movl %ecx, wsize // wsize = state->wsize;
733 movl 56(state), %ebx // state->whave;
734 movl %ebx, whave // whave = state->whave;
735 movl 60(state), write // write = state->write;
736 movq 64(state), %rax // state->window
737 movq %rax, window // window = state->window;
738 movq 72(state), hold // hold = state->hold;
739 movl 80(state), bits // bits = state->bits;
740
741 movq 96(state), lcode // lcode = state->lencode;
742 movq 104(state), %rdx // state->distcode;
743 movq %rdx, dcode // dcode = state->distcode;
744
745 movl 116(state), %ecx // state->distbits
746 movl $1, %eax
747 movl %eax, %edx // 1
748 sall %cl, %edx // (1U << state->distbits)
749 movl 112(state), %ecx // state->lenbits
750 sall %cl, %eax // (1U << state->lenbits)
751 decl %eax // (1U << state->lenbits) - 1
752 movq %rax, lmask // lmask = (1U << state->lenbits) - 1
753 decl %edx // (1U << state->distbits) - 1
754 movq %rdx, dmask // dmask = (1U << state->distbits) - 1
755
756 movl wsize, %ecx // wsize
757 addl write, %ecx // wsize + write
758 movl %ecx, wsize_write // wsize_write = wsize + write
759
760 leal -1(%r15), %ebx // write - 1
761 movq %rbx, write_1 // write_1 = write - 1
762
763L_do_while_loop:
764 cmpl $14, bits // bits vs 14
765 ja 0f // if (bits < 15) {
766 movzwl (in), %eax // read 2 bytes from in
767 movl bits, %ecx // set up cl = bits
768 salq %cl, %rax // (*in) << bits
769 addq %rax, hold // hold += (*in) << bits
770 addq $2, in // in += 2
771 addl $16, bits // bits += 16
7720: // }
773 movq lmask, %rax // lmask
774 andq hold, %rax // hold & lmask
775 jmp 1f
776 .align 4,0x90
777Lop_nonzero:
778 movzbl %al, %ecx // op in al and cl
779 testb $16, %cl // check for length base processing (op&16)
780 jne L_length_base // if (op&16) branch to length base processing
781 testb $64, %cl // check for 2nd level length code (op&64==0)
782 jne L_end_of_block // if (op&64)!=0, branch for end-of-block processing
783
784 /* 2nd level length code : (op&64) == 0*/
785L_2nd_level_length_code:
786 movl $1, %eax // 1
787 sall %cl, %eax // 1 << op
788 decl %eax // ((1U << op) - 1)
789 andq hold, %rax // (hold & ((1U << op) - 1))
790 movzwl %dx, %edx
791 addq %rdx, %rax // this = lcode[this.val + (hold & ((1U << op) - 1))];
7921:
793 movl (lcode,%rax,4), %eax // this = lcode[hold & lmask];
794Ldolen:
795 movl %eax, %edx // a copy of this
796 shrl $16, %edx // edx = this.val;
797 movzbl %ah, %ecx // op = this.bits
798 shrq %cl, hold // hold >>= op;
799 subl %ecx, bits // bits -= op;
800 testb %al, %al // op = (unsigned)(this.op);
801 jne Lop_nonzero // if (op!-0) branch for copy operation
802L_literal:
803 movb %dl, (out) // *out = this.val
804 incq out // out ++
805L_do_while_loop_check:
806 cmpq last, in // in vs last
807 jae L_return_unused_byte // if in >= last, break to return unused byte processing
808 cmpq end, out // out vs end
809 jb L_do_while_loop // back to do_while_loop if out < end
810
811 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
812
813L_return_unused_byte:
814 movl out_d, %esi
815 jmp L34
816
817L_length_base: /* al = cl = op, edx = this.val, op&16 = 16 */
818 movzwl %dx, len // len = (unsigned)(this.val);
819 movl %ecx, %edx // op
820 andl $15, %edx // op &= 15;
821 je 1f // if (op) {
822 cmpl bits, %edx // op vs bits
823 jbe 0f // if (bits < op) {
824 movzbl (in), %eax // *in
825 movl bits, %ecx // cl = bits
826 salq %cl, %rax // *in << bits
827 addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits;
828 incq in // in++
829 addl $8, bits // bits += 8
8300: // }
831 movl $1, %eax // 1
832 movl %edx, %ecx // cl = op
833 sall %cl, %eax // 1 << op
834 decl %eax // (1 << op) - 1
835 andl holdd, %eax // (unsigned)hold & ((1U << op) - 1);
836 addl %eax, len // len += (unsigned)hold & ((1U << op) - 1);
837 shrq %cl, hold // hold >>= op;
838 subl %edx, bits // bits -= op;
8391: // }
840 cmpl $14, bits // bits vs 14
841 jbe L99 // if (bits < 15) go to loading to hold and return to L19
842L19: // }
843 movq dmask, %rax // dmask
844 andq hold, %rax // hold & dmask
845 movq dcode, %rdx // dcode[]
846 movl (%rdx,%rax,4), %eax // this = dcode[hold & dmask];
847 jmp L_dodist
848 .align 4,0x90
8490: // op&16 == 0, test (op&64)==0 for 2nd level distance code
850 testb $64, %cl // op&64
851 jne L_invalid_distance_code // if ((op&64)==0) { /* 2nd level distance code */
852 movl $1, %eax // 1
853 sall %cl, %eax // 1 << op
854 decl %eax // (1 << op) - 1
855 andq hold, %rax // (hold & ((1U << op) - 1))
856 movzwl %dx, %edx // this.val
857 addq %rdx, %rax // this.val + (hold & ((1U << op) - 1))
858 movq dcode, %rcx // dcode[]
859 movl (%rcx,%rax,4), %eax // this = dcode[this.val + (hold & ((1U << op) - 1))];
860L_dodist:
861 movl %eax, %edx // this
862 shrl $16, %edx // dist = (unsigned)(this.val);
863 movzbl %ah, %ecx // cl = op = this.bits
864 shrq %cl, hold // hold >>= op;
865 subl %ecx, bits // bits -= op;
866 movzbl %al, %ecx // op = (unsigned)(this.op);
867 testb $16, %cl // (op & 16) test for distance base
868 je 0b // if (op&16) == 0, branch to check for 2nd level distance code
869
870L_distance_base: /* distance base */
871
872 movl %ecx, %esi // op
873 andl $15, %esi // op&=15
874 cmpl bits, %esi // op vs bits
875 jbe 1f // if (bits < op) {
876 movzbl (in), %eax // *in
877 movl bits, %ecx // cl = bits
878 salq %cl, %rax // *in << bits
879 addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits;
880 incq in // in++
881 addl $8, bits // bits += 8
882 cmpl bits, %esi // op vs bits
883 jbe 1f // if (bits < op) {
884 movzbl (in), %eax // *in
885 movl bits, %ecx // cl = bits
886 salq %cl, %rax // *in << bits
887 addq %rax, hold // hold += (unsigned long)(PUP(in)) << bits;
888 incq in // in++
889 addl $8, bits // bits += 8
8901: // } }
891
892 movzwl %dx, %edx // dist
893 movl $1, %eax // 1
894 movl %esi, %ecx // cl = op
895 sall %cl, %eax // (1 << op)
896 decl %eax // (1 << op) - 1
897 andl holdd, %eax // (unsigned)hold & ((1U << op) - 1)
898 addl %edx, %eax // dist += (unsigned)hold & ((1U << op) - 1);
899 movl %eax, dist // save a copy of dist in stack
900
901#ifdef INFLATE_STRICT
902 cmp %eax, dmax // dmax vs dist
903 jb L_invalid_distance_too_far_back // if (dmax < dist) break for invalid distance too far back
904#endif
905
906 shrq %cl, hold // hold >>= op;
907 subl %esi, bits // bits -= op;
908 movl out_d, %esi // out
909 movl out_d, %eax // out
910 subl beg, %eax // op = out - beg
911 cmpl %eax, dist // dist vs op, /* see if copy from window */
912 jbe L_copy_direct_from_output // if (dist <= op) branch to copy direct from output
913
914L_distance_back_in_window:
915
916 movl dist, %edx // dist
917 subl %eax, %edx // op = dist - op; /* distance back in window */
918
919 cmpl %edx, whave // whave vs op
920 jb L_invalid_distance_too_far_back // if (op > whave), break for invalid distance too far back
921
922 testl write, write // if (write!=0)
923 jne L_wrap_around_window // branch to wrap around window
924
925L_very_common_case:
926
927 movl wsize, %eax // wsize
928 subl %edx, %eax // wsize - op
929 movq window, from // from = window - OFF;
930 addq %rax, from // from += wsize - op;
931
932 movl %edx, %esi // op
933 cmpl %edx, len // len vs op
934 ja L_some_from_window // if (len > op), branch for aligned code block L_some_from_window
935L38:
936 subl $3, len // pre-decrement len by 3
937 jge 0f // if len >= 3, branch to the aligned code block
9381: addl $3, len // post-increment len by 3
939 je L_do_while_loop_check // if (len==0) break to L_do_while_loop_check
940 movzbl (from), %eax // *from
941 movb %al, (out) // *out
942 incq out // out++
943 cmpl $2, len // len vs 2
944 jne L_do_while_loop_check // if len!=2 break to L_do_while_loop_check
945 movzbl 1(from), %eax // *from
946 movb %al, (out) // *out
947 incq out // out++
948 jmp L_do_while_loop_check // break to L_do_while_loop_check
949
950 .align 4,0x90
9510: // do {
952 movzbl (from), %eax // *from
953 movb %al, (out) // *out
954 movzbl 1(from), %eax // *from
955 movb %al, 1(out) // *out
956 movzbl 2(from), %eax // *from
957 movb %al, 2(out) // *out
958 addq $3, out // out += 3
959 addq $3, from // from += 3
960 subl $3, len // len -= 3
961 jge 0b // } while (len>=0);
962 jmp 1b // branch back to the possibly unaligned code
963
964 .align 4,0x90
965L_end_of_block:
966 andl $32, %ecx // op & 32
967 jne L101 // if (op&32) branch to end-of-block break
968 leaq LC2(%rip), from
969 movq from, 48(strm) // state->mode
970 movl $27, (state) // state->mode = BAD;
971 movl out_d, %esi
972
// L34: common exit path.
//   - gives the unused whole bytes held in the bit accumulator back to the input (in -= bits >> 3)
//   - writes next_in, next_out, avail_in and avail_out into strm
//   - writes the masked hold and the remaining bit count into state
//   - releases the locals, restores the saved registers and returns
// On entry %esi must hold out (32-bit): it is subtracted below and in L70 to form avail_out.
973L34:
974 movl bits, %eax // bits
975 shrl $3, %eax // len = bits >> 3;
976 mov %eax, %edx // len (the 32-bit write zero-extends into %rdx for the 64-bit subtract below)
977 subq %rdx, in // in -= len
978 sall $3, %eax // len << 3
979 movl bits, %ecx // bits
980 subl %eax, %ecx // bits -= len << 3 (%ecx keeps the updated bits until it is stored below)
981 movq in, (strm) // strm->next_in = in + OFF;
982 movq out, 24(strm) // strm->next_out = out + OFF;
983 cmpq in, last // last vs in
984 jbe L67 // if (last <= in) branch to L67 and return to L69
985 movl last, %eax // last
986 addl $5, %eax // last + 5
987 subl in_d, %eax // 5 + last - in
// NOTE(review): L67 evaluates the same 32-bit expression with the add and the subtract swapped.
988L69:
989 movl %eax, 8(strm) // update strm->avail_in
990
991 cmpq end, out // out vs end
992 jae L70 // if (out >= end) branch to L70 and return to L72
993 movl end, %eax // end
994 addl $257, %eax // 257 + end
995 subl %esi, %eax // 257 + end - out; (%esi = out, set by the code that jumped to L34)
// NOTE(review): L70 evaluates the same 32-bit expression with the add and the subtract swapped.
996L72:
997 movl %eax, 32(strm) // update strm->avail_out
998
999 movl $1, %eax // 1
1000 sall %cl, %eax // 1 << bits (%cl = updated bits computed above)
1001 decl %eax // (1U << bits) - 1
1002 andq hold, %rax // hold &= (1U << bits) - 1; (result is left in %rax)
1003 movq %rax, 72(state) // state->hold = hold;
1004 movl %ecx, 80(state) // state->bits = bits;
1005
1006 // release the 88 bytes of stack used for local variables
1007 addq $88, %rsp
1008
1009 // restore the saved registers from the stack
1010 popq %rbx
1011 popq %r12
1012 popq %r13
1013 popq %r14
1014 popq %r15
1015
1016 // restore %rsp/%rbp and return to caller
1017 leave
1018 ret
1019
// L99: refill the bit accumulator with two input bytes.
// The first byte is shifted left by bits, the second by bits+8; both are added to hold.
// Then in advances by 2, bits grows by 16 and control continues at L19.
1020 .align 4,0x90
1021L99:
1022 leal 8(bits), %esi // esi = bits+8
1023 movzbl (in), %edx // 1st *in
1024 movl bits, %ecx // cl = bits
1025 salq %cl, %rdx // 1st *in << bits
1026 addq %rdx, hold // 1st hold += (unsigned long)(PUP(in)) << bits;
1027 movzbl 1(in), %eax // 2nd *in
1028 movl %esi, %ecx // cl = bits + 8
1029 salq %cl, %rax // 2nd *in << bits+8
1030 addq %rax, hold // 2nd hold += (unsigned long)(PUP(in)) << bits; (bits is 8 larger here)
1031 addq $2, in // in += 2
1032 addl $16, bits // bits += 16
1033 jmp L19 // continue decoding at L19
1034
// L101: end-of-block exit, reached from L_end_of_block when (op & 32) is non-zero.
// Stores mode 11 into the state and leaves through the common exit L34.
1035L101:
1036 movl $11, (state) // state->mode = 11 (presumably TYPE in zlib's inflate_mode enum -- TODO confirm)
1037 movl out_d, %esi // %esi = out; L34 subtracts %esi when it computes strm->avail_out
1038 jmp L34 // common exit
// L70: out-of-line arm of the avail_out computation, taken from L34 when out >= end.
// Leaves 257 + end - out in %eax (32-bit arithmetic) and rejoins L34 at L72.
1039 .align 4,0x90
1040L70:
1041 movl end, %eax // end
1042 subl %esi, %eax // end - out (%esi = out)
1043 addl $257, %eax // 257 + end - out
1044 jmp L72 // back to L34 to store strm->avail_out
// L67: out-of-line arm of the avail_in computation, taken from L34 when last <= in.
// Leaves 5 + last - in in %eax (32-bit arithmetic) and rejoins L34 at L69.
1045 .align 4,0x90
1046L67:
1047 movl last, %eax // last
1048 subl in_d, %eax // last - in
1049 addl $5, %eax // 5 + last - in
1050 jmp L69 // back to L34 to store strm->avail_in
1051
1052
// L_copy_direct_from_output: copy len bytes from (out - dist) to out.
// The source pointer lives in %rdx (not in the from macro). The main loop moves
// 3 bytes per iteration; the 0, 1 or 2 leftover bytes are copied after it.
// Every path ends at L_do_while_loop_check.
1053 .align 4,0x90
1054
1055 // pad with the following 4 bytes so that the major loop starts on a 16-byte boundary, for better performance
1056 .byte 0
1057 .byte 0
1058 .byte 0
1059 .byte 0
1060L_copy_direct_from_output:
1061 mov dist, %eax // dist (32-bit move, zero-extended into %rax)
1062 movq out, %rdx // out
1063 subq %rax, %rdx // from = out - dist;
1064 subl $3, len // pre-decrement len by 3
1065 // do {
10660: movzbl (%rdx), %eax // *from
1067 movb %al, (out) // *out
1068 movzbl 1(%rdx), %eax // *from
1069 movb %al, 1(out) // *out
1070 movzbl 2(%rdx), %eax // *from
1071 movb %al, 2(out) // *out
1072 addq $3, out // out+=3
1073 addq $3, %rdx // from+=3
1074 subl $3, len // len-=3
1075 jge 0b // } while (len>=0); (signed test: len has gone negative when fewer than 3 bytes remain)
10761: addl $3, len // post-increment len by 3 (len = leftover byte count: 0, 1 or 2)
1077 je L_do_while_loop_check // if len==0, branch to do_while_loop_check
1078
1079 movzbl (%rdx), %eax // *from
1080 movb %al, (out) // *out
1081 incq out // out++
1082 cmpl $2, len // len == 2 ?
1083 jne L_do_while_loop_check // if len==1, branch to do_while_loop_check
1084
1085 movzbl 1(%rdx), %eax // *from (second leftover byte; %rdx was not advanced)
1086 movb %al, (out) // *out
1087 incq out // out++
1088 jmp L_do_while_loop_check // branch to do_while_loop_check
1089
// L_some_from_window: copy op bytes from from to out, one byte per iteration, then
// continue at L38 with len reduced by op and from pointing at out - dist.
// On entry %esi and %edx both hold op: %esi is the loop counter and is consumed,
// %edx keeps the original op for the len adjustment.
1090 .align 4,0x90
1091L_some_from_window: // from : from, out, %esi/%edx = op
1092 // do {
1093 movzbl (from), %eax // *from
1094 movb %al, (out) // *out
1095 incq from // from++
1096 incq out // out++
1097 decl %esi // --op
1098 jne L_some_from_window // } while (op);
1099 subl %edx, len // len -= op;
1100 mov dist, %eax // dist (32-bit move, zero-extended into %rax)
1101 movq out, from // out
1102 subq %rax, from // from = out - dist;
1103 jmp L38 // copy from output
1104
// L_wrap_around_window: the match source wraps around the end of the window.
// On entry %edx = op. If write >= op the data is contiguous and control goes to
// L_contiguous_in_window. Otherwise:
//   1. from = window + (wsize + write - op), op -= write; if len <= op go to L38
//   2. copy op bytes from the end of the window, then from = window
//   3. if write < len, copy write bytes from the start of the window and set from = out - dist
// Every path ends at L38.
1105 .align 4,0x90
1106L_wrap_around_window:
1107 cmpl %edx, write // write vs op
1108 jae L_contiguous_in_window // if (write >= op) branch to contiguous in window
1109 movl wsize_write, %eax // wsize+write
1110 subl %edx, %eax // wsize+write-op
1111 movq window, from // from = window - OFF
1112 addq %rax, from // from += wsize+write-op
1113 subl write, %edx // op -= write
1114 cmpl %edx, len // len vs op
1115 jbe L38 // if (len<=op) branch to L38 (from already points into the window)
1116
1117 subl %edx, len // len -= op;
11180: // do {
1119 movzbl (from), %eax // *from
1120 movb %al, (out) // *out
1121 incq from // from++
1122 incq out // out++
1123 decl %edx // op--
1124 jne 0b // } while (op);
1125 movq window, from // from = window (continue from the start of the window)
1126
1127 cmpl len, write // write vs len
1128 jae L38 // if (write >= len) branch to L38 (from = window)
1129 movl write, %esi // op = write
1130 subl write, len // len -= op
11311: // do {
1132 movzbl (from), %eax // *from
1133 movb %al, (out) // *out
1134 incq from // from++
1135 incq out // out++
1136 decl %esi // op--
1137 jne 1b // } while (op);
1138 mov dist, %eax // dist (32-bit move, zero-extended into %rax)
1139 movq out, from // out
1140 subq %rax, from // from = out - dist;
1141 jmp L38 // copy the rest from output
1142
// L_contiguous_in_window: the match source lies in one piece inside the window.
// On entry %edx = op. from = window + (write - op). If len <= op control goes
// straight to L38; otherwise op bytes are copied one at a time, len is reduced
// by op, from is set to out - dist and control goes to L38.
1143 .align 4,0x90
1144L_contiguous_in_window:
1145 movl write, %eax // write
1146 subl %edx, %eax // write - op
1147 movq window, from // from = window - OFF
1148 addq %rax, from // from += write - op
1149 cmpl %edx, len // len vs op
1150 jbe L38 // if (len <= op) branch to L38 (from points into the window)
1151 subl %edx, len // len -= op;
11522: // do {
1153 movzbl (from), %eax // *from
1154 movb %al, (out) // *out
1155 incq from // from++
1156 incq out // out++
1157 decl %edx // op--
1158 jne 2b // } while (op);
1159
1160 mov dist, %eax // dist (32-bit move, zero-extended into %rax)
1161 movq out, from // out
1162 subq %rax, from // from = out - dist;
1163 jmp L38 // copy from output
1164
// L_invalid_distance_code: error exit. Stores the LC1 message pointer into strm,
// marks the state BAD, loads %esi with out and leaves through the common exit L34.
1165 .align 4,0x90
1166L_invalid_distance_code:
1167 leaq LC1(%rip), %rdx // %rdx = address of the LC1 message string
1168 movq %rdx, 48(strm) // strm->msg = LC1 (error message pointer, offset 48 of strm)
1169 movl $27, (state) // state->mode = BAD
1170 movl out_d, %esi // %esi = out; L34 subtracts %esi when it computes strm->avail_out
1171 jmp L34 // common exit
1172
// L_invalid_distance_too_far_back: error exit. Stores the LC0 message pointer into
// strm, marks the state BAD and leaves through the common exit L34.
//
// L34 (and L70) subtract %esi as out when computing strm->avail_out, so %esi is
// loaded with out here, exactly as L_end_of_block, L101 and L_invalid_distance_code
// do before jumping to L34. %rdx is used as the scratch register (as in
// L_invalid_distance_code): L34 overwrites %edx before reading it, so nothing live
// is clobbered.
1173L_invalid_distance_too_far_back:
1174 leaq LC0(%rip), %rdx // %rdx = address of the LC0 message string
1175 movq %rdx, 48(strm) // error message
1176 movl $27, (state) // state->mode = BAD
 movl out_d, %esi // %esi = out; L34 subtracts %esi when it computes strm->avail_out
1177 jmp L34 // common exit
1178
1179#endif