3 // the following assembly code is hard-wired for the case where POSTINC is not defined,
5 #if 0 // #ifdef POSTINC
10 # define PUP(a) *++(a)
13 // the code uses r9, therefore, it does not meet the register protocol for armv5 and below
14 // the code can only be used for armv6 and above
16 #if defined _ARM_ARCH_6
// NUL-terminated error strings that the error paths store into strm->msg.
// The code loads their addresses through `messages`, `messages+4` and
// `messages+8`.
// NOTE(review): the labels on these strings and the `messages` table itself
// are on lines outside the visible source.
21 .ascii "invalid distance too far back\0"
24 .ascii "invalid distance code\0"
27 .ascii "invalid literal/length code\0"
29 // renaming the register and stack memory use
// NOTE(review): the register renaming #defines (in, out, state, hold, bits, ...)
// are on lines outside the visible source.
40 // stack memory allocation
// Each *_loc macro names one word-sized slot addressed relative to sp.
// beg/end, wsize/whave and lmask/dmask occupy adjacent slots because the
// ARMv5+ path stores each pair with a single strd.
// [sp,#40] has no macro here; the code uses it directly to hold lencode.
42 #define window_loc [sp,#0]
43 #define last_loc [sp,#4]
44 #define beg_loc [sp,#8]
45 #define end_loc [sp,#12]
46 #define wsize_loc [sp,#16]
47 #define whave_loc [sp,#20]
48 #define windowm1_loc [sp,#28]
49 #define lmask_loc [sp,#32]
50 #define dmask_loc [sp,#36]
51 #define dist_loc [sp,#48]
55 // the following defines the variable offset in the inflate_state structure (in inflate.h)
// The byte offsets are hard-coded, so they have to be kept in step with the
// structure layout in inflate.h. wsize/whave (40/44) are read together with
// a single ldrd on the ARMv5+ path.
57 #define state_mode [state, #0]
58 #define state_last [state, #4]
59 #define state_wrap [state, #8]
60 #define state_havedict [state, #12]
61 #define state_flags [state, #16]
62 #define state_dmax [state, #20]
63 #define state_wbits [state, #36]
64 #define state_wsize [state, #40]
65 #define state_whave [state, #44]
66 #define state_write [state, #48]
67 #define state_window [state, #52]
68 #define state_hold [state, #56]
69 #define state_bits [state, #60]
70 #define state_lencode [state, #76]
71 #define state_distcode [state, #80]
72 #define state_lenbits [state, #84]
73 #define state_distbits [state, #88]
// Function entry and one-time setup: push the registers listed below, reserve
// local_size bytes of stack, then load the stream/state fields the decode loop
// needs into registers and into the *_loc stack slots.
76 // void inflate_fast(z_streamp strm, unsigned start)
78 // r0 = strm, (move to r10)
86 stmfd sp!, {r4-r6,r8-r11,lr} // push r4-r6, r8-r11 and the return address
87 sub sp, sp, #local_size // reserve stack space for the local slots
89 #if defined(_ARM_ARCH_5)
90 ldrd r2,r3,[r0, #0] // r2 = strm->next_in, r3 = strm->avail_in
95 sub in, r2, #OFF // in = strm->next_in - OFF;
96 sub r2, #(OFF+5) // next_in -= (OFF+5);
97 ldr state, [r0, #28] // state = (struct inflate_state FAR *)strm->state;
98 add r3, r3, r2 // last = next_in - OFF + (avail_in - 5); next_in already updated
100 str r3, last_loc // store last to release r3
102 ldr r3, [r0, #12] // next_out
103 ldr r2, [strm, #16] // avail_out
105 sub out, r3, #OFF // out = strm->next_out - OFF; r0 is used as out from this point on
107 sub r3, r3, #256 // next_out - 256
108 rsb r1, r2, r1 // start - avail_out
109 sub r3, r3, #(1+OFF) // next_out-OFF-257
110 add r3, r3, r2 // r3 = end = avail_out + (next_out-OFF) - 257 = avail_out + out - 257
111 rsb r2, r1, out // r2 = beg = out - (start - avail_out);
112 #if defined(_ARM_ARCH_5)
113 strd r2,r3, beg_loc // store beg/end
114 ldrd r2,r3, state_wsize // wsize/whave
115 strd r2,r3, wsize_loc // store wsize/whave
116 //ldrd r6,hold, state_window // window/hold, hold use r7
117 ldr r6, state_window // state->window
118 ldr hold, state_hold // state->hold
121 // for architecture < armv5, ldrd/strd is not available
122 str r2, beg_loc // store beg
123 str r3, end_loc // store end
124 ldr r2, state_wsize // state->wsize
125 ldr r3, state_whave // state->whave
126 str r2, wsize_loc // store wsize
127 str r3, whave_loc // store whave
128 ldr r6, state_window // state->window
129 ldr hold, state_hold // state->hold
132 ldr ip, state_lencode // lencode
133 mov r3, #1 // used to derive lmask and dmask
134 ldr write, state_write // write (r9 from this point on) : window write index
136 str ip, [sp, #40] // save lencode
137 sub ip, r6, #1 // window-1
138 str r6, window_loc // store window
139 str ip, windowm1_loc // store window-1
140 ldr r2, state_lenbits // lenbits
141 ldr bits, state_bits // bits, use lr from this point on
142 ldr distcode, state_distcode// distcode, use r8
143 mov r2, r3, asl r2 // (1 << lenbits)
144 ldr r12, state_distbits // distbits
145 sub r2, r2, #1 // lmask = (1U << state->lenbits) - 1;
146 mov r3, r3, asl r12 // (1U << state->distbits)
147 sub r3, r3, #1 // dmask = (1U << state->distbits) - 1;
149 #if defined(_ARM_ARCH_5)
150 strd r2, r3, lmask_loc // store lmask/dmask
152 str r2, lmask_loc // lmask
153 str r3, dmask_loc // dmask
// Main decode loop. Each pass tops hold up to at least 15 bits, looks the next
// code up in lencode, and then either writes a literal, branches to
// length_base / end_of_block, or follows a second-level table entry.
// NOTE(review): the branch targets do_loop, bitsge15 and op_not_zero are
// defined on lines outside the visible source.
156 // start the do loop decoding literals and length/distances
157 // until end-of-block or not enough input data or output space
160 cmp bits, #15 // bits vs 15
161 ldr r1, lmask_loc // lmask
162 bge bitsge15 // if bits >= 15, skip loading new 16 bits
164 // this shortcut relies on the processor reading data in little-endian mode
165 ldrh r3, [in,#1] // read 2 bytes
166 add in, #2 // in pointer += 2
167 add hold, hold, r3, asl bits // deposit the new 2 bytes into hold
168 add bits, #16 // bits count += 16
171 ldr ip, [sp, #40] // restore lencode
172 and r3, hold, r1 // r3 = hold & lmask
177 tst r2, #16 // if (op&16)
178 bne length_base // branch to length_base
180 tst r2, #64 // else if (op&64)
181 bne end_of_block // branch to end_of_block processing
183 // 2nd-level length code, this is the part where if ((op & 64) == 0) { ... }
185 // this.val + (hold & ((1U << op) - 1));
186 // r3 = r1 + (hold & ((1<<r2)-1));
188 rsb r12, r2, #32 // r12 = (32-op)
189 ror r3, hold, r2 // rotate the op least significant bits of hold to MSB
190 add r3, r1, r3, lsr r12 // r3 = r1 + (op LSBs in hold) = r1 + (hold & ((1<<r2)-1));
192 ldr ip, [sp, #40] // restore lencode
196 // code -> 8-bit code, 8-bit bits, 16-bit val
197 ldrb r2, [ip,r3,asl #2] // r2 = op = (unsigned)(this.op), byte 0 of the table entry
198 add r3, ip, r3, asl #2 // r3 = this
199 ldrb ip, [r3, #1] // ip = this.bits
200 ldrh r1, [r3, #2] // r1 = this.val
201 cmp r2, #0 // op == 0 ?
203 mov hold, hold, lsr ip // hold >>= this.bits
204 rsb bits, ip, bits // bits -= this.bits
205 bne op_not_zero // branch to op_not_zero if this.op != 0
207 strb r1, [out, #1]! // PUP(out) = (unsigned char)(this.val);
210 ldr r1, last_loc // last
211 ldr r2, end_loc // end
212 cmp in, r1 // compare in vs last
213 cmpcc out, r2 // if in < last, compare out vs end
214 bcc do_loop // if (in < last && out < end) go back to do_loop
// Common exit: hand unused whole bytes in hold back to the input, write
// next_in/next_out/avail_in/avail_out to strm and bits/hold to state,
// then release the stack area and return.
// Condition codes after "cmp out, ip": cs = (out >= end), cc = (out < end).
216 update_state_and_return:
218 sub r2, in, bits, lsr #3 // r2 = in - (bits>>3)
220 add r3, r2, #OFF // r3 = (in - (bits>>3)) + OFF
221 str r3, [strm, #0] // strm->next_in = in + OFF;
223 add r3, out, #OFF // out + OFF
224 str r3, [strm, #12] // strm->next_out = out + OFF;
226 ldr r3, last_loc // r3 = last
227 ldr ip, end_loc // ip = end
229 cmp r3, r2 // compare last vs in
230 addhi r3, r3, #5 // if last > in, last +=5
231 movls r6, r3 // o.w. (last <= in), r6 = last
232 rsbls r3, r6, r2 // if last <= in, r3 = in-last
233 rsbhi r3, r2, r3 // if last > in, r3 = (last+5) - in
234 rsbls r3, r3, #5 // if last <= in, r3 = 5 - (in-last);
235 cmp out, ip // compare out vs end
236 str r3, [strm, #4] // strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
237 movcs r2, ip // if out>=end, r2 = end
238 addcc r3, ip, #256 // if out<end, r3 = end+256
239 rsbcs r3, r2, out // if out>=end, r3 = out-end
240 addcc r3, r3, #1 // if out<end, r3 = end+257
241 rsbcs r3, r3, #256 // if out>=end, r3 = 256-(out-end) = 256 + (end-out)
242 and bits, #7 // this is equivalent to bits -= (bits>>3) << 3; (does not change the flags)
243 rsbcc r3, out, r3 // if out<end, r3 = 257+end-out
244 addcs r3, r3, #1 // if out>=end, r3 = 257 - (out-end)
245 str r3, [strm, #16] // strm->avail_out = (unsigned)(out < end ? 257 + (end - out) : 257 - (out - end));
247 // hold &= (1U << bits) - 1;
249 rsb ip, bits, #32 // 32-bits
250 ror hold, hold, bits // rotate the bits LSBs of hold up to the top of the register
251 lsr hold, hold, ip // logical shift right by (32-bits), hold now only keeps the bits LSBs
253 str bits, state_bits // state->bits = bits;
254 str hold, state_hold // state->hold = hold;
256 add sp, #local_size // pop out stack memory
257 ldmfd sp!,{r4-r6,r8-r11,pc} // restore registers and return
// length_base: the code just decoded is a length base (op & 16).
//   in : r2 = this.op, r1 = this.val
//   out: r6 = len = this.val + (op & 15) extra bits taken from hold
// After that hold is topped up and the distance code is looked up in distcode.
// NOTE(review): op_is_zero, the compare that feeds the "ls" conditions, and the
// instruction that scales the distcode index are on lines outside the visible
// source.
259 length_base: // r2 = op, r1 = this.val
260 ands r2, r2, #15 // op&=15;
261 mov r6, r1 // len = (unsigned) this.val;
262 beq op_is_zero // if op==0, branch to op_is_zero
263 cmp r2, bits // op vs bits
264 ldrhib r3, [in, #1]! // if (op>bits) r3 = (PUP(in));
265 addhi hold, hold, r3, asl bits // if (op>bits) hold += (unsigned long)(PUP(in)) << bits;
267 rsb ip, r2, #32 // 32-op
268 ror r3, hold, r2 // (hold<<(32-op))
269 add r6, r1, r3, lsr ip // len += (unsigned)hold & ((1U << op) - 1);
271 addhi bits, bits, #8 // if (op>bits) bits += 8; still uses the flags from "cmp r2, bits"
273 mov hold, hold, lsr r2 // hold >>= op;
274 rsb bits, r2, bits // bits -= op;
278 ldrh r3,[in,#1] // if (bits < 15) { 2 (PUP(in)); loaded unconditionally for better performance, only consumed by the addls below
279 addls in, #2 // in+=2;
280 addls hold, hold, r3, asl bits // twice hold += (unsigned long)(PUP(in)) << bits;
281 addls bits, #16 // 2 bits += 8; }
285 ldr r2, dmask_loc // r2 = dmask
286 and r3, hold, r2 // r3 = hold & dmask
288 add r3, r2, distcode // &dcode[hold&dmask];
289 ldrb ip, [r2, distcode] // op
290 ldrh r1, [r3, #2] // dist = (unsigned)(this.val);
291 tst ip, #16 // op vs 16
292 ldrb r3, [r3, #1] // this.bits
293 mov hold, hold, lsr r3 // hold >>= this.bits;
294 rsb bits, r3, bits // bits -= this.bits;
295 bne distance_base // if (op&16) { distance base processing }
297 beq second_distance_code // else if ((op&64)==0) branch to 2nd level distance code
299 b invalide_distance_code
// Second-level distance lookup: index distcode with
// this.val + (hold & ((1U << op) - 1)), consume this.bits of the new entry,
// and repeat while the new entry is again a second-level link.
// NOTE(review): the test that sets the flags for the final beq, and the
// instruction that scales the index into r2, are on lines outside the visible
// source.
301 check_2nd_level_distance_code:
303 tst r2, #64 // check the else if ((op & 64) == 0) condition for a 2nd level distance code
304 bne invalide_distance_code
306 second_distance_code:
308 rsb r2, ip, #32 // 32-op
309 ror r3, hold, ip // hold<<(32-op)
310 add r3, r1, r3, lsr r2 // this.val + (hold & ((1U << op) - 1))
313 add r3, r2, distcode // this = dcode[this.val + (hold & ((1U << op) - 1))];
314 ldrb r2, [r2, distcode] // this.op
315 ldrh r1, [r3, #2] // this.val
318 ldrb r3, [r3, #1] // this.bits
320 mov hold, hold, lsr r3 // hold >> = this.bits
321 rsb bits, r3, bits // bits -= this.bits
322 beq check_2nd_level_distance_code
// distance_base: decode the distance and choose where the copy starts.
//   in : ip = this.op, r1 = this.val (distance base), r6 = len
//   Computes dist = this.val + (op & 15) extra bits and saves it in dist_loc.
//   - dist <= out - beg : branch to copy_direct_from_output with from = out - dist
//   - otherwise op = dist - (out - beg) bytes come from the window: reject if
//     whave < op, branch to non_very_common_case if write != 0, else handle
//     the write == 0 case inline (from = window - 1 + wsize - op).
// Fix: the second "bits += 8" is applied right after the second input byte is
// merged into hold. It used to sit after the INFLATE_STRICT dmax compare,
// whose cmp overwrote the flags that the addhi depends on.
// Fix: dist and dmax are unsigned, so the dmax check branches with bhi.
324 distance_base: // this is invoked from if ((op&16)!=0)
326 and r2, ip, #15 // op &= 15;
327 cmp r2, bits // op vs bits
328 ldrhib r3, [in, #1]! // if (op > bits) (PUP(in))
329 addhi hold, hold, r3, asl bits // hold += (unsigned long)(PUP(in)) << bits;
330 addhi bits, bits, #8 // bits += 8;
331 cmphi r2, bits // inner check (bits < op), only evaluated when the first byte was needed
332 ldrhib r3, [in, #1]! // if (op > bits) (PUP(in))
333 addhi hold, hold, r3, asl bits // hold += (unsigned long)(PUP(in)) << bits;
354 addhi bits, bits, #8 // inner bits += 8; must run while the flags from cmp/cmphi above are intact
335 rsb ip, r2, #32 // (32-op)
336 ror r3, hold, r2 // hold<<(32-op)
337 add r3, r1, r3, lsr ip // dist += (unsigned)hold & ((1U << op) - 1);
339 ldr ip, beg_loc // beg
341 #ifdef INFLATE_STRICT
342 ldr r1, state_dmax // r1 = dmax
345 str r3, dist_loc // save dist
347 #ifdef INFLATE_STRICT
348 cmp r3, r1 // dist vs dmax
349 bhi invalid_distance_too_far_back // if dist > dmax (unsigned), set up msg/mode = bad and break
352 ldr r1, dist_loc // dist
353 rsb r3, ip, out // (out - beg);
356 cmp r1, r3 // dist vs (out - beg)
358 mov hold, hold, lsr r2 // hold >>= op ;
359 rsb bits, r2, bits // bits -= op;
360 rsbls r2, r1, out // if (dist <= out - beg) r2 = from = out-dist
361 bls copy_direct_from_output // if (dist <= out - beg) branch to copy_direct_from_output
363 ldr r2, whave_loc // whave
364 rsb r1, r3, r1 // op = dist - (out - beg)
365 cmp r2, r1 // whave vs op
366 nop // pad dummy for better performance
367 bcc invalid_distance_too_far_back // if whave < op, message invalid distance too far back, and break
369 cmp write, #0 // write
370 bne non_very_common_case // if (write != 0) branch to non_very_common_case
372 // the following : if (write == 0) { /* very common case */ }
373 nop // pad dummy for better performance
374 ldr ip, wsize_loc // wsize
375 cmp r6, r1 // len vs op
376 rsb r3, r1, ip // wsize - op
377 ldr ip, windowm1_loc // window - 1
378 add r2, ip, r3 // from = window - 1 + wsize - op : setup for using PUP(from)
379 movhi r3, r1 // if len > op, r3 = op
380 movhi r1, out // if len > op, r1 = out
381 bhi some_from_window // if (len > op), branch to some_from_window
// Tail copy of the remaining len bytes from `from` (r2) to out, three bytes
// per pass while more than two remain, then zero, one or two single bytes.
// NOTE(review): the labels used here (finish_copy, fcopy_per3bytes, lenle2,
// do_loop_while), the instruction that puts len into r1 for the 3-byte loop
// and the flag-setting instruction before the first beq are on lines outside
// the visible source.
386 // PUP(out) = PUP(from);
387 // PUP(out) = PUP(from);
388 // PUP(out) = PUP(from);
392 // PUP(out) = PUP(from);
394 // PUP(out) = PUP(from);
397 cmp r6, #2 // len > 2 ?
398 movls r1, r6 // if (len<=2) r1 = len
399 bls lenle2 // if (len<=2) branch to lenle2
402 ldrb r3, [r2, #1] // 1st PUP(from)
403 sub r1, r1, #3 // len-=3
404 cmp r1, #2 // len > 2 ?
405 strb r3, [out, #1] // 1st PUP(out) = PUP(from);
406 ldrb r3, [r2, #2] // 2nd PUP(from)
407 add r2, r2, #3 // from+=3
408 strb r3, [out, #2] // 2nd PUP(out) = PUP(from);
409 ldrb r3, [r2, #0] // 3rd PUP(from)
410 add out, out, #3 // out+=3
411 strb r3, [out, #0] // 3rd PUP(out) = PUP(from);
412 bgt fcopy_per3bytes // while (len>2) back to loop head
415 beq do_loop_while // back to while loop head if len==0
416 ldrb r3, [r2, #1] // PUP(from)
417 cmp r1, #2 // check whether len==2
418 strb r3, [out, #1]! // PUP(out) = PUP(from);
419 bne do_loop_while // back to while loop head if len==1
420 ldrb r3, [r2, #2] // 2nd PUP(from)
421 strb r3, [out, #1]! // 2nd PUP(out) = PUP(from);
422 b do_loop_while // back to while loop head
// End-of-block / invalid literal-length code handling (reached when op & 64).
// If op & 32 the mode is set to 11 and the loop ends; otherwise strm->msg and
// state->mode are set for an invalid literal/length code.
// NOTE(review): the label for this block and the instructions that load the
// final values of r3 before each str are on lines outside the visible source.
425 tst r2, #32 // if (op&32)
426 movne r3, #11 // mode value 11 (presumably TYPE; confirm against inflate.h)
427 strne r3, state_mode // state->mode = TYPE
428 bne update_state_and_return // break the do loop and branch to get ready to return
429 ldr r3, messages // "invalid literal/length code" message
432 str r3, [strm, #24] // strm->msg = (char *)"invalid literal/length code";
434 str r3, state_mode // state->mode = BAD;
435 b update_state_and_return // break the do loop and branch to get ready to return
// The commented-out lines below are a disabled unconditional 16-bit refill.
438 // ldrh r3,[in,#1] // 2 (PUP(in)) together
439 // add in, #2 // 2 in++
440 // add hold, hold, r3, asl bits // twice hold += (unsigned long)(PUP(in)) << bits;
441 // add bits, #16 // 2 bits += 8;
442 // b dodist // branch to dodist
443 nop // a pad dummy instruction to give better performance
// copy_direct_from_output: the whole match lies in the output already written.
//   in : r2 = from = out - dist, r6 = len
// Copies three bytes per pass while len > 2, then up to two trailing bytes.
// NOTE(review): the instruction that sets the flags for the first blt is on a
// line outside the visible source.
445 copy_direct_from_output: // r2 = from = out - dist ;
448 ldrb r3, [r2, #1] // 1st PUP(from)
449 sub r6, r6, #3 // len-=3
450 cmp r6, #2 // len vs 2
451 strb r3, [out, #1] // 1st PUP(out) = PUP(from);
452 ldrb r3, [r2, #2] // 2nd PUP(from)
453 add r2, r2, #3 // update from+=3
454 strb r3, [out, #2] // 2nd PUP(out) = PUP(from);
455 ldrb r3, [r2, #0] // 3rd PUP(from);
456 add out, out, #3 // update out+=3
457 strb r3, [out, #0] // 3rd PUP(out) = PUP(from);
458 bhi copy_direct_from_output // while (len>2);
460 // len in r6 can now be 0 1 or 2
463 ldrb r3, [r2, #1] // PUP(from), loaded before the branch decides whether it is stored
464 blt do_loop_while // if len<0 back to while loop head
465 strb r3, [out, #1]! // PUP(out) = PUP(from);
466 subs r6, #1 // len--;
467 ldrb r3, [r2, #2] // 2nd PUP(from)
468 blt do_loop_while // if len<0 back to while loop head
469 strb r3, [out, #1]! // 2nd PUP(out) = PUP(from);
470 b do_loop_while // back to while loop head
// Error exit for an invalid distance code: set strm->msg and state->mode,
// then leave through update_state_and_return.
// NOTE(review): the instructions that turn the loaded `messages+4` value into
// the msg pointer and that load the mode value into r3 are on lines outside
// the visible source.
473 invalide_distance_code:
474 ldr r3, messages+4 // "invalid distance code"
477 str r3, [strm, #24] // strm->msg = (char *)"invalid distance code";
479 str r3, state_mode // state->mode = BAD;
480 b update_state_and_return // break, restore registers, and return
// some_from_window body (write == 0 case, len > op): copy op bytes from the
// window, then continue the copy from the output.
//   in : r3 = op, r1 = old out (store pointer), r2 = from, r6 = len
//   out: out advanced by op, len reduced by op, r2 = from = out - dist
// NOTE(review): the some_from_window label and whatever follows the last
// instruction are on lines outside the visible source.
484 add out, r3, out // out += op
485 rsb r6, r3, r6 // len -= op
486 some_from_window_loop: // do {
487 ldrb ip, [r2, #1]! // PUP(from);
488 subs r3, r3, #1 // --op
489 strb ip, [r1, #1]! // PUP(out) = PUP(from);
490 bne some_from_window_loop // } while(op);
491 ldr r3, dist_loc // dist
492 rsb r2, r3, out // from = out - dist;
// non_very_common_case: write != 0, so the window data may wrap around.
//   in : r1 = op (bytes to take from the window), r6 = len, write = window write index
//   - write >= op : the bytes are contiguous, branch to contiguous_in_window
//   - otherwise copy (op - write) bytes from the end of the window, then up to
//     `write` bytes from the start of the window, and let finish_copy take
//     the rest from the output (from = out - dist).
// Fix: on the "some from start of window" path len was reduced by `write`
// twice (once before sow_loop and once after it). The C logic this mirrors
// does `op = write; len -= op;` once, so the second subtraction is removed.
// NOTE(review): the waw_loop / sow_loop labels, the instruction that rebases
// ip against out, and the counter decrement inside sow_loop are on lines
// outside the visible source.
495 non_very_common_case:
496 cmp write, r1 // write vs op
497 nop // pad dummy for better performance
498 bcs contiguous_in_window // if (write >= op) branch to contiguous_in_window
500 /* wrap around window */
502 ldr r2, wsize_loc // wsize
503 ldr ip, windowm1_loc // window-1
504 add r3, write, r2 // r3 = wsize+write
505 rsb r3, r1, r3 // r3 = wsize+write-op
506 add r2, ip, r3 // r2 = from = wsize+write-op+window-1;
507 rsb r1, write, r1 // op -= write;
509 cmp r6, r1 // len vs op
510 bls finish_copy // if (len <= op) branch to finish_copy
511 rsb r6, r1, r6 // len -= op
513 ldrb r3, [r2, #1]! // PUP(from)
514 subs r1, r1, #1 // --op;
515 strb r3, [out, #1]! // PUP(out) = PUP(from);
516 bne waw_loop // } while (op);
518 cmp write, r6 // write vs len
519 ldrcs r2, windowm1_loc // if (write>=len) r2 = from = window-1;
520 bcs finish_copy // if (write>=len) branch to finish_copy
522 // some from start of window
524 mov r1, write // op = write
525 sub r6, write // len -= op (the only adjustment of len on this path)
527 add ip, #1 // out+ip -> from
529 ldrb r3,[out, ip] // PUP(from)
531 strb r3, [out,#1]! // PUP(out) = PUP(from);
532 bne sow_loop // } while (op);
534 ldr r2, dist_loc // dist
536 rsb r2, r2, out // r2 = from = out-dist
537 b finish_copy // continue to finish_copy
// contiguous_in_window: write >= op, so the op window bytes are contiguous.
//   in : r1 = op, r6 = len, write = window write index
//   from = window - 1 + (write - op). If len <= op, finish_copy does the whole
//   copy; otherwise op bytes are copied here and from is reset to out - dist.
// NOTE(review): the ciw_loop label and whatever follows the last instruction
// are on lines outside the visible source.
540 contiguous_in_window:
541 ldr ip, windowm1_loc // window-1
542 cmp r6, r1 // len vs op
543 rsb r3, r1, write // r3 = write-op
544 add r2, ip, r3 // r2 = from = window+write-op-1
545 bls finish_copy // if (len <= op) branch to finish_copy
546 rsb r6, r1, r6 // len -= op
547 ldr r3, dist_loc // dist
549 ldrb ip, [r2, #1]! // PUP(from)
550 subs r1, r1, #1 // op--
551 strb ip, [out, #1]! // PUP(out) = PUP(from);
552 bne ciw_loop // while (--op);
553 rsb r2, r3, out // from = out - dist;
// Error exit for a distance that reaches further back than the data held:
// set strm->msg and state->mode, then leave through update_state_and_return.
// NOTE(review): the instructions that turn the loaded `messages+8` value into
// the msg pointer and that load the mode value into r3 are on lines outside
// the visible source.
556 invalid_distance_too_far_back:
557 ldr r3, messages+8 // "invalid distance too far back"
560 str r3, [strm, #24] // strm->msg = (char *)"invalid distance too far back";
562 str r3, state_mode // state->mode = BAD;
563 b update_state_and_return // break, restore registers, and return
571 #endif // defined _ARM_ARCH_6