arm/string/bzero_Generic.s

   1 /*
   2  * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23
  24 #include <arm/arch.h>
  25 #if !defined _ARM_ARCH_7 || defined VARIANT_DYLD
  26
  27 #include <mach/machine/asm.h>
  28 #include <architecture/arm/asm_help.h>
  29
  30 /*
  31  * A reasonably well-optimized bzero/memset. Should work equally well on arm11 and arm9 based
  32  * cores.
  33  *
  34  * The algorithm is to align the destination pointer on a 32 byte boundary and then
  35  * blast data 64 bytes at a time, in two stores of 32 bytes per loop.
  36  */
  37         .text
  38         .align 2
  39
  40         .globl _memset
  41 /* void *memset(void *ptr, int c, size_t len): on entry r0 = ptr, r1 = c, r2 = len. */
  42 _memset:
  43         /* rearrange the arguments into what Lbzeroengine takes: r1 = len, r2 = fill byte repeated four times */
  44         and             r1, r1, #0xff           /* only the low byte of c is used */
  45         mov             r3, r2                  /* park len in r3 while r2 is rebuilt */
  46         orr             r1, r1, r1, lsl #8      /* byte -> halfword */
  47         orr             r2, r1, r1, lsl #16     /* halfword -> full 32-bit fill word in r2 */
  48         mov             r1, r3                  /* r1 = len */
  49         b               Lbzeroengine            /* r0 has not been touched; continue in the shared fill code */
  50
  51         .globl _bzero
  52 /* void bzero(void *ptr, size_t len): on entry r0 = ptr, r1 = len. */
  53 _bzero:
  54         /* bzero is the shared fill code with a fill word of zero: clear r2 and fall through */
  55         mov             r2, #0
  56         /* shared entry for memset and bzero: r0 = ptr, r1 = len, r2 = 32-bit fill word */
  57 Lbzeroengine:
  58         /* duplicate the fill word into r3 so that a store of { r2-r3 } writes 8 bytes */
  59         mov             r3, r2
  60
  61         /* r12 is the moving store pointer; r0 is never written, so it still holds ptr on return */
  62         mov             r12, r0
  63
  64         /* nothing to do for len == 0 */
  65         cmp             r1, #0
  66         bxeq    lr
  67
  68         /* len below 32 takes the bytewise loop (blt is a signed test, so a len with bit 31 set goes there too) */
  69         cmp             r1, #32
  70         blt             L_bytewise
  71
  72         /* a pointer that is not a multiple of 32 has to be aligned first */
  73         tst             r12, #0x1f
  74         bne             L_unaligned
  75
  76         /* pointer is 32 byte aligned: with fewer than 64 bytes skip the 64 byte loop, with at least 64 fall into it */
  77         cmp             r1, #64
  78         blt             L_lessthan64aligned
  79
  80         /* reached with at least 64 bytes of len left and r12 32 byte aligned */
  81 L_64ormorealigned:
  82
  83         /* six more fill registers are needed; save them first, avoiding r7 (frame pointer) and r9 (thread register) */
  84         stmfd   sp!, { r4-r6, r8, r10-r11 }
  85         mov             r4, r2
  86         mov             r5, r2
  87         mov             r6, r2
  88         mov             r8, r2
  89         mov             r10, r2
  90         mov             r11, r2
  91         /* r2-r6, r8, r10 and r11 (eight registers) now all hold the fill word, so one stmia of them writes 32 bytes */
  92         /* pre-subtract 64 from the len to avoid an extra compare in the loop */
  93         sub             r1, r1, #64
  94         /* at the top of each pass: r1 = bytes still to store minus 64 */
  95 L_64loop:
  96         stmia   r12!, { r2-r6, r8, r10-r11 }    /* first 32 bytes of this pass */
  97         subs    r1, r1, #64                     /* account for the whole 64 bytes; sets the flags tested by bge */
  98         stmia   r12!, { r2-r6, r8, r10-r11 }    /* second 32 bytes; a store does not change the flags */
  99         bge             L_64loop                /* r1 >= 0 (signed): at least 64 more bytes remain */
 100
 101         /* restore the saved regs */
 102         ldmfd   sp!, { r4-r6, r8, r10-r11 }
 103
 104         /* undo the pre-subtraction so r1 is the real remainder (0 to 63); return if it is zero, else fall through */
 105         adds    r1, r1, #64
 106         bxeq    lr
 107
 108 L_lessthan64aligned:
 109         /* store 16 bytes per pass while at least 16 remain; with fewer than 16 nothing below executes and control falls through */
 110         cmp             r1, #16
 111         stmgeia r12!, { r2-r3 }         /* r1 >= 16: store 8 bytes */
 112         stmgeia r12!, { r2-r3 }         /* r1 >= 16: store 8 more bytes */
 113         subges  r1, r1, #16             /* r1 >= 16: len -= 16; the flags now describe the new r1 */
 114         bgt             L_lessthan64aligned     /* something is left after a store: go around again */
 115         bxeq    lr                              /* exactly zero left after a store: done */
 116
 117 L_lessthan16aligned:
 118         /* store the last 0 to 15 bytes: each of the low four bits of len enables one store, largest first */
 119         mov             r1, r1, lsl #28         /* move the remaining len bits [3:0] to the flags area of cpsr */
 120         msr             cpsr_f, r1              /* N = len bit 3, Z = len bit 2, C = len bit 1, V = len bit 0 */
 121
 122         stmmiia r12!, { r2-r3 }         /* n is set, store 8 bytes */
 123         streq   r2, [r12], #4           /* z is set, store 4 bytes */
 124         strcsh  r2, [r12], #2           /* c is set, store 2 bytes */
 125         strvsb  r2, [r12], #1           /* v is set, store 1 byte */
 126         bx              lr
 127
 128 L_bytewise:
 129         /* bytewise store, up to 2 bytes per pass, alignment not guaranteed; r1 is nonzero at the top of every pass */
 130         subs    r1, r1, #2              /* sets the flags used by strplb and bhi below; the stores do not change them */
 131         strb    r2, [r12], #1           /* always store one byte */
 132         strplb  r2, [r12], #1           /* store a second byte unless the subtraction went negative (only 1 byte was left) */
 133         bhi             L_bytewise      /* loop if more than 2 bytes were left before this pass (unsigned) */
 134         bx              lr
 135
 136 L_unaligned:
 137         /* not 32 byte aligned: first store 0 to 15 bytes so that r12 reaches a 16 byte boundary */
 138         mov             r3, r12, lsl #28        /* low 4 bits of the pointer moved up to bits [31:28] */
 139         rsb     r3, r3, #0x00000000             /* negate: bits [31:28] become the byte count needed to reach the boundary */
 140         msr             cpsr_f, r3              /* load that count into the flags: N = 8s, Z = 4s, C = 2s, V = 1s */
 141
 142         strvsb  r2, [r12], #1           /* v is set, unaligned in the 1s column */
 143         strcsh  r2, [r12], #2           /* c is set, unaligned in the 2s column */
 144         streq   r2, [r12], #4           /* z is set, unaligned in the 4s column */
 145         strmi   r2, [r12], #4           /* n is set, unaligned in the 8s column */
 146         strmi   r2, [r12], #4           /* second word of the 8 byte step */
 147
 148         subs    r1, r1, r3, lsr #28     /* len -= number of bytes just stored */
 149         bxeq    lr                      /* NOTE(review): the only branch here is taken with len >= 32 and at most 15 is subtracted, so this looks unreachable - confirm */
 150
 151         /* we had previously trashed r3, restore it */
 152         mov             r3, r2
 153
 154         /* now make sure we're 32 byte aligned */
 155         tst             r12, #(1 << 4)          /* bit 4 set: 16 byte aligned but not 32 byte aligned */
 156         stmneia r12!, { r2-r3 }                 /* in that case store 16 more bytes ... */
 157         stmneia r12!, { r2-r3 }
 158         subnes  r1, r1, #16                     /* ... and take them off the len */
 159
 160         /* we're now aligned, check for >= 64 bytes left */
 161         cmp             r1, #64
 162         bge             L_64ormorealigned
 163         b               L_lessthan64aligned
 164 /* NOTE(review): X_LEAF is not defined in this file (presumably in architecture/arm/asm_help.h); it appears to publish ___bzero as an entry point that reaches _bzero - confirm */
 165 X_LEAF(___bzero, _bzero)
 166
 167 #endif // !defined _ARM_ARCH_7 || defined VARIANT_DYLD