/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#include <machine/cpu_capabilities.h>
/* We use mode-independent "g" opcodes such as "srgi". These expand
 * into word operations when targeting __ppc__, and into doubleword
 * operations when targeting __ppc64__.
 */
#include <architecture/ppc/mode_independent_asm.h>
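/* For example (assuming the usual macro definitions in mode_independent_asm.h),
 * "srgi. r0,r4,4" assembles as "srwi. r0,r4,4" when targeting __ppc__ and as
 * "srdi. r0,r4,4" when targeting __ppc64__; "cmplgi" likewise expands to
 * "cmplwi" or "cmpldi".
 */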
#define kShort  128                 // threshold for calling commpage
/*
 * r3 = original ptr, not changed since memset returns it
 * r4 = count of bytes to set
 * r8 = working operand ptr
 */
_memset:                            // void * memset(void *b, int c, size_t len);
        andi.   r7,r4,0xFF          // copy value to working register, test for 0
        mr      r4,r5               // move length to working register
        cmplgi  cr1,r5,kShort       // long enough to bother with _COMM_PAGE_MEMSET_PATTERN?
        beqa++  _COMM_PAGE_BZERO    // if (c==0), map to bzero()
        rlwimi  r7,r7,8,16,23       // replicate nonzero value to low 2 bytes
        neg     r5,r3               // start to compute #bytes to align
        mr      r8,r3               // make working copy of operand ptr
        rlwimi  r7,r7,16,0,15       // value now in all 4 bytes
        blt     cr1,Lmemset3        // too short to use commpage
        andi.   r0,r5,0xF           // r0 <- #bytes to align on quadword
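/* A minimal C sketch of the value replication done by the two rlwimi
 * instructions above (illustrative only, not the actual implementation):
 *
 *      #include <stdint.h>
 *
 *      static uint32_t replicate_byte(int c)
 *      {
 *          uint32_t v = (uint8_t)c;    // keep only the low byte of "c"
 *          v |= v << 8;                // value now in low 2 bytes
 *          v |= v << 16;               // value now in all 4 bytes
 *          return v;
 *      }
 */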
// Align ptr and store enough so that we have an aligned 16-byte pattern.

        beq     Lmemset1            // skip if (r0==0), ie if r8 is 16-byte aligned
        add     r8,r8,r0            // 16-byte align ptr
        sub     r4,r4,r0            // adjust length
        stw     r7,0(r8)            // now we can store an aligned 16-byte pattern
// Call machine-specific commpage routine, which expects:
//      r8 = ptr (16-byte aligned) to memory to store
//      r9 = ptr (16-byte aligned) to 16-byte pattern to store
//      r3, r7, and r12 are preserved
//      r4 and r8 are updated to reflect a residual count of 0..31 bytes
        mflr    r12                 // save return address
        mr      r9,r8               // point to 16-byte-aligned 16-byte pattern
        addi    r8,r8,16            // point to first unstored byte
        subi    r4,r4,16            // account for the aligned bytes we have stored
        bla     _COMM_PAGE_MEMSET_PATTERN
// Here for short nonzero memset.
//      r4 = count (<= kShort bytes)
//      r7 = pattern in all four bytes

        srgi.   r0,r4,4             // any 16-byte chunks?
        mtcrf   0x01,r4             // move length remaining to cr7 so we can test bits
        beq     Lmemset5            // fewer than 16 bytes
        b       Lmemset4            // enter loop
Lmemset4:                           // loop over 16-byte chunks

// Handle last 0..15 bytes.
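/* The mtcrf above put the low bits of the count into cr7, so the tail can be
 * finished by testing individual bits rather than looping. A rough C sketch
 * (v is the replicated fill word, p the current destination):
 *
 *      if (len & 8) { memcpy(p, &v, 4); memcpy(p + 4, &v, 4); p += 8; }
 *      if (len & 4) { memcpy(p, &v, 4); p += 4; }
 *      if (len & 2) { memcpy(p, &v, 2); p += 2; }
 *      if (len & 1) { *p = (uint8_t)v; }
 */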
/* ***********************************
 * * M E M S E T _ P A T T E R N 1 6 *
 * ***********************************
 *
 * Used to store a 16-byte pattern in memory:
 *
 *      void memset_pattern16(void *b, const void *c16, size_t len);
 *
 * Where c16 points to the 16-byte pattern. None of the parameters need be aligned.
 */
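/* Example use (a sketch; memset_pattern16 is declared in <string.h> on Mac OS X):
 *
 *      #include <string.h>
 *      #include <stdint.h>
 *
 *      static const uint8_t kPixels[16] = {        // four identical RGBA pixels
 *          0xFF,0x00,0x00,0xFF,  0xFF,0x00,0x00,0xFF,
 *          0xFF,0x00,0x00,0xFF,  0xFF,0x00,0x00,0xFF };
 *      uint8_t frame[4096];
 *      memset_pattern16(frame, kPixels, sizeof(frame));
 */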
        .globl  _memset_pattern16

        cmplgi  cr1,r5,kShort       // check length
        lwz     r7,0(r4)            // load pattern (these remain lwz in 64-bit mode)
        neg     r6,r3               // start to compute ptr alignment
        b       __memset_pattern_common
/* *********************************
 * * M E M S E T _ P A T T E R N 8 *
 * *********************************
 *
 * Used to store an 8-byte pattern in memory:
 *
 *      void memset_pattern8(void *b, const void *c8, size_t len);
 *
 * Where c8 points to the 8-byte pattern. None of the parameters need be aligned.
 */
        .globl  _memset_pattern8

        lwz     r7,0(r4)            // load pattern (these remain lwz in 64-bit mode)
        cmplgi  cr1,r5,kShort       // check length
        neg     r6,r3               // start to compute ptr alignment
        mr      r10,r7              // replicate into 16-byte pattern
        b       __memset_pattern_common
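/* In effect (a sketch), the two words of the 8-byte pattern are duplicated so
 * the common code always sees a full 16-byte pattern:
 *
 *      uint32_t w0, w1;                            // the 8-byte pattern, as two words
 *      memcpy(&w0, c8, 4);  memcpy(&w1, (const char *)c8 + 4, 4);
 *      uint32_t pat16[4] = { w0, w1, w0, w1 };     // 16-byte pattern handed to common code
 */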
/* *********************************
 * * M E M S E T _ P A T T E R N 4 *
 * *********************************
 *
 * Used to store a 4-byte pattern in memory:
 *
 *      void memset_pattern4(void *b, const void *c4, size_t len);
 *
 * Where c4 points to the 4-byte pattern. None of the parameters need be aligned.
 */
        .globl  _memset_pattern4

        lwz     r7,0(r4)            // load pattern
        cmplgi  cr1,r5,kShort       // check length
        neg     r6,r3               // start to compute ptr alignment
        mr      r9,r7               // replicate into 16-byte pattern
        b       __memset_pattern_common // don't fall through because of scatter-loading
/* ***********************************************
 * * _ M E M S E T _ P A T T E R N _ C O M M O N *
 * ***********************************************
 *
 * This is the common code used by _memset_pattern16, 8, and 4. They all get here via
 * long branch (ie, "b") in case the routines are re-ordered, with:
 *      r3 = ptr to memory to store pattern into (unaligned)
 *      r5 = length in bytes
 *      r6 = neg(r3), used to compute #bytes to align
 *      r7, r9, r10, r11 = 16-byte pattern to store
 *      cr1 = ble if (r5 <= kShort)
 */
        .globl  __memset_pattern_common
        .private_extern __memset_pattern_common    // avoid dyld stub, which trashes r11
__memset_pattern_common:
        andi.   r0,r6,0xF           // get #bytes to 16-byte align ptr
        ble--   cr1,LShort          // if short operand skip out
// Align ptr and store enough of pattern so we have an aligned
// 16-byte chunk of it (this effectively rotates incoming pattern
// if the original ptr was not aligned).

        beq     Laligned            // skip if (r0==0), ie if r3 is 16-byte aligned
        add     r3,r3,r0            // 16-byte align ptr
        sub     r5,r5,r0            // adjust length
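/* Roughly, in C (a sketch of the idea, not the actual code; "store16" stands
 * for the stw instructions that write the 16-byte pattern in r7,r9,r10,r11):
 *
 *      size_t fixup = (0 - (uintptr_t)p) & 15;     // bytes to the next 16-byte boundary
 *      store16(p, pattern);                        // store pattern at the unaligned ptr...
 *      store16(p + 16, pattern);                   // ...and again, so the first aligned
 *                                                  //    16 bytes are completely written
 *      p   += fixup;       // p is now 16-byte aligned, and the 16 bytes at p hold
 *      len -= fixup;       // the original pattern rotated left by "fixup" bytes
 */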
// We're ready to call the machine-specific commpage routine
// to do the heavy lifting. When called, _COMM_PAGE_MEMSET_PATTERN expects:
//      r4 = length (>= 32)
//      r8 = ptr (16-byte aligned)
//      r9 = ptr to 16-byte pattern (16-byte aligned)
//      r3, r7, and r12 are preserved
//      r4 and r8 are updated to reflect a residual count of 0..31 bytes
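/* Viewed as a C prototype, the commpage helper behaves roughly like this
 * (hypothetical signature, for illustration only; the real routine is reached
 * with "bla" and takes its arguments in r4/r8/r9 as listed above):
 *
 *      // Repeatedly stores the 16-byte pattern at *dst (both 16-byte aligned),
 *      // working in 32-byte chunks; advances *dst past what it stored and
 *      // returns the 0..31 bytes it left unstored.
 *      size_t commpage_memset_pattern(void **dst, const void *pattern16, size_t len);
 */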
        mflr    r12                 // save return across commpage call
        mr      r9,r3               // point to 16-byte-aligned 16-byte pattern
        addi    r8,r3,16            // point to first unstored byte (r8 is 16-byte aligned)
        subi    r4,r5,16            // account for the aligned bytes we have stored
        bla     _COMM_PAGE_MEMSET_PATTERN

        mr.     r5,r4               // move length (0..31) back to original reg and test for 0
        beqlr                       // done if residual length == 0
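                                    // The 16 aligned bytes just below r8 have already been
                                    // stored, so they hold a (possibly rotated) copy of the
                                    // pattern; reload it to finish the residual bytes.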
        lwz     r7,-16(r8)          // load aligned pattern into r7,r9,r10, and r11
        mr      r3,r8               // move destination ptr back
// Handle short operands and leftovers.
//      r7,r9,r10,r11 = pattern

        srgi.   r0,r5,4             // at least 16 bytes?
        mtcrf   0x01,r5             // move leftover count to cr7
        stw     r7,0(r3)            // replicate the pattern
        bdnz    LShortLoop          // store 16 more bytes
// Fewer than 16 bytes remaining.

        stw     r7,0(r3)            // store next 8 bytes
        mr      r7,r10              // shift pattern over
        rlwinm  r7,r7,16,0,31       // position leftmost 2 bytes for store
        srwi    r7,r7,24            // position leftmost byte for store
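/* Because the pattern case must emit the remaining bytes in order, they are
 * taken from the most significant end of the next pattern word (PPC is
 * big-endian). A rough C sketch of the halfword and byte steps above:
 *
 *      if (len & 2) { p[0] = (uint8_t)(w >> 24); p[1] = (uint8_t)(w >> 16); p += 2; w <<= 16; }
 *      if (len & 1) { p[0] = (uint8_t)(w >> 24); }
 */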