/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
 * This version might be used when bringing up new processors with known
 * Altivec bugs that need to be worked around.  It is not particularly well
 * optimized.
 *
 * For 64-bit processors with a 128-byte cache line, running in either
 * 32- or 64-bit mode.  This is written for 32-bit execution; the kernel
 * will translate it to 64-bit code when it compiles the 64-bit commpage.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
 *   r3  = not used, as memcpy and memmove return the 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r12 = destination ptr ("rd")
 */
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text

#define kLong   64                  // too long for inline loopless code
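
// The code below uses symbolic register names.  rs, rd, and rc follow from
// the register map in the header comment; the w1..w8 scratch aliases are a
// plausible sketch only (r6-r11 plus r0 and r2, consistent with the "we use
// R2" note above), as the surviving header does not spell them out.

#define rs  r4
#define rd  r12
#define rc  r5

#define w1  r6
#define w2  r7
#define w3  r8
#define w4  r9
#define w5  r10
#define w6  r11
#define w7  r0
#define w8  r2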
        .align  5
bcopy_64:                           // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong            // short or long?
        sub     w1,r4,r3            // must move in reverse if (rd-rs)<rc
        cmplw   cr1,w1,rc           // set cr1 blt iff we must move reverse
        mr      rd,r4               // start to move registers to canonical spot...
        mr      rs,r3               //  ...so all entries share the LShort/LLong code
        blt     LShort              // handle short operands
        dcbt    0,r3                // touch in the first line of source
        b       LLong               // join medium/long operand code
// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
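
// (Commpage routines live at fixed user-visible addresses, so memcpy and
// memmove must sit at a constant offset from bcopy: 8 words of 4 bytes
// puts them 32 bytes past bcopy_64, which is why the entry above must
// assemble to exactly 8 instructions.)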
        .align  5
Lmemcpy_g4:                         // void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g4:                        // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong            // short or long?
        sub     w1,r3,r4            // must move in reverse if (rd-rs)<rc
        dcbt    0,r4                // touch in the first line of source
        cmplw   cr1,w1,rc           // set cr1 blt iff we must move reverse
        mr      rd,r3               // must leave r3 alone, it is return value for memcpy etc
        bge     LLong               // handle medium or long operands
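
// About the reverse-move test: a forward copy is unsafe only when the
// destination starts inside the source, i.e. when 0 <= (rd-rs) < rc.
// Computing (rd-rs) and comparing it to rc unsigned folds both conditions
// into one compare: if rd < rs the subtraction wraps to a huge value and
// compares >= rc, so the forward path is taken.  For example, rs=0x1000,
// rd=0x1004, rc=16 gives w1=4 < 16, so cr1 is blt and the move runs in
// reverse to avoid clobbering not-yet-read source bytes.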
// Handle short operands.

LShort:
        mtcrf   0x02,rc             // put length bits 26-27 in cr6 (faster one cr at a time)
        mtcrf   0x01,rc             // put length bits 28-31 in cr7
        blt     cr1,LShortReverse
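
// Since rc < kLong here, bits 26-31 of the count (with values 32, 16, 8,
// 4, 2, and 1) describe it completely.  The two mtcrf's copy those bits
// into cr6 and cr7, and each "bf" below skips one power-of-two-sized move
// when its bit is clear, so any length 0-63 moves without a loop.  For
// example, rc = 45 = 32+8+4+1 sets bits 26, 28, 29, and 31.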
// Forward short operands.  This is the most frequent case, so it is inline.
LShort64:                           // enter to xfer last 64 bytes
        bf      26,0f               // 32-byte chunk to xfer?
        bf      27,1f               // quadword to move?
        bf      28,2f               // doubleword?
        bf      29,3f               // word?
        bf      30,4f               // halfword to move?
        bflr    31                  // done if no odd byte
        lbz     w1,0(rs)            // no update
        stb     w1,0(rd)
        blr
// Handle short reverse operands.
//      cr6 = bits 26-27 of length
//      cr7 = bits 28-31 of length
LShortReverse:
        add     rs,rs,rc            // adjust ptrs for reverse move
        add     rd,rd,rc
LShortReverse64:                    // enter to xfer last 64 bytes
        bf      26,0f               // 32-byte chunk to xfer?
        bf      27,1f               // quadword to move?
        bf      28,2f               // doubleword?
        bf      29,3f               // word?
        bf      30,4f               // halfword to move?
        bflr    31                  // done if no odd byte
        lbz     w1,-1(rs)           // no update
        stb     w1,-1(rd)
        blr
// Handle medium and long operands.
//      cr1 = blt iff we must move reverse

LLong:
        dcbtst  0,rd                // touch in destination
        neg     w3,rd               // start to compute #bytes to align destination
        andi.   w6,w3,7             // w6 <- #bytes to 8-byte align destination
        blt     cr1,LLongReverse    // handle reverse moves
        mtctr   w6                  // set up for loop to align destination
        sub     rc,rc,w6            // adjust count
        beq     LAligned            // destination already 8-byte aligned
1:
        lbz     w1,0(rs)            // move a byte at a time until destination is aligned
        addi    rs,rs,1
        stb     w1,0(rd)
        addi    rd,rd,1
        bdnz    1b
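
// (Above, neg/andi. computes (-rd) & 7, the number of bytes needed to reach
// the next 8-byte boundary, and sets cr0 eq when rd is already aligned.
// It is the destination rather than the source that gets aligned because
// misaligned stores are generally costlier than misaligned loads; the
// 8-byte loads below may remain unaligned.)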
// Destination is 8-byte aligned.

LAligned:
        srwi.   w2,rc,6             // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc             // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc             // put length bits 28-31 in cr7
        beq     LShort64            // no 64-byte chunks
// Loop moving 64-byte chunks.
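
// A minimal sketch of the chunk loop, assuming the w1..w8 aliases defined
// above: CTR holds the chunk count from srwi., and each iteration does
// eight doubleword loads followed by eight doubleword stores before
// falling back to LShort64 for the leftovers.

        mtctr   w2                  // CTR <- count of 64-byte chunks
1:
        ld      w1,0(rs)            // load 64 bytes...
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        ld      w5,32(rs)
        ld      w6,40(rs)
        ld      w7,48(rs)
        ld      w8,56(rs)
        addi    rs,rs,64
        std     w1,0(rd)            // ...then store them
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        std     w5,32(rd)
        std     w6,40(rd)
        std     w7,48(rd)
        std     w8,56(rd)
        addi    rd,rd,64
        bdnz    1b

        b       LShort64            // handle last 0-63 bytes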
// Handle reverse moves.

LLongReverse:
        add     rd,rd,rc            // point to end of operands
        add     rs,rs,rc
        andi.   r0,rd,7             // is destination 8-byte aligned?
        sub     rc,rc,r0            // adjust count
        mtctr   r0                  // set up for byte loop
        beq     LRevAligned         // already aligned
1:
        lbzu    w1,-1(rs)           // move a byte at a time until destination is aligned
        stbu    w1,-1(rd)
        bdnz    1b
// Destination is 8-byte aligned.

LRevAligned:
        srwi.   w2,rc,6             // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc             // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc             // put length bits 28-31 in cr7
        beq     LShortReverse64     // no 64-byte chunks
// Loop over 64-byte chunks (reverse).
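
// A minimal sketch of the reverse chunk loop, under the same register
// assumptions as the forward one: rs and rd point just past the end of the
// operands, so each iteration moves 64 bytes at descending addresses, with
// the final update forms stepping both pointers down by 64.

        mtctr   w2                  // CTR <- count of 64-byte chunks
1:
        ld      w1,-8(rs)           // load 64 bytes, highest address first...
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ld      w4,-32(rs)
        ld      w5,-40(rs)
        ld      w6,-48(rs)
        ld      w7,-56(rs)
        ldu     w8,-64(rs)          // last load decrements rs by 64
        std     w1,-8(rd)           // ...then store them
        std     w2,-16(rd)
        std     w3,-24(rd)
        std     w4,-32(rd)
        std     w5,-40(rd)
        std     w6,-48(rd)
        std     w7,-56(rd)
        stdu    w8,-64(rd)          // last store decrements rd by 64
        bdnz    1b

        b       LShortReverse64     // handle last 0-63 bytes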
        COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)
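
// Reading the descriptor, assuming the usual (label, commpage address,
// required capabilities, excluded capabilities, flags) argument order:
// install this routine at _COMM_PAGE_BCOPY on processors that are 64-bit
// (k64Bit) but lack Altivec (kHasAltivec excluded), in both the 32- and
// 64-bit commpages (kCommPageBoth), mechanically translating the 32-bit
// code to 64-bit form for the latter (kPort32to64).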