2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
30 /* =======================================
31 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
32 * =======================================
34 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
35 * This version might be used when bringing up new processors, with known
36 * Altivec bugs that need to be worked around. It is not particularly well
39 * For 64-bit processors with a 128-byte cache line, running in either
40 * 32- or 64-bit mode. This is written for 32-bit execution, the kernel
41 * will translate to 64-bit code when it compiles the 64-bit commpage.
43 * Register usage. Note we use R2, so this code will not run in a PEF/CFM environment
47 * r3 = not used, as memcpy and memmove return 1st parameter as a value
48 * r4 = source ptr ("rs")
49 * r5 = count of bytes to move ("rc")
56 * r12 = destination ptr ("rd")
73 #include <sys/appleapiopts.h>
75 #include <machine/cpu_capabilities.h>
76 #include <machine/commpage.h>
80 #define kLong 64 // too long for inline loopless code
86 bcopy_64: // void bcopy(const void *src, void *dst, size_t len)
87 cmplwi rc,kLong // short or long?
88 sub w1,r4,r3 // must move in reverse if (rd-rs)<rc
89 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
90 mr rd,r4 // start to move registers to canonic spot
92 blt LShort // handle short operands
93 dcbt 0,r3 // touch in destination
94 b LLong // join medium/long operand code
96 // NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
99 Lmemcpy_g4: // void* memcpy(void *dst, void *src, size_t len)
100 Lmemmove_g4: // void* memmove(void *dst, const void *src, size_t len)
101 cmplwi rc,kLong // short or long?
102 sub w1,r3,r4 // must move in reverse if (rd-rs)<rc
103 dcbt 0,r4 // touch in the first line of source
104 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
105 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
106 bge LLong // handle medium or long operands
108 // Handle short operands.
111 mtcrf 0x02,rc // put length bits 26-27 in cr6 (faster one cr at a time)
112 mtcrf 0x01,rc // put length bits 28-31 in cr7
113 blt cr1,LShortReverse
115 // Forward short operands. This is the most frequent case, so it is inline.
117 LShort64: // enter to xfer last 64 bytes
118 bf 26,0f // 64-byte chunk to xfer?
130 bf 27,1f // quadword to move?
138 bf 28,2f // doubleword?
150 bf 30,4f // halfword to move?
156 bflr 31 // skip if no odd byte
162 // Handle short reverse operands.
163 // cr6 = bits 26-27 of length
164 // cr7 = bits 28-31 of length
167 add rs,rs,rc // adjust ptrs for reverse move
169 LShortReverse64: // enter to xfer last 64 bytes
170 bf 26,0f // 64-byte chunk to xfer?
180 bf 27,1f // quadword to move?
186 bf 28,2f // doubleword?
194 bf 30,4f // halfword to move?
198 bflr 31 // done if no odd byte
199 lbz w1,-1(rs) // no update
205 // cr1 = blt iff we must move reverse
209 dcbtst 0,rd // touch in destination
210 neg w3,rd // start to compute #bytes to align destination
211 andi. w6,w3,7 // w6 <- #bytes to 8-byte align destination
212 blt cr1,LLongReverse // handle reverse moves
213 mtctr w6 // set up for loop to align destination
214 sub rc,rc,w6 // adjust count
215 beq LAligned // destination already 8-byte aligned
223 // Destination is 8-byte aligned.
226 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
227 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
228 mtcrf 0x01,rc // put length bits 28-31 in cr7
229 beq LShort64 // no 64-byte chunks
233 // Loop moving 64-byte chunks.
260 // Handle reverse moves.
263 add rd,rd,rc // point to end of operands
265 andi. r0,rd,7 // is destination 8-byte aligned?
266 sub rc,rc,r0 // adjust count
267 mtctr r0 // set up for byte loop
268 beq LRevAligned // already aligned
275 // Destination is 8-byte aligned.
278 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
279 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
280 mtcrf 0x01,rc // put length bits 28-31 in cr7
281 beq LShortReverse64 // no 64-byte chunks
285 // Loop over 64-byte chunks (reverse).
309 COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)