2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* =======================================
29 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
30 * =======================================
32 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
33 * This version might be used when bringing up new processors with known
34 * Altivec bugs that need to be worked around. It is not particularly well
37 * For 64-bit processors with a 128-byte cache line, running in either
38 * 32- or 64-bit mode. This is written for 32-bit execution, the kernel
39 * will translate to 64-bit code when it compiles the 64-bit commpage.
41 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
45 * r3 = not used, as memcpy and memmove return 1st parameter as a value
46 * r4 = source ptr ("rs")
47 * r5 = count of bytes to move ("rc")
54 * r12 = destination ptr ("rd")
71 #include <sys/appleapiopts.h>
73 #include <machine/cpu_capabilities.h>
74 #include <machine/commpage.h>
78 #define kLong 64 // too long for inline loopless code
84 bcopy_64: // void bcopy(const void *src, void *dst, size_t len)
85 cmplwi rc,kLong // short or long?
86 sub w1,r4,r3 // must move in reverse if (rd-rs)<rc
87 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
88 mr rd,r4 // start to move registers to canonic spot
90 blt LShort // handle short operands
91 dcbt 0,r3 // touch in destination
92 b LLong // join medium/long operand code
94 // NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
97 Lmemcpy_g4: // void* memcpy(void *dst, void *src, size_t len)
98 Lmemmove_g4: // void* memmove(void *dst, const void *src, size_t len)
99 cmplwi rc,kLong // short or long?
100 sub w1,r3,r4 // must move in reverse if (rd-rs)<rc
101 dcbt 0,r4 // touch in the first line of source
102 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
103 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
104 bge LLong // handle medium or long operands
106 // Handle short operands.
109 mtcrf 0x02,rc // put length bits 26-27 in cr6 (faster one cr at a time)
110 mtcrf 0x01,rc // put length bits 28-31 in cr7
111 blt cr1,LShortReverse
113 // Forward short operands. This is the most frequent case, so it is inline.
115 LShort64: // enter to xfer last 64 bytes
116 bf 26,0f // 64-byte chunk to xfer?
128 bf 27,1f // quadword to move?
136 bf 28,2f // doubleword?
148 bf 30,4f // halfword to move?
154 bflr 31 // skip if no odd byte
160 // Handle short reverse operands.
161 // cr6 = bits 26-27 of length
162 // cr7 = bits 28-31 of length
165 add rs,rs,rc // adjust ptrs for reverse move
167 LShortReverse64: // enter to xfer last 64 bytes
168 bf 26,0f // 64-byte chunk to xfer?
178 bf 27,1f // quadword to move?
184 bf 28,2f // doubleword?
192 bf 30,4f // halfword to move?
196 bflr 31 // done if no odd byte
197 lbz w1,-1(rs) // no update
203 // cr1 = blt iff we must move reverse
207 dcbtst 0,rd // touch in destination
208 neg w3,rd // start to compute #bytes to align destination
209 andi. w6,w3,7 // w6 <- #bytes to 8-byte align destination
210 blt cr1,LLongReverse // handle reverse moves
211 mtctr w6 // set up for loop to align destination
212 sub rc,rc,w6 // adjust count
213 beq LAligned // destination already 8-byte aligned
221 // Destination is 8-byte aligned.
224 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
225 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
226 mtcrf 0x01,rc // put length bits 28-31 in cr7
227 beq LShort64 // no 64-byte chunks
231 // Loop moving 64-byte chunks.
258 // Handle reverse moves.
261 add rd,rd,rc // point to end of operands
263 andi. r0,rd,7 // is destination 8-byte aligned?
264 sub rc,rc,r0 // adjust count
265 mtctr r0 // set up for byte loop
266 beq LRevAligned // already aligned
273 // Destination is 8-byte aligned.
276 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
277 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
278 mtcrf 0x01,rc // put length bits 28-31 in cr7
279 beq LShortReverse64 // no 64-byte chunks
283 // Loop over 64-byte chunks (reverse).
307 COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)