2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* =======================================
29 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
30 * =======================================
32 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
33 * This version might be used bringing up new processors, with known
34 * Altivec bugs that need to be worked around. It is not particularly well
37 * For 64-bit processors with a 128-byte cache line, running in either
38 * 32- or 64-bit mode. This is written for 32-bit execution, the kernel
39 * will translate to 64-bit code when it compiles the 64-bit commpage.
41 * Register usage. Note we use R2, so this code will not run in a PEF/CFM environment.
45 * r3 = not used, as memcpy and memmove return 1st parameter as a value
46 * r4 = source ptr ("rs")
47 * r5 = count of bytes to move ("rc")
54 * r12 = destination ptr ("rd")
70 #include <sys/appleapiopts.h>
72 #include <machine/cpu_capabilities.h>
73 #include <machine/commpage.h>
77 #define kLong 64 // too long for inline loopless code
// bcopy entry. NB: bcopy's argument order is (src, dst, len), so on entry
// r3 = src and r4 = dst; the code below moves them into the canonic rd/rs
// registers shared with the memcpy/memmove path.
// NOTE(review): the embedded original line numbers skip (87 -> 89), so at
// least one instruction (presumably "mr rs,r3") is elided in this excerpt.
83 bcopy_64: // void bcopy(const void *src, void *dst, size_t len)
84 cmplwi rc,kLong // short or long? (kLong = 64; sets cr0 for the blt below)
85 sub w1,r4,r3 // w1 = dst - src; must move in reverse if (rd-rs)<rc
86 cmplw cr1,w1,rc // unsigned compare: set cr1 blt iff we must move reverse
87 mr rd,r4 // start to move registers to canonic spot
89 blt LShort // handle short operands (len < kLong)
90 dcbt 0,r3 // touch in source (r3 = src for bcopy; dcbt hints a load stream)
91 b LLong // join medium/long operand code
93 // NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
// memcpy/memmove share one entry: for both, r3 = dst, r4 = src, r5 = len.
// The unsigned compare of (dst - src) against len detects the only overlap
// that forces a descending copy (src below dst); when dst < src the
// subtraction wraps to a huge unsigned value and the forward path is taken.
96 Lmemcpy_g4: // void* memcpy(void *dst, void *src, size_t len)
97 Lmemmove_g4: // void* memmove(void *dst, const void *src, size_t len)
98 cmplwi rc,kLong // short or long? (kLong = 64 bytes)
99 sub w1,r3,r4 // w1 = dst - src; must move in reverse if (rd-rs)<rc
100 dcbt 0,r4 // touch in the first line of source (r4 = src here)
101 cmplw cr1,w1,rc // unsigned: cr1 blt iff 0 <= dst-src < len (overlap)
102 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
103 bge LLong // handle medium or long operands
105 // Handle short operands.
108 mtcrf 0x02,rc // put length bits 26-27 in cr6 (faster one cr at a time)
109 mtcrf 0x01,rc // put length bits 28-31 in cr7
110 blt cr1,LShortReverse // overlapping with dst above src: copy descending
112 // Forward short operands. This is the most frequent case, so it is inline.
// The length (< 64) was mirrored into the CR by the mtcrf pair: CR bit 26
// has value 32, bit 27 = 16, bit 28 = 8, bit 29 = 4, bit 30 = 2, bit 31 = 1.
// "bf n,label" skips the corresponding move when that length bit is clear.
// NOTE(review): the load/store pairs between the tests (and the bit-29
// word test) are elided in this excerpt — the embedded line numbers jump.
114 LShort64: // enter to xfer last 64 bytes
115 bf 26,0f // 32-byte chunk to xfer? (CR bit 26 = length bit valued 32)
127 bf 27,1f // quadword to move?
135 bf 28,2f // doubleword?
147 bf 30,4f // halfword to move?
153 bflr 31 // return via lr if no odd byte, else fall through to move it
159 // Handle short reverse operands.
160 // cr6 = bits 26-27 of length
161 // cr7 = bits 28-31 of length
// Mirror image of the forward short path: pointers are first advanced one
// past the end of each operand, then moves proceed with negative offsets.
// NOTE(review): the matching "add rd,rd,rc" and the load/store pairs
// between the tests are elided in this excerpt (line numbers jump).
164 add rs,rs,rc // adjust ptrs for reverse move (rs points one past the end)
166 LShortReverse64: // enter to xfer last 64 bytes
167 bf 26,0f // 32-byte chunk to xfer? (CR bit 26 = length bit valued 32)
177 bf 27,1f // quadword to move?
183 bf 28,2f // doubleword?
191 bf 30,4f // halfword to move?
195 bflr 31 // done if no odd byte
196 lbz w1,-1(rs) // load final odd byte (non-update form; rs points past it)
// Medium/long operands: byte-align the destination to 8 bytes, then move
// 64-byte chunks, finishing the leftovers via the short-operand code.
202 // cr1 = blt iff we must move reverse
206 dcbtst 0,rd // touch in destination (dcbtst hints an upcoming store)
207 neg w3,rd // start to compute #bytes to align destination
208 andi. w6,w3,7 // w6 <- #bytes to 8-byte align destination ((-rd) & 7); sets cr0
209 blt cr1,LLongReverse // handle reverse moves
210 mtctr w6 // set up for loop to align destination
211 sub rc,rc,w6 // adjust count
212 beq LAligned // destination already 8-byte aligned (cr0 eq from andi.)
220 // Destination is 8-byte aligned.
223 srwi. w2,rc,6 // w2 <- count of 64-byte chunks; cr0 eq iff none
224 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
225 mtcrf 0x01,rc // put length bits 28-31 in cr7
226 beq LShort64 // no 64-byte chunks: just move the leftover (<64) bytes
230 // Loop moving 64-byte chunks.
257 // Handle reverse moves.
// Mirror of the forward long path: point past the ends of the operands,
// byte-align the destination downward, then move 64-byte chunks descending.
// NOTE(review): the matching "add rs,rs,rc" and the alignment byte loop
// are elided in this excerpt (embedded line numbers jump 260 -> 262).
260 add rd,rd,rc // point to end of operands
262 andi. r0,rd,7 // is destination 8-byte aligned? (r0 = bytes to strip); sets cr0
263 sub rc,rc,r0 // adjust count
264 mtctr r0 // set up for byte loop
265 beq LRevAligned // already aligned (cr0 eq from andi.)
272 // Destination is 8-byte aligned.
275 srwi. w2,rc,6 // w2 <- count of 64-byte chunks; cr0 eq iff none
276 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
277 mtcrf 0x01,rc // put length bits 28-31 in cr7
278 beq LShortReverse64 // no 64-byte chunks
282 // Loop over 64-byte chunks (reverse).
// Register this routine with the commpage: (routine, commpage address,
// required-feature bits, prohibited-feature bits, flags). kHasAltivec in
// the prohibited slot means this no-Altivec variant is installed only on
// 64-bit CPUs *without* Altivec, matching the file header — NOTE(review):
// confirm the argument order against machine/commpage.h.
306 COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)