2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* =======================================
26 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
27 * =======================================
29 * Version of 2/20/2003, for a hypothetic 64-bit processor without Altivec.
30 * This version might be used when bringing up new processors, with known
31 * Altivec bugs that need to be worked around. It is not particularly well
34 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
38 * r3 = not used, as memcpy and memmove return 1st parameter as a value
39 * r4 = source ptr ("rs")
40 * r5 = count of bytes to move ("rc")
47 * r12 = destination ptr ("rd")
64 #include <sys/appleapiopts.h>
66 #include <machine/cpu_capabilities.h>
67 #include <machine/commpage.h>
72 #define kLong 64 // too long for inline loopless code
// Entry point for bcopy().  BSD argument order differs from memcpy():
//   r3 = source, r4 = destination, rc (r5) = byte count.
// The pointers are therefore swapped into the canonical rd/rs registers
// before joining the common short/long code paths.
// cr0 (from cmplwi) selects short vs long; cr1 (from cmplw) is "blt" iff
// the unsigned difference (dst - src) < count, i.e. the operands overlap
// such that a forward copy would clobber the source — reverse copy needed.
// NOTE(review): this excerpt elides original lines 83 and 87; the move of
// r3 into rs presumably sits in that gap — confirm against the full source.
78 bcopy_64: // void bcopy(const void *src, void *dst, size_t len)
79 cmplwi rc,kLong // short or long?
80 sub w1,r4,r3 // must move in reverse if (rd-rs)<rc
81 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
82 mr rd,r4 // start to move registers to canonic spot
84 blt LShort // handle short operands
85 dcbt 0,r3 // touch in destination
86 b LLong // join medium/long operand code
88 // NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
// Entry points for memcpy()/memmove().  ISO argument order:
//   r3 = destination, r4 = source, rc (r5) = byte count.
// r3 must be preserved through the whole routine because it is the return
// value; the destination is copied into rd instead of being consumed.
// As in bcopy above, cr1 "blt" means (dst - src) < count unsigned, so the
// operands overlap and the move must run back-to-front.
// NOTE(review): labels are named Lmemcpy_g4/Lmemmove_g4 although this file
// is described as the 64-bit no-AltiVec variant — presumably inherited from
// the G4 version this was cloned from; confirm against the full source.
91 Lmemcpy_g4: // void* memcpy(void *dst, void *src, size_t len)
92 Lmemmove_g4: // void* memmove(void *dst, const void *src, size_t len)
93 cmplwi rc,kLong // short or long?
94 sub w1,r3,r4 // must move in reverse if (rd-rs)<rc
95 dcbt 0,r4 // touch in the first line of source
96 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
97 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
98 bge LLong // handle medium or long operands
100 // Handle short operands.
// The low 6 bits of the length are copied into CR fields cr6/cr7 so each
// power-of-two-sized residual chunk (32/16/8/4/2/1 bytes) can be tested
// with a single "bf" (branch-if-CR-bit-false) and moved straight-line,
// with no loop.  Two mtcrf instructions, one CR field each, are used
// because a single multi-field mtcrf is slower on these cores.
103 mtcrf 0x02,rc // put length bits 26-27 in cr6 (faster one cr at a time)
104 mtcrf 0x01,rc // put length bits 28-31 in cr7
105 blt cr1,LShortReverse
107 // Forward short operands. This is the most frequent case, so it is inline.
// LShort64 is also re-entered from the long-operand path to transfer the
// final <64 bytes after all full 64-byte chunks have been moved.
109 LShort64: // enter to xfer last 64 bytes
// NOTE(review): the load/store pairs between the "bf" tests (original
// lines 111-147) are elided in this excerpt; only the chunk-size tests
// remain visible below.
110 bf 26,0f // 64-byte chunk to xfer?
122 bf 27,1f // quadword to move?
130 bf 28,2f // doubleword?
142 bf 30,4f // halfword to move?
// bflr = return via link register when CR bit 31 is clear (even length).
148 bflr 31 // skip if no odd byte
154 // Handle short reverse operands.
155 // cr6 = bits 26-27 of length
156 // cr7 = bits 28-31 of length
// Reverse (back-to-front) copy for overlapping short operands: both
// pointers are advanced past the end, then chunks are moved with negative
// offsets, working downward.  Same bf/CR-bit chunk dispatch as the
// forward path above.
// NOTE(review): the adjustment of rd (presumably "add rd,rd,rc" at the
// elided original line 160) and the load/store pairs between the tests
// are not visible in this excerpt.
159 add rs,rs,rc // adjust ptrs for reverse move
161 LShortReverse64: // enter to xfer last 64 bytes
162 bf 26,0f // 64-byte chunk to xfer?
172 bf 27,1f // quadword to move?
178 bf 28,2f // doubleword?
186 bf 30,4f // halfword to move?
190 bflr 31 // done if no odd byte
// Final odd byte: loaded from rs-1 without pointer update, since the
// routine returns immediately afterward.
191 lbz w1,-1(rs) // no update
197 // cr1 = blt iff we must move reverse
// Medium/long forward moves.  Strategy:
//   1. prefetch the destination line for store (dcbtst),
//   2. byte-copy up to 7 bytes so the destination becomes 8-byte aligned
//      (neg/andi. computes (-rd) & 7 = bytes needed to reach alignment),
//   3. move 64-byte chunks in a CTR loop,
//   4. finish the <64-byte tail via LShort64.
// NOTE(review): the byte-alignment loop itself (original lines 208-214)
// and the 64-byte chunk loop body (lines 226 onward) are elided here.
201 dcbtst 0,rd // touch in destination
202 neg w3,rd // start to compute #bytes to align destination
203 andi. w6,w3,7 // w6 <- #bytes to 8-byte align destination
204 blt cr1,LLongReverse // handle reverse moves
205 mtctr w6 // set up for loop to align destination
206 sub rc,rc,w6 // adjust count
207 beq LAligned // destination already 8-byte aligned
215 // Destination is 8-byte aligned.
// srwi. sets cr0: if rc>>6 is zero there are no full 64-byte chunks and
// control drops straight into the short-residual code.
218 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
219 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
220 mtcrf 0x01,rc // put length bits 28-31 in cr7
221 beq LShort64 // no 64-byte chunks
225 // Loop moving 64-byte chunks.
252 // Handle reverse moves.
// Mirror image of the forward long path: point both pointers past the end
// of the operands, byte-copy downward until the destination end is 8-byte
// aligned, then move 64-byte chunks back-to-front, finishing via
// LShortReverse64.
// NOTE(review): the adjustment of rs (presumably at the elided original
// line 256), the alignment byte loop (261-266), and the chunk loop body
// (278 onward) are not visible in this excerpt.
255 add rd,rd,rc // point to end of operands
// For a reverse move the end address itself must be aligned, so the low
// bits of rd (not of -rd) give the byte count to peel off.
257 andi. r0,rd,7 // is destination 8-byte aligned?
258 sub rc,rc,r0 // adjust count
259 mtctr r0 // set up for byte loop
260 beq LRevAligned // already aligned
267 // Destination is 8-byte aligned.
270 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
271 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
272 mtcrf 0x01,rc // put length bits 28-31 in cr7
273 beq LShortReverse64 // no 64-byte chunks
277 // Loop over 64-byte chunks (reverse).
// Commpage registration: publishes this routine at the fixed user-visible
// address _COMM_PAGE_BCOPY.  Per the file header, this variant is for
// 64-bit processors without (working) AltiVec, so k64Bit is a required
// capability and kHasAltivec a prohibited one — presumably the third and
// fourth arguments are the must-have / must-not-have masks; confirm
// against the COMMPAGE_DESCRIPTOR definition in machine/commpage.h.
301 COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,0)