/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
 * This version might be used when bringing up new processors with known
 * Altivec bugs that need to be worked around.  It is not particularly well
 * optimized.
 *
 * For 64-bit processors with a 128-byte cache line, running in either
 * 32- or 64-bit mode.  This is written for 32-bit execution; the kernel
 * will translate it to 64-bit code when it compiles the 64-bit commpage.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
 *   r0  = temp ("w7")
 *   r2  = temp ("w8")
 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6-r11 = temps ("w1" through "w6")
 *   r12 = destination ptr ("rd")
 */
#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

#define kLong   64                  // too long for inline loopless code
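
// kLong is the crossover to the looping code: the loopless short-operand
// path below peels the residual length apart one power of two at a time,
// so it can move at most 32+16+8+4+2+1 = 63 bytes.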
bcopy_64:                           // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong            // short or long?
        sub     w1,r4,r3            // must move in reverse if (rd-rs)<rc
        cmplw   cr1,w1,rc           // set cr1 blt iff we must move reverse
        mr      rd,r4               // start to move registers to canonic spot
        mr      rs,r3               //   (canonic = as the memcpy entry expects them)
        blt     LShort              // handle short operands
        dcbt    0,r3                // touch in source
        b       LLong               // join medium/long operand code

// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.

Lmemcpy_g4:                         // void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g4:                        // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong            // short or long?
        sub     w1,r3,r4            // must move in reverse if (rd-rs)<rc
        dcbt    0,r4                // touch in the first line of source
        cmplw   cr1,w1,rc           // set cr1 blt iff we must move reverse
        mr      rd,r3               // must leave r3 alone, it is return value for memcpy etc
        bge     LLong               // handle medium or long operands
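
// The single unsigned compare above folds the whole overlap analysis into
// one test: a reverse move is needed only when the destination starts
// inside the source operand (rs < rd < rs+rc), and because (rd-rs) wraps
// when rd < rs, the unsigned test (rd-rs) < rc is true in exactly that
// case.  A sketch in C (copy_forward and copy_backward are hypothetical
// helpers, not part of this file):
//
//      if ((uintptr_t)dst - (uintptr_t)src < (uintptr_t)len)
//          copy_backward(dst, src, len);   // dst starts inside src operand
//      else
//          copy_forward(dst, src, len);    // safe to move ascending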

// Handle short operands.

LShort:
        mtcrf   0x02,rc             // put length bits 26-27 in cr6 (faster one cr at a time)
        mtcrf   0x01,rc             // put length bits 28-31 in cr7
        blt     cr1,LShortReverse
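
// With the low six length bits in cr6/cr7, each "bf" test below moves one
// power-of-two piece of the residual length: bit 26 = 32 bytes, bit 27 = 16,
// bit 28 = 8, bit 29 = 4, bit 30 = 2, bit 31 = 1.  For example, a residual
// length of 43 = 32+8+2+1 sets bits 26, 28, 30, and 31, so exactly those
// four moves execute.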

// Forward short operands.  This is the most frequent case, so it is inline.

LShort64:                           // enter to xfer last 64 bytes
        bf      26,0f               // 32-byte chunk to xfer?
        bf      27,1f               // quadword to move?
        bf      28,2f               // doubleword?
        bf      29,3f               // word?
        bf      30,4f               // halfword to move?
        bflr    31                  // skip if no odd byte
        lbz     w1,0(rs)
        stb     w1,0(rd)
        blr
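
// Each of the conditional moves tested above has the same shape; a minimal
// sketch of the doubleword case (bit 28), assuming w1 is free, looks like:
//
//              bf      28,2f       // doubleword?
//              ld      w1,0(rs)
//              addi    rs,rs,8
//              std     w1,0(rd)
//              addi    rd,rd,8
//      2: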

// Handle short reverse operands.
//      cr6 = bits 26-27 of length
//      cr7 = bits 28-31 of length

LShortReverse:
        add     rs,rs,rc            // adjust ptrs for reverse move
        add     rd,rd,rc
LShortReverse64:                    // enter to xfer last 64 bytes
        bf      26,0f               // 32-byte chunk to xfer?
        bf      27,1f               // quadword to move?
        bf      28,2f               // doubleword?
        bf      29,3f               // word?
        bf      30,4f               // halfword to move?
        bflr    31                  // done if no odd byte
        lbz     w1,-1(rs)           // no update
        stb     w1,-1(rd)
        blr
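
// The reverse fragments mirror the forward ones but run the pointers down
// instead of up; a minimal sketch of the doubleword case (bit 28), again
// assuming w1 is free:
//
//              bf      28,2f       // doubleword?
//              ld      w1,-8(rs)
//              subi    rs,rs,8
//              std     w1,-8(rd)
//              subi    rd,rd,8
//      2: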

// Handle long operands.
//      cr1 = blt iff we must move reverse

LLong:
        dcbtst  0,rd                // touch in destination
        neg     w3,rd               // start to compute #bytes to align destination
        andi.   w6,w3,7             // w6 <- #bytes to 8-byte align destination
        blt     cr1,LLongReverse    // handle reverse moves
        mtctr   w6                  // set up for loop to align destination
        sub     rc,rc,w6            // adjust count
        beq     LAligned            // destination already 8-byte aligned
1:
        lbz     w1,0(rs)            // loop copying bytes until rd is 8-byte aligned
        addi    rs,rs,1
        stb     w1,0(rd)
        addi    rd,rd,1
        bdnz    1b
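
// The neg/andi. pair above computes (-rd) & 7, the distance from rd up to
// the next 8-byte boundary (0 if rd is already aligned).  For example, if
// rd = 0x1003, then -rd ends in ...FFD and (...FFD & 7) = 5, the five bytes
// needed to reach 0x1008.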

// Destination is 8-byte aligned.

LAligned:
        srwi.   w2,rc,6             // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc             // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc             // put length bits 28-31 in cr7
        beq     LShort64            // no 64-byte chunks
        mtctr   w2                  // set up chunk loop count

// Loop moving 64-byte chunks.
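
// A minimal sketch of the chunk loop, assuming w1-w8 (r0, r2, r6-r11) are
// all free temporaries and ctr holds the chunk count set up above:

        .align  5
1:
        ld      w1,0(rs)            // read one 64-byte chunk as 8 doublewords
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        ld      w5,32(rs)
        ld      w6,40(rs)
        ld      w7,48(rs)
        ld      w8,56(rs)
        addi    rs,rs,64
        std     w1,0(rd)            // store the chunk
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        std     w5,32(rd)
        std     w6,40(rd)
        std     w7,48(rd)
        std     w8,56(rd)
        addi    rd,rd,64
        bdnz    1b

        b       LShort64            // move leftover bytes (0-63) and return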

// Handle reverse moves.

LLongReverse:
        add     rd,rd,rc            // point to end of operands
        add     rs,rs,rc
        andi.   r0,rd,7             // is destination 8-byte aligned?
        sub     rc,rc,r0            // adjust count
        mtctr   r0                  // set up for byte loop
        beq     LRevAligned         // already aligned
1:
        lbzu    w1,-1(rs)           // loop copying bytes until rd is 8-byte aligned
        stbu    w1,-1(rd)
        bdnz    1b
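
// The update forms (lbzu/stbu) keep the alignment loop at three
// instructions: each access uses effective address rs-1 or rd-1 and writes
// that address back into the base register, so the pointers walk backward
// with no separate subtracts.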

// Destination is 8-byte aligned.

LRevAligned:
        srwi.   w2,rc,6             // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc             // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc             // put length bits 28-31 in cr7
        beq     LShortReverse64     // no 64-byte chunks
        mtctr   w2                  // set up chunk loop count

// Loop over 64-byte chunks (reverse).
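
// A minimal sketch of the reverse chunk loop, mirroring the forward one
// with descending offsets (again assuming w1-w8 are free and ctr holds the
// chunk count):

        .align  5
1:
        ld      w1,-8(rs)           // read one 64-byte chunk, descending
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ld      w4,-32(rs)
        ld      w5,-40(rs)
        ld      w6,-48(rs)
        ld      w7,-56(rs)
        ld      w8,-64(rs)
        subi    rs,rs,64
        std     w1,-8(rd)           // store the chunk
        std     w2,-16(rd)
        std     w3,-24(rd)
        std     w4,-32(rd)
        std     w5,-40(rd)
        std     w6,-48(rd)
        std     w7,-56(rd)
        std     w8,-64(rd)
        subi    rd,rd,64
        bdnz    1b

        b       LShortReverse64     // move leftover bytes (0-63) and return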

        COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)
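
// The descriptor above registers this routine with the commpage machinery
// declared in <machine/commpage.h>: it is installed at the _COMM_PAGE_BCOPY
// address, in both the 32-bit and 64-bit commpages (kCommPageBoth), with
// the 32-bit code ported to 64-bit (kPort32to64) as described in the header
// comment.  Reading k64Bit as a required capability and kHasAltivec as a
// disqualifying one is an inference from this file's stated purpose: a
// 64-bit processor whose Altivec unit cannot be used.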