2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* =======================================
26 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
27 * =======================================
29 * Version of 2/20/2003, tuned for G3.
31 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
36 * r3 = not used, as memcpy and memmove return 1st parameter as a value
37 * r4 = source ptr ("rs")
38 * r5 = count of bytes to move ("rc")
45 * r12 = destination ptr ("rd")
46 * f0-f3 = used for moving 8-byte aligned data
48 #define rs r4 // source pointer alias. NB: we depend on rs==r4 because code around the lswx instructions names r4 directly
62 #include <sys/appleapiopts.h>
64 #include <machine/cpu_capabilities.h>
65 #include <machine/commpage.h>
71 #define kLong 33 // smallest length too long for one string op: lswx and stswx move at most 32 bytes through r5-r12
77 bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
// Entry: r3 = src, r4 = dst, rc = len (the register list in the file header gives rc as r5).
// Lengths of kLong or more branch to LLong0. Shorter lengths are copied with one
// lswx and stswx pair whose byte count is taken from XER.
78 cmplwi rc,kLong // length > 32 bytes? (unsigned compare against 33)
79 sub w1,r4,r3 // w1 = dst - src = (rd-rs); must move in reverse if (rd-rs)<rc
80 mr rd,r4 // start to move source & dest to canonic spot: rd = dst
81 bge LLong0 // skip if long operand (len >= kLong)
82 mtxer rc // set length for string ops
83 lswx r5,0,r3 // load bytes into r5-r12; this overwrites rc (r5) and can overwrite rd (r12), so r3 and r4 are named directly
84 stswx r5,0,r4 // store them at dst
87 // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
90 Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
91 Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
// Both labels mark the same entry point. Entry: r3 = dst, rs (r4) = src, rc = len.
// Lengths of kLong or more branch to LLong1. Shorter lengths are copied with one
// lswx and stswx pair whose byte count is taken from XER.
92 cmplwi rc,kLong // length > 32 bytes? (unsigned compare against 33)
93 sub w1,r3,rs // w1 = dst - src = (rd-rs); must move in reverse if (rd-rs)<rc
94 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc; rd = working copy of dst
95 bge LLong1 // longer than 32 bytes
96 mtxer rc // set length for string ops
97 lswx r5,0,r4 // load bytes into r5-r12 from src (r4 is rs); this overwrites rc (r5) and can overwrite rd (r12)
98 stswx r5,0,r3 // store them at dst; r3 is only read here
101 // Long operands (more than 32 bytes.)
102 // w1 = (rd-rs), used to check for overlap direction and for relative alignment
// On entry rd = destination and rc = length (>= kLong). LLong0 first copies the
// bcopy source from r3 into rs so that both entries continue with rs = source.
104 LLong0: // enter from bcopy()
105 mr rs,r3 // rs = src (bcopy passes src in r3); r3 itself is left alone, as on the memcpy path where it is the return value
106 LLong1: // enter from memcpy() and memmove()
107 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse: unsigned (rd-rs) < rc
108 rlwinm r0,w1,0,0x3 // r0 = (rd-rs) & 3: are operands relatively word-aligned?
109 neg w2,rd // w2 = -rd; its low bits give the distance to the next aligned destination address
110 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
111 blt cr1,LLongReverse // handle reverse move
112 andi. w4,w2,3 // w4 <- number of bytes to word align destination; also sets cr0 for the beq 1f below
113 beq cr5,LLongFloat // relatively aligned so use FPRs
114 sub rc,rc,w4 // adjust count for alignment
115 srwi r0,rc,5 // number of 32-byte chunks to xfer; can be 0 when alignment bytes were taken (rc >= 33, w4 <= 3), tested below
116 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
117 mtctr r0 // set up loop count
118 beq 1f // dest already word aligned (cr0 still holds the andi. result; nothing in between writes cr0)
120 // Word align the destination.
122 mtxer w4 // byte count to xer
123 cmpwi r0,0 // any chunks to xfer? (result in cr0)
124 lswx w1,0,rs // load the w4 alignment bytes starting in register w1
128 beq- 2f // pathologic case, no chunks to xfer (cr0 from the cmpwi above)
130 // Forward, unaligned loop.
152 2: // rc = remaining bytes (0-31)
153 mtxer rc // set up count for string ops
154 mr r0,rd // move dest ptr out of the way: rd is r12 per the file header, inside the r5-r12 range lswx fills
155 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
156 stswx r5,0,r0 // store them
161 // Forward, aligned loop. We use FPRs.
// Reached when source and destination are relatively word aligned (cr5 beq in the
// common long-operand entry code). w2 still holds -rd from that entry code.
164 andi. w4,w2,7 // w4 <- number of bytes to doubleword-align destination; also sets cr0 for the beq 1f below
165 sub rc,rc,w4 // adjust count for alignment
166 srwi r0,rc,5 // number of 32-byte chunks to xfer; can be 0 when alignment bytes were taken (rc >= 33, w4 <= 7), tested below
167 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
168 mtctr r0 // set up loop count
169 beq 1f // dest already doubleword aligned (cr0 still holds the andi. result)
171 // Doubleword align the destination.
173 mtxer w4 // byte count to xer
174 cmpwi r0,0 // any chunks to xfer? (result in cr0)
175 lswx w1,0,rs // load the w4 alignment bytes starting in register w1
179 beq- 2f // pathologic case, no chunks to xfer (cr0 from the cmpwi above)
180 1: // loop over 32-byte chunks
192 2: // rc = remaining bytes (0-31)
193 mtxer rc // set up count for string ops
194 mr r0,rd // move dest ptr out of the way: rd is r12 per the file header, inside the r5-r12 range lswx fills
195 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
196 stswx r5,0,r0 // store them
200 // Long, reverse moves.
201 // cr5 = beq if relatively word aligned
// Taken when unsigned (rd-rs) < rc, as tested into cr1 by the common long-operand entry code.
204 add rd,rd,rc // rd = one past the last destination byte
206 beq cr5,LReverseFloat // aligned operands so can use FPRs
207 srwi r0,rc,5 // get 32-byte chunk count (at least 1, since rc >= kLong and is unmodified so far)
208 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
209 mtctr r0 // set up loop count
210 mtxer rc // set up for trailing bytes
230 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); rs is r4, so this steps the source pointer back by rc
231 sub r0,rd,rc // matching destination address, kept in r0 because rd (r12) is inside the range lswx fills
232 lswx r5,0,r4 // load xer bytes into r5-r12
233 stswx r5,0,r0 // store them
237 // Long, reverse aligned moves. We use FPRs.
// NOTE(review): presumably entered from the reverse path with rd already advanced
// past the end of the destination, so rd & 7 counts trailing bytes - confirm at the branch site.
240 andi. w4,rd,7 // w4 <- number of bytes to doubleword-align destination; also sets cr0 for the beq 1f below
241 sub rc,rc,w4 // adjust count for alignment
242 srwi r0,rc,5 // number of 32-byte chunks to xfer; can be 0 when alignment bytes were taken, tested below
243 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
244 mtctr r0 // set up loop count
245 beq 1f // dest already doubleword aligned (cr0 still holds the andi. result)
247 // Doubleword align the destination.
249 mtxer w4 // byte count to xer
250 cmpwi r0,0 // any chunks to xfer? (result in cr0)
251 sub rs,rs,w4 // step source back by w4 to point to 1st bytes to xfer
253 lswx w1,0,rs // load the w4 alignment bytes starting in register w1
255 beq- 2f // pathologic case, no chunks to xfer (cr0 from the cmpwi above)
266 2: // rc = remaining bytes (0-31)
267 mtxer rc // set up count for string ops
268 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); rs is r4, so this steps the source pointer back by rc
269 sub r0,rd,rc // matching destination address, kept in r0 because rd (r12) is inside the range lswx fills
270 lswx r5,0,r4 // load xer bytes into r5-r12
271 stswx r5,0,r0 // store them
// NOTE(review): presumably describes bcopy_g3 to the commpage machinery, with
// _COMM_PAGE_BCOPY as its target address. The meaning of the remaining arguments,
// including the k64Bit+kHasAltivec field, is set by the COMMPAGE_DESCRIPTOR macro
// (likely from machine/commpage.h) - confirm there before changing them.
274 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,0)