2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
23 /* =======================================
24 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
25 * =======================================
27 * Version of 2/20/2003, tuned for G3.
29 * Register usage. Note we use R2, so this code will not run in a PEF/CFM environment.
34 * r3 = not used, as memcpy and memmove return 1st parameter as a value
35 * r4 = source ptr ("rs")
36 * r5 = count of bytes to move ("rc")
43 * r12 = destination ptr ("rd")
44 * f0-f3 = used for moving 8-byte aligned data
46 #define rs r4 // NB: we depend on rs==r4 in "lswx" instructions
60 #include <sys/appleapiopts.h>
62 #include <machine/cpu_capabilities.h>
63 #include <machine/commpage.h>
68 #define kLong 33 // too long for string ops
74 bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
75 cmplwi rc,kLong // length > 32 bytes?
76 sub w1,r4,r3 // must move in reverse if (rd-rs)<rc
77 mr rd,r4 // start to move source & dest to canonic spot
78 bge LLong0 // skip if long operand
79 mtxer rc // set length for string ops
80 lswx r5,0,r3 // load bytes into r5-r12
81 stswx r5,0,r4 // store them
84 // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
87 Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
88 Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
89 cmplwi rc,kLong // length > 32 bytes?
90 sub w1,r3,rs // must move in reverse if (rd-rs)<rc
91 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
92 bge LLong1 // longer than 32 bytes
93 mtxer rc // set length for string ops
94 lswx r5,0,r4 // load bytes into r5-r12
95 stswx r5,0,r3 // store them
98 // Long operands (more than 32 bytes.)
99 // w1 = (rd-rs), used to check for alignment
101 LLong0: // enter from bcopy()
102 mr rs,r3 // must leave r3 alone (it is return value for memcpy)
103 LLong1: // enter from memcpy() and memmove()
104 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
105 rlwinm r0,w1,0,0x3 // are operands relatively word-aligned?
106 neg w2,rd // prepare to align destination
107 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
108 blt cr1,LLongReverse // handle reverse move
109 andi. w4,w2,3 // w4 <- #bytes to word align destination
110 beq cr5,LLongFloat // relatively aligned so use FPRs
111 sub rc,rc,w4 // adjust count for alignment
112 srwi r0,rc,5 // get #chunks to xfer (>=1)
113 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
114 mtctr r0 // set up loop count
115 beq 1f // dest already word aligned
117 // Word align the destination.
119 mtxer w4 // byte count to xer
120 cmpwi r0,0 // any chunks to xfer?
121 lswx w1,0,rs // move w4 bytes to align dest
125 beq- 2f // pathologic case, no chunks to xfer
127 // Forward, unaligned loop.
149 2: // rc = remaining bytes (0-31)
150 mtxer rc // set up count for string ops
151 mr r0,rd // move dest ptr out of the way
152 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
153 stswx r5,0,r0 // store them
158 // Forward, aligned loop. We use FPRs.
161 andi. w4,w2,7 // W4 <- #bytes to doubleword-align destination
162 sub rc,rc,w4 // adjust count for alignment
163 srwi r0,rc,5 // number of 32-byte chunks to xfer
164 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
165 mtctr r0 // set up loop count
166 beq 1f // dest already doubleword aligned
168 // Doubleword align the destination.
170 mtxer w4 // byte count to xer
171 cmpwi r0,0 // any chunks to xfer?
172 lswx w1,0,rs // move w4 bytes to align dest
176 beq- 2f // pathologic case, no chunks to xfer
177 1: // loop over 32-byte chunks
189 2: // rc = remaining bytes (0-31)
190 mtxer rc // set up count for string ops
191 mr r0,rd // move dest ptr out of the way
192 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
193 stswx r5,0,r0 // store them
197 // Long, reverse moves.
198 // cr5 = beq if relatively word aligned
201 add rd,rd,rc // point to end of operands + 1
203 beq cr5,LReverseFloat // aligned operands so can use FPRs
204 srwi r0,rc,5 // get chunk count
205 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
206 mtctr r0 // set up loop count
207 mtxer rc // set up for trailing bytes
227 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
228 sub r0,rd,rc // move dest ptr out of way
229 lswx r5,0,r4 // load xer bytes into r5-r12
230 stswx r5,0,r0 // store them
234 // Long, reverse aligned moves. We use FPRs.
237 andi. w4,rd,7 // W3 <- #bytes to doubleword-align destination
238 sub rc,rc,w4 // adjust count for alignment
239 srwi r0,rc,5 // number of 32-byte chunks to xfer
240 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
241 mtctr r0 // set up loop count
242 beq 1f // dest already doubleword aligned
244 // Doubleword align the destination.
246 mtxer w4 // byte count to xer
247 cmpwi r0,0 // any chunks to xfer?
248 sub rs,rs,w4 // point to 1st bytes to xfer
250 lswx w1,0,rs // move w3 bytes to align dest
252 beq- 2f // pathologic case, no chunks to xfer
263 2: // rc = remaining bytes (0-31)
264 mtxer rc // set up count for string ops
265 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
266 sub r0,rd,rc // move dest ptr out of way
267 lswx r5,0,r4 // load xer bytes into r5-r12
268 stswx r5,0,r0 // store them
271 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)