/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, tuned for G3.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment (CFM reserves R2 as the TOC pointer).
 *   r0  = "w7" or temp
 *   r2  = "w8"
 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6  = "w1"
 *   r7  = "w2"
 *   r8  = "w3"
 *   r9  = "w4"
 *   r10 = "w5"
 *   r11 = "w6"
 *   r12 = destination ptr ("rd")
 *   f0-f3 = used for moving 8-byte aligned data
 */
#define rs      r4              // NB: we depend on rs==r4 in "lswx" instructions
#define rd      r12             // NB: we depend on rd==r12 in "stswx" instructions
#define rc      r5

#define w1      r6
#define w2      r7
#define w3      r8
#define w4      r9
#define w5      r10
#define w6      r11
#define w7      r0
#define w8      r2
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text

#define kLong   33              // too long for string ops
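
/*
 * Short operands are moved entirely with string instructions: mtxer sets
 * the byte count, then a single lswx/stswx pair streams up to 32 bytes
 * through r5-r12 (8 GPRs x 4 bytes), which is why kLong is 33.  A rough C
 * model of the short path (a sketch, not part of the original source;
 * "tmp" stands in for the register block r5-r12):
 *
 *	if (len < kLong) {                      // 0..32 bytes
 *		unsigned char tmp[32];
 *		for (size_t i = 0; i < len; i++)  // lswx: regs <- source
 *			tmp[i] = ((unsigned char *)src)[i];
 *		for (size_t i = 0; i < len; i++)  // stswx: dest <- regs
 *			((unsigned char *)dst)[i] = tmp[i];
 *		return;
 *	}
 *
 * Because every byte is loaded before any is stored, the short path is
 * correct even for overlapping operands, in either direction.
 */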
        .align  5
bcopy_g3:                       // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong        // length > 32 bytes?
        sub     w1,r4,r3        // must move in reverse if (rd-rs)<rc
        mr      rd,r4           // start to move source & dest to canonic spot
        bge     LLong0          // skip if long operand
        mtxer   rc              // set length for string ops
        lswx    r5,0,r3         // load bytes into r5-r12
        stswx   r5,0,r4         // store them
        blr
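
/*
 * Note the argument orders differ: bcopy is (src, dst, len) while memcpy
 * and memmove are (dst, src, len).  Both entries converge below with the
 * operands in their canonic spots: rs = source, rd = destination,
 * rc = length, and r3 untouched.
 */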
// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.

Lmemcpy_g3:                     // void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g3:                    // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong        // length > 32 bytes?
        sub     w1,r3,rs        // must move in reverse if (rd-rs)<rc
        mr      rd,r3           // must leave r3 alone, it is return value for memcpy etc
        bge     LLong1          // longer than 32 bytes
        mtxer   rc              // set length for string ops
        lswx    r5,0,r4         // load bytes into r5-r12
        stswx   r5,0,r3         // store them
        blr
// Long operands (more than 32 bytes.)
//      w1 = (rd-rs), used to decide the direction and check relative alignment
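
/*
 * A single unsigned compare decides the copy direction.  In C terms (a
 * sketch for illustration; copy_forward/copy_reverse are hypothetical):
 *
 *	unsigned long diff = (unsigned long)dst - (unsigned long)src;
 *	if (diff < len)         // wraps if dst < src, so this is true only
 *		copy_reverse(); //   when dst lies inside [src, src+len)
 *	else
 *		copy_forward();
 *
 * Only that destructive-overlap case forces a descending copy; every other
 * layout, including dst below src, is safe to copy forward.
 */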
LLong0:                         // enter from bcopy()
        mr      rs,r3           // must leave r3 alone (it is return value for memcpy)
LLong1:                         // enter from memcpy() and memmove()
        cmplw   cr1,w1,rc       // set cr1 blt iff we must move reverse
        rlwinm  r0,w1,0,0x3     // are operands relatively word-aligned?
        neg     w2,rd           // prepare to align destination
        cmpwi   cr5,r0,0        // set cr5 beq if relatively word aligned
        blt     cr1,LLongReverse // handle reverse move
        andi.   w4,w2,3         // w4 <- #bytes to word align destination
        beq     cr5,LLongFloat  // relatively aligned so use FPRs
        sub     rc,rc,w4        // adjust count for alignment
        srwi    r0,rc,5         // get #chunks to xfer (>=1)
        rlwinm  rc,rc,0,0x1F    // mask down to leftover bytes
        mtctr   r0              // set up loop count
        beq     1f              // dest already word aligned
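
/*
 * The count is split once the destination is aligned; in C terms:
 *
 *	chunks   = len >> 5;    // srwi r0,rc,5   (32-byte chunks)
 *	leftover = len & 0x1F;  // rlwinm rc,rc,0,0x1F
 */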
// Word align the destination.

        mtxer   w4              // byte count to xer
        cmpwi   r0,0            // any chunks to xfer?
        lswx    w1,0,rs         // move w4 bytes to align dest
        add     rs,rs,w4
        stswx   w1,0,rd         // store them
        add     rd,rd,w4
        beq-    2f              // pathologic case, no chunks to xfer
// Forward, unaligned loop.

1:                              // loop over 32-byte chunks
        lwz     w1,0(rs)
        lwz     w2,4(rs)
        lwz     w3,8(rs)
        lwz     w4,12(rs)
        lwz     w5,16(rs)
        lwz     w6,20(rs)
        lwz     w7,24(rs)
        lwz     w8,28(rs)
        addi    rs,rs,32
        stw     w1,0(rd)
        stw     w2,4(rd)
        stw     w3,8(rd)
        stw     w4,12(rd)
        stw     w5,16(rd)
        stw     w6,20(rd)
        stw     w7,24(rd)
        stw     w8,28(rd)
        addi    rd,rd,32
        bdnz    1b

2:                              // rc = remaining bytes (0-31)
        mtxer   rc              // set up count for string ops
        mr      r0,rd           // move dest ptr out of the way
        lswx    r5,0,rs         // load xer bytes into r5-r12 (rs==r4)
        stswx   r5,0,r0         // store them
        blr
// Forward, aligned loop.  We use FPRs.
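
/*
 * The FPR path moves 8 bytes per lfd/stfd pair, halving the instruction
 * count per 32-byte chunk.  Only relative *word* alignment (mask 0x3
 * above) is required, not doubleword: once the destination is doubleword
 * aligned the source may still be merely word aligned, presumably relying
 * on the G3 handling word-aligned floating-point accesses in hardware.
 */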
LLongFloat:
        andi.   w4,w2,7         // w4 <- #bytes to doubleword-align destination
        sub     rc,rc,w4        // adjust count for alignment
        srwi    r0,rc,5         // number of 32-byte chunks to xfer
        rlwinm  rc,rc,0,0x1F    // mask down to leftover bytes
        mtctr   r0              // set up loop count
        beq     1f              // dest already doubleword aligned

// Doubleword align the destination.

        mtxer   w4              // byte count to xer
        cmpwi   r0,0            // any chunks to xfer?
        lswx    w1,0,rs         // move w4 bytes to align dest
        add     rs,rs,w4
        stswx   w1,0,rd         // store them
        add     rd,rd,w4
        beq-    2f              // pathologic case, no chunks to xfer
1:                              // loop over 32-byte chunks
        lfd     f0,0(rs)
        lfd     f1,8(rs)
        lfd     f2,16(rs)
        lfd     f3,24(rs)
        addi    rs,rs,32
        stfd    f0,0(rd)
        stfd    f1,8(rd)
        stfd    f2,16(rd)
        stfd    f3,24(rd)
        addi    rd,rd,32
        bdnz    1b

2:                              // rc = remaining bytes (0-31)
        mtxer   rc              // set up count for string ops
        mr      r0,rd           // move dest ptr out of the way
        lswx    r5,0,rs         // load xer bytes into r5-r12 (rs==r4)
        stswx   r5,0,r0         // store them
        blr
// Long, reverse moves.
//      cr5 = beq if relatively word aligned
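
/*
 * Reverse moves point both operands one past the end and walk down, so in
 * the destructive-overlap case every byte is consumed before it can be
 * overwritten.  In C terms (a sketch; copy_chunk_backward is hypothetical):
 *
 *	src += len;  dst += len;        // one past the end
 *	while (chunks--)                // descend in 32-byte chunks
 *		copy_chunk_backward();
 *	// the 0..31 leftover bytes are the leftmost ones
 */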
LLongReverse:
        add     rd,rd,rc        // point to end of operands + 1
        add     rs,rs,rc
        beq     cr5,LReverseFloat // aligned operands so can use FPRs
        srwi    r0,rc,5         // get chunk count
        rlwinm  rc,rc,0,0x1F    // mask down to leftover bytes
        mtctr   r0              // set up loop count
        mtxer   rc              // set up for trailing bytes

1:                              // loop over 32-byte chunks
        lwz     w1,-4(rs)
        lwz     w2,-8(rs)
        lwz     w3,-12(rs)
        lwz     w4,-16(rs)
        lwz     w5,-20(rs)
        lwz     w6,-24(rs)
        lwz     w7,-28(rs)
        lwzu    w8,-32(rs)      // update form: rs -= 32
        stw     w1,-4(rd)
        stw     w2,-8(rd)
        stw     w3,-12(rd)
        stw     w4,-16(rd)
        stw     w5,-20(rd)
        stw     w6,-24(rd)
        stw     w7,-28(rd)
        stwu    w8,-32(rd)      // rd -= 32
        bdnz    1b

        sub     r4,rs,rc        // point to 1st (leftmost) leftover byte (0..31)
        sub     r0,rd,rc        // move dest ptr out of way
        lswx    r5,0,r4         // load xer bytes into r5-r12
        stswx   r5,0,r0         // store them
        blr
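
/*
 * The leftover bytes are still moved with a forward lswx/stswx pair, but
 * all of them are loaded into r5-r12 before any are stored, so the tail
 * is safe even though the remaining source and destination may overlap.
 */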
// Long, reverse aligned moves.  We use FPRs.

LReverseFloat:
        andi.   w4,rd,7         // w4 <- #bytes to doubleword-align destination
        sub     rc,rc,w4        // adjust count for alignment
        srwi    r0,rc,5         // number of 32-byte chunks to xfer
        rlwinm  rc,rc,0,0x1F    // mask down to leftover bytes
        mtctr   r0              // set up loop count
        beq     1f              // dest already doubleword aligned

// Doubleword align the destination.

        mtxer   w4              // byte count to xer
        cmpwi   r0,0            // any chunks to xfer?
        sub     rs,rs,w4        // point to 1st bytes to xfer
        sub     rd,rd,w4
        lswx    w1,0,rs         // move w4 bytes to align dest
        stswx   w1,0,rd         // store them
        beq-    2f              // pathologic case, no chunks to xfer
1:                              // loop over 32-byte chunks
        lfd     f0,-8(rs)
        lfd     f1,-16(rs)
        lfd     f2,-24(rs)
        lfdu    f3,-32(rs)      // update form: rs -= 32
        stfd    f0,-8(rd)
        stfd    f1,-16(rd)
        stfd    f2,-24(rd)
        stfdu   f3,-32(rd)      // rd -= 32
        bdnz    1b

2:                              // rc = remaining bytes (0-31)
        mtxer   rc              // set up count for string ops
        sub     r4,rs,rc        // point to 1st (leftmost) leftover byte (0..31)
        sub     r0,rd,rc        // move dest ptr out of way
        lswx    r5,0,r4         // load xer bytes into r5-r12
        stswx   r5,0,r0         // store them
        blr
        COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)
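
/*
 * The descriptor above registers this routine for the commpage with
 * musthave=0 and canthave=k64Bit+kHasAltivec, so it is presumably selected
 * only on 32-bit processors without Altivec, i.e. the G3.
 */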