2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
28 /* =======================================
29 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
30 * =======================================
32 * Version of 2/20/2003, tuned for G3.
34 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
39 * r3 = not used, as memcpy and memmove return 1st parameter as a value
40 * r4 = source ptr ("rs")
41 * r5 = count of bytes to move ("rc")
48 * r12 = destination ptr ("rd")
49 * f0-f3 = used for moving 8-byte aligned data
51 #define rs r4 // source ptr alias. NB: we depend on rs==r4 in "lswx" instructions (some paths name r4 directly instead of rs)
64 #include <sys/appleapiopts.h>
66 #include <machine/cpu_capabilities.h>
67 #include <machine/commpage.h>
72 #define kLong 33 // operands of kLong bytes or more are too long for one lswx/stswx pair (r5-r12 hold at most 32 bytes)
// bcopy entry point. The argument order is (src, dst, len), so on entry
// r3 = source, r4 = destination and r5 (rc) = byte count.
// Operands shorter than kLong bytes are moved with a single lswx/stswx pair;
// anything longer branches to LLong0.
78 bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
79 cmplwi rc,kLong // length > 32 bytes? (unsigned compare against kLong)
80 sub w1,r4,r3 // w1 = dst - src; long path must move in reverse if (rd-rs)<rc
81 mr rd,r4 // copy dest ptr into rd (r12); LLong0 moves the source from r3 into rs
82 bge LLong0 // skip if long operand (rc >= kLong)
83 mtxer rc // set length for string ops
84 lswx r5,0,r3 // load bytes into r5-r12 (may overwrite rc=r5 and rd=r12)
85 stswx r5,0,r4 // store them; address comes from r4 since r12 may now hold data
88 // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
// memcpy and memmove share one entry point. The argument order is (dst, src, len),
// so on entry r3 = destination, r4 = source (already rs) and r5 (rc) = byte count.
// Operands shorter than kLong bytes are moved with a single lswx/stswx pair;
// anything longer branches to LLong1.
91 Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
92 Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
93 cmplwi rc,kLong // length > 32 bytes? (unsigned compare against kLong)
94 sub w1,r3,rs // w1 = dst - src; long path must move in reverse if (rd-rs)<rc
95 mr rd,r3 // work on a copy in rd: must leave r3 alone, it is return value for memcpy etc
96 bge LLong1 // longer than 32 bytes
97 mtxer rc // set length for string ops
98 lswx r5,0,r4 // load bytes into r5-r12 (may overwrite rc=r5 and rd=r12)
99 stswx r5,0,r3 // store them; address comes from r3 since r12 may now hold data
102 // Long operands (more than 32 bytes).
103 // w1 = (rd-rs), used to check for alignment
// On arrival at LLong1: rs = source, rd = destination, rc = byte count (>= kLong).
// This section picks between the reverse path, the FPR (relatively aligned) path,
// and the forward unaligned path that follows.
105 LLong0: // enter from bcopy()
106 mr rs,r3 // bcopy passed the source in r3; put it in rs so both entries share the code below (r3 is not modified)
107 LLong1: // enter from memcpy() and memmove()
108 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse (unsigned: dest below source wraps to a large w1 and goes forward)
109 rlwinm r0,w1,0,0x3 // r0 = low 2 bits of (rd-rs): are operands relatively word-aligned?
110 neg w2,rd // w2 = -rd; its low bits are the byte count needed to align the destination
111 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
112 blt cr1,LLongReverse // handle reverse move
113 andi. w4,w2,3 // w4 <- #bytes to word align destination (0-3); also sets cr0 for the beq 1f below
114 beq cr5,LLongFloat // relatively aligned so use FPRs
115 sub rc,rc,w4 // adjust count for alignment
116 srwi r0,rc,5 // get #chunks (32 bytes each) to xfer; can be 0 once alignment bytes are subtracted (checked below)
117 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
118 mtctr r0 // set up loop count
119 beq 1f // dest already word aligned (cr0 is still the result of the andi. above)
121 // Word align the destination.
123 mtxer w4 // byte count to xer
124 cmpwi r0,0 // any chunks to xfer? (result is consumed by the beq- below)
125 lswx w1,0,rs // move w4 bytes to align dest
129 beq- 2f // pathologic case, no chunks to xfer
131 // Forward, unaligned loop.
153 2: // rc = remaining bytes (0-31)
154 mtxer rc // set up count for string ops
155 mr r0,rd // move dest ptr out of the way: rd is r12, which the lswx below may overwrite
156 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
157 stswx r5,0,r0 // store them
162 // Forward, aligned loop. We use FPRs.
// The forward path branches to LLongFloat when cr5 reports that source and
// destination are relatively word aligned. On arrival w2 = -rd (set before the branch).
165 andi. w4,w2,7 // w4 <- #bytes to doubleword-align destination (0-7); also sets cr0 for the beq 1f below
166 sub rc,rc,w4 // adjust count for alignment
167 srwi r0,rc,5 // number of 32-byte chunks to xfer
168 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
169 mtctr r0 // set up loop count
170 beq 1f // dest already doubleword aligned (cr0 is still the result of the andi. above)
172 // Doubleword align the destination.
174 mtxer w4 // byte count to xer
175 cmpwi r0,0 // any chunks to xfer? (result is consumed by the beq- below)
176 lswx w1,0,rs // move w4 bytes to align dest
180 beq- 2f // pathologic case, no chunks to xfer
181 1: // loop over 32-byte chunks
193 2: // rc = remaining bytes (0-31)
194 mtxer rc // set up count for string ops
195 mr r0,rd // move dest ptr out of the way: rd is r12, which the lswx below may overwrite
196 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
197 stswx r5,0,r0 // store them
201 // Long, reverse moves.
202 // cr5 = beq if relatively word aligned
// Reached from the long path when (rd-rs) < rc, so the copy must run from
// the high addresses down to the low ones.
205 add rd,rd,rc // point to end of operands + 1 (rd = one past the last dest byte)
207 beq cr5,LReverseFloat // aligned operands so can use FPRs
208 srwi r0,rc,5 // get chunk count (32 bytes per chunk)
209 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
210 mtctr r0 // set up loop count
211 mtxer rc // set up for trailing bytes (string-op count for the leftover move below)
231 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); r4 is rs, so this backs the source ptr up by rc
232 sub r0,rd,rc // dest address of the leftover bytes, kept in r0 because rd (r12) may be overwritten by the lswx
233 lswx r5,0,r4 // load xer bytes into r5-r12
234 stswx r5,0,r0 // store them
238 // Long, reverse aligned moves. We use FPRs.
// The reverse path branches to LReverseFloat when cr5 reports that source and
// destination are relatively word aligned. rd already points one past the end
// of the destination here.
241 andi. w4,rd,7 // w4 <- #bytes to doubleword-align destination (low 3 bits of the end address); also sets cr0
242 sub rc,rc,w4 // adjust count for alignment
243 srwi r0,rc,5 // number of 32-byte chunks to xfer
244 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
245 mtctr r0 // set up loop count
246 beq 1f // dest already doubleword aligned (cr0 is still the result of the andi. above)
248 // Doubleword align the destination.
250 mtxer w4 // byte count to xer
251 cmpwi r0,0 // any chunks to xfer? (result is consumed by the beq- below)
252 sub rs,rs,w4 // point to 1st bytes to xfer
254 lswx w1,0,rs // move w4 bytes to align dest
256 beq- 2f // pathologic case, no chunks to xfer
267 2: // rc = remaining bytes (0-31)
268 mtxer rc // set up count for string ops
269 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); r4 is rs, so this backs the source ptr up by rc
270 sub r0,rd,rc // dest address of the leftover bytes, kept in r0 because rd (r12) may be overwritten by the lswx
271 lswx r5,0,r4 // load xer bytes into r5-r12
272 stswx r5,0,r0 // store them
// Registers bcopy_g3 for the _COMM_PAGE_BCOPY slot of the 32-bit comm page.
// NOTE(review): the 0 and k64Bit+kHasAltivec arguments are presumably the required and
// excluded CPU capability masks - confirm against the macro definition in machine/commpage.h.
275 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)