]> git.saurik.com Git - apple/xnu.git/blame - osfmk/ppc/commpage/bcopy_g3.s
xnu-792.13.8.tar.gz
[apple/xnu.git] / osfmk / ppc / commpage / bcopy_g3.s
CommitLineData
55e303ae
A
1/*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
8ad349bb 4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
55e303ae 5 *
8ad349bb
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
55e303ae
A
29 */
30/* =======================================
31 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
32 * =======================================
33 *
34 * Version of 2/20/2003, tuned for G3.
35 *
36 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
37 * environment.
38 *
39 * r0 = "w7" or temp
40 * r2 = "w8"
41 * r3 = not used, as memcpy and memmove return 1st parameter as a value
42 * r4 = source ptr ("rs")
43 * r5 = count of bytes to move ("rc")
44 * r6 = "w1"
45 * r7 = "w2"
46 * r8 = "w3"
47 * r9 = "w4"
48 * r10 = "w5"
49 * r11 = "w6"
50 * r12 = destination ptr ("rd")
51 * f0-f3 = used for moving 8-byte aligned data
52 */
// Symbolic register names used throughout this file.
// rs/rd/rc are the canonical source pointer, destination pointer and byte count;
// w1-w8 are the eight word-sized temporaries used by the 32-byte copy loops.
53#define rs r4 // source ptr. NB: we depend on rs==r4 in "lswx" instructions (they load into r5 and up)
54#define rd r12 // destination ptr; r12 is the last register of an lswx into r5-r12, so rd is copied to r0 before the tail string ops
55#define rc r5 // count of bytes to move; also the first register loaded by the tail "lswx r5,..." ops
56
57#define w1 r6 // w1 also holds (rd-rs) on entry to LLong1
58#define w2 r7 // w2 also holds -rd while the destination is being aligned
59#define w3 r8
60#define w4 r9 // w4 also holds the number of alignment bytes
61#define w5 r10
62#define w6 r11
63#define w7 r0 // r0 doubles as a general temp (chunk count, saved dest ptr)
64#define w8 r2 // use of r2 is why this code will not run in a PEF/CFM environment (see header)
65
66#define ASSEMBLER
67#include <sys/appleapiopts.h>
68#include <ppc/asm.h>
69#include <machine/cpu_capabilities.h>
70#include <machine/commpage.h>
71
72 .text
55e303ae
A
73
74
// Operands of kLong bytes or more take the "long" paths; anything shorter
// (0..32 bytes) fits in r5-r12 and is moved with a single lswx/stswx pair.
75#define kLong 33 // too long for string ops
76
77
78// Main entry points.
79
// bcopy_g3: void bcopy(const void *src, void *dst, size_t len)
// On entry: r3 = src, r4 = dst, r5 (rc) = len.
// Operands shorter than kLong are moved here with one lswx/stswx pair; every
// byte is loaded before any byte is stored, so overlap needs no special handling.
// Longer operands branch to LLong0 with w1 = (dst - src) and rd = dst.
80 .align 5
81bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
82 cmplwi rc,kLong // length > 32 bytes?
83 sub w1,r4,r3 // w1 <- (dst - src); long path must move in reverse if (rd-rs)<rc
84 mr rd,r4 // rd <- dst; LLong0 finishes the job by moving src (r3) into rs
85 bge LLong0 // skip if long operand
86 mtxer rc // set length for string ops
87 lswx r5,0,r3 // load bytes into r5-r12 (this overwrites rc and rd, no longer needed)
88 stswx r5,0,r4 // store them at dst
89 blr
90
91// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
92
// Lmemcpy_g3 / Lmemmove_g3: shared entry point for memcpy() and memmove().
// On entry: r3 = dst, r4 (rs) = src, r5 (rc) = len. r3 is never modified, so it
// is still dst on return.
// Operands shorter than kLong are moved here with one lswx/stswx pair; longer
// ones branch to LLong1 with w1 = (dst - src) and rd = dst.
93 .align 5
94Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
95Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
96 cmplwi rc,kLong // length > 32 bytes?
97 sub w1,r3,rs // w1 <- (dst - src); long path must move in reverse if (rd-rs)<rc
98 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
99 bge LLong1 // longer than 32 bytes
100 mtxer rc // set length for string ops
101 lswx r5,0,r4 // load bytes into r5-r12 (all loads complete before the store)
102 stswx r5,0,r3 // store them at dst
103 blr
104
105// Long operands (more than 32 bytes.)
106// w1 = (rd-rs), used to check for alignment
107
// Common long-operand dispatcher, followed by the forward unaligned copy.
// On entry at LLong1: rs = src, rd = dst, rc = len (>= kLong), w1 = (rd - rs).
// Dispatch:
//   (rd-rs) < rc, unsigned       -> LLongReverse (dst starts inside the source range)
//   (rd-rs) is a multiple of 4   -> LLongFloat   (forward, FPR loop)
//   otherwise                    -> falls through to the forward word loop below
108LLong0: // enter from bcopy()
109 mr rs,r3 // rs <- src (bcopy passes src in r3; rd was already set from r4)
110LLong1: // enter from memcpy() and memmove()
111 cmplw cr1,w1,rc // unsigned compare: set cr1 blt iff we must move reverse
112 rlwinm r0,w1,0,0x3 // r0 <- low 2 bits of (rd-rs): are operands relatively word-aligned?
113 neg w2,rd // w2 <- -rd; its low bits give the byte count needed to align destination
114 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
115 blt cr1,LLongReverse // handle reverse move
116 andi. w4,w2,3 // w4 <- #bytes to word align destination (also sets cr0 for "beq 1f" below)
117 beq cr5,LLongFloat // relatively aligned so use FPRs
118 sub rc,rc,w4 // adjust count for alignment
119 srwi r0,rc,5 // get #32-byte chunks to xfer (>=1 if w4==0; may be 0 after an alignment adjustment)
120 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
121 mtctr r0 // set up loop count
122 beq 1f // dest already word aligned (cr0 still holds the andi. result)
123
124// Word align the destination by moving w4 (1..3) bytes with string ops.
125
126 mtxer w4 // byte count to xer
127 cmpwi r0,0 // any chunks to xfer? (cr0 is tested by the beq- below)
128 lswx w1,0,rs // move w4 bytes to align dest
129 add rs,rs,w4
130 stswx w1,0,rd
131 add rd,rd,w4
132 beq- 2f // pathologic case, no chunks to xfer
133
134// Forward, unaligned loop: 32 bytes per iteration, all eight loads before the
135// first store. rc (r5) is not one of w1-w8, so the leftover count survives.
1361:
137 lwz w1,0(rs)
138 lwz w2,4(rs)
139 lwz w3,8(rs)
140 lwz w4,12(rs)
141 lwz w5,16(rs)
142 lwz w6,20(rs)
143 lwz w7,24(rs)
144 lwz w8,28(rs)
145 addi rs,rs,32
146 stw w1,0(rd)
147 stw w2,4(rd)
148 stw w3,8(rd)
149 stw w4,12(rd)
150 stw w5,16(rd)
151 stw w6,20(rd)
152 stw w7,24(rd)
153 stw w8,28(rd)
154 addi rd,rd,32
155 bdnz 1b
1562: // rc = remaining bytes (0-31)
157 mtxer rc // set up count for string ops
158 mr r0,rd // move dest ptr out of the way (rd is r12, which the lswx below may overwrite)
159 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
160 stswx r5,0,r0 // store them
161 blr
162
163
164
165// Forward, aligned loop. We use FPRs.
166
// Forward copy for relatively word-aligned operands, 32 bytes per iteration
// through f0-f3.
// On entry (from LLong1): rs = src, rd = dst, rc = len (>= kLong), w2 = -rd.
// Only relative word alignment is established, so once the destination is
// doubleword aligned the source is guaranteed word aligned, not necessarily
// doubleword aligned.
167LLongFloat:
168 andi. w4,w2,7 // w4 <- #bytes to doubleword-align destination (also sets cr0 for "beq 1f" below)
169 sub rc,rc,w4 // adjust count for alignment
170 srwi r0,rc,5 // number of 32-byte chunks to xfer (may be 0 after an alignment adjustment)
171 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
172 mtctr r0 // set up loop count
173 beq 1f // dest already doubleword aligned (cr0 still holds the andi. result)
174
175// Doubleword align the destination by moving w4 (1..7) bytes with string ops.
176
177 mtxer w4 // byte count to xer
178 cmpwi r0,0 // any chunks to xfer? (cr0 is tested by the beq- below)
179 lswx w1,0,rs // move w4 bytes to align dest
180 add rs,rs,w4
181 stswx w1,0,rd
182 add rd,rd,w4
183 beq- 2f // pathologic case, no chunks to xfer
1841: // loop over 32-byte chunks, all four loads before the first store
185 lfd f0,0(rs)
186 lfd f1,8(rs)
187 lfd f2,16(rs)
188 lfd f3,24(rs)
189 addi rs,rs,32
190 stfd f0,0(rd)
191 stfd f1,8(rd)
192 stfd f2,16(rd)
193 stfd f3,24(rd)
194 addi rd,rd,32
195 bdnz 1b
1962: // rc = remaining bytes (0-31)
197 mtxer rc // set up count for string ops
198 mr r0,rd // move dest ptr out of the way (rd is r12, which the lswx below may overwrite)
199 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
200 stswx r5,0,r0 // store them
201 blr
202
203
204// Long, reverse moves.
205// cr5 = beq if relatively word aligned
206
// Reverse (high-to-low address) copy, used when the destination starts inside
// the source range.
// On entry (from LLong1): rs = src, rd = dst, rc = len (>= kLong),
// cr5 = beq if the operands are relatively word aligned.
// This unaligned variant does no destination alignment step: it copies
// 32-byte chunks downward from the end, then moves the leftover bytes at the
// low end of the operands with string ops.
207LLongReverse:
208 add rd,rd,rc // point to end of operands + 1
209 add rs,rs,rc
210 beq cr5,LReverseFloat // aligned operands so can use FPRs
211 srwi r0,rc,5 // get chunk count (>=1, since rc >= kLong and no alignment bytes were removed)
212 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
213 mtctr r0 // set up loop count
214 mtxer rc // set up for trailing bytes (nothing in the loop below writes XER)
2151:
216 lwz w1,-4(rs)
217 lwz w2,-8(rs)
218 lwz w3,-12(rs)
219 lwz w4,-16(rs)
220 stw w1,-4(rd)
221 lwz w5,-20(rs)
222 stw w2,-8(rd)
223 lwz w6,-24(rs)
224 stw w3,-12(rd)
225 lwz w7,-28(rs)
226 stw w4,-16(rd)
227 lwzu w8,-32(rs) // last load of the chunk also steps rs down by 32
228 stw w5,-20(rd)
229 stw w6,-24(rd)
230 stw w7,-28(rd)
231 stwu w8,-32(rd) // last store of the chunk also steps rd down by 32
232 bdnz 1b
233
234 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
235 sub r0,rd,rc // leftmost dest byte, kept in r0 because the lswx below may overwrite r12 (rd)
236 lswx r5,0,r4 // load xer bytes into r5-r12
237 stswx r5,0,r0 // store them
238 blr
239
240
241// Long, reverse aligned moves. We use FPRs.
242
// Reverse (high-to-low address) copy for relatively word-aligned operands,
// 32 bytes per iteration through f0-f3.
// On entry (from LLongReverse): rs and rd point one byte past the end of the
// source and destination, rc = len (>= kLong).
// The end of the destination is first brought down to a doubleword boundary,
// then chunks are copied downward, then the leftover low-end bytes are moved
// with string ops.
243LReverseFloat:
244 andi. w4,rd,7 // w4 <- #bytes to doubleword-align (end of) destination; also sets cr0 for "beq 1f"
245 sub rc,rc,w4 // adjust count for alignment
246 srwi r0,rc,5 // number of 32-byte chunks to xfer (may be 0 after an alignment adjustment)
247 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
248 mtctr r0 // set up loop count
249 beq 1f // dest already doubleword aligned (cr0 still holds the andi. result)
250
251// Doubleword align the destination by moving the top w4 (1..7) bytes with string ops.
252
253 mtxer w4 // byte count to xer
254 cmpwi r0,0 // any chunks to xfer? (cr0 is tested by the beq- below)
255 sub rs,rs,w4 // point to 1st bytes to xfer
256 sub rd,rd,w4
257 lswx w1,0,rs // move w4 bytes to align dest
258 stswx w1,0,rd
259 beq- 2f // pathologic case, no chunks to xfer
2601:
261 lfd f0,-8(rs)
262 lfd f1,-16(rs)
263 lfd f2,-24(rs)
264 lfdu f3,-32(rs) // last load of the chunk also steps rs down by 32
265 stfd f0,-8(rd)
266 stfd f1,-16(rd)
267 stfd f2,-24(rd)
268 stfdu f3,-32(rd) // last store of the chunk also steps rd down by 32
269 bdnz 1b
2702: // rc = remaining bytes (0-31)
271 mtxer rc // set up count for string ops
272 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
273 sub r0,rd,rc // leftmost dest byte, kept in r0 because the lswx below may overwrite r12 (rd)
274 lswx r5,0,r4 // load xer bytes into r5-r12
275 stswx r5,0,r0 // store them
276 blr
277
// Comm page descriptor for the bcopy_g3 routine, targeting _COMM_PAGE_BCOPY.
// NOTE(review): COMMPAGE_DESCRIPTOR is defined outside this file (presumably in
// <machine/commpage.h>); the meaning of the remaining arguments
// (0, k64Bit+kHasAltivec, kCommPage32) should be confirmed against that definition.
91447636 278 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)