]> git.saurik.com Git - apple/xnu.git/blame_incremental - osfmk/ppc/commpage/bcopy_g3.s
xnu-792.25.20.tar.gz
[apple/xnu.git] / osfmk / ppc / commpage / bcopy_g3.s
... / ...
CommitLineData
1/*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* =======================================
23 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
24 * =======================================
25 *
26 * Version of 2/20/2003, tuned for G3.
27 *
28 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
29 * environment.
30 *
31 * r0 = "w7" or temp
32 * r2 = "w8"
33 * r3 = not used, as memcpy and memmove return 1st parameter as a value
34 * r4 = source ptr ("rs")
35 * r5 = count of bytes to move ("rc")
36 * r6 = "w1"
37 * r7 = "w2"
38 * r8 = "w3"
39 * r9 = "w4"
40 * r10 = "w5"
41 * r11 = "w6"
42 * r12 = destination ptr ("rd")
43 * f0-f3 = used for moving 8-byte aligned data
44 */
45#define rs r4 // NB: we depend on rs==r4 in "lswx" instructions (lswx below hardcodes r4)
46#define rd r12 // NB: r12 is also the last register filled by "lswx r5" on a 32-byte move
47#define rc r5 // byte count; also the first register filled by "lswx r5"
48
49#define w1 r6
50#define w2 r7
51#define w3 r8
52#define w4 r9
53#define w5 r10
54#define w6 r11
55#define w7 r0 // r0 reads as zero in base-register position, so w7 holds data only
56#define w8 r2 // r2 is the TOC pointer under PEF/CFM, hence the "will not run" note above
57
58#define ASSEMBLER
59#include <sys/appleapiopts.h>
60#include <ppc/asm.h>
61#include <machine/cpu_capabilities.h>
62#include <machine/commpage.h>
63
64 .text
65
66
67#define kLong 33 // too long for string ops; <=32 bytes fits in the 8 regs r5-r12
68
69
70// Main entry points.
71
// void bcopy(const void *src, void *dst, size_t len)
// NB: bcopy takes (src, dst) — the reverse of memcpy/memmove — so src is in
// r3 and dst in r4 here. Short operands (<=32 bytes) are moved in one shot
// with lswx/stswx, whose byte count comes from XER.
 72 .align 5
 73bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
 74 cmplwi rc,kLong // length >= 33, i.e. more than 32 bytes?
 75 sub w1,r4,r3 // w1 = dst-src; must move in reverse if (rd-rs)<rc
 76 mr rd,r4 // start to move source & dest to canonic spot (rs set at LLong0)
 77 bge LLong0 // skip if long operand
 78 mtxer rc // set length (0..32) for string ops
 79 lswx r5,0,r3 // load bytes into r5-r12 (4 bytes per register)
 80 stswx r5,0,r4 // store them
 81 blr
82
83// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
84// (The ".align 5" below pins this entry 32 bytes after bcopy_g3; user code
84// reaches each routine by its fixed commpage offset.)
84
85 .align 5
86Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
87Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
88 cmplwi rc,kLong // length >= 33, i.e. more than 32 bytes?
89 sub w1,r3,rs // w1 = dst-src; must move in reverse if (rd-rs)<rc
90 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
91 bge LLong1 // longer than 32 bytes
92 mtxer rc // set length (0..32) for string ops
93 lswx r5,0,r4 // load bytes into r5-r12
94 stswx r5,0,r3 // store them; r3 (return value) is untouched
95 blr
96
97// Long operands (more than 32 bytes.)
98// w1 = (rd-rs), used to check for alignment
99// Dispatch: reverse move if the operands overlap destructively (0 < rd-rs < rc),
99// FPR loop if src and dst are mutually word-aligned, else the GPR loop below.
99
100LLong0: // enter from bcopy()
101 mr rs,r3 // must leave r3 alone (it is return value for memcpy)
102LLong1: // enter from memcpy() and memmove()
103 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse (unsigned: (rd-rs)<rc)
104 rlwinm r0,w1,0,0x3 // are operands relatively word-aligned? (low 2 bits of rd-rs)
105 neg w2,rd // prepare to align destination (low bits of -rd = bytes to boundary)
106 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
107 blt cr1,LLongReverse // handle reverse move
108 andi. w4,w2,3 // w4 <- #bytes to word align destination (also sets cr0)
109 beq cr5,LLongFloat // relatively aligned so use FPRs
110 sub rc,rc,w4 // adjust count for alignment
111 srwi r0,rc,5 // get #chunks to xfer (may be 0 after the alignment adjust)
112 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
113 mtctr r0 // set up loop count
114 beq 1f // dest already word aligned (cr0 from andi. above)
115
116// Word align the destination.
117
118 mtxer w4 // byte count (1..3) to xer
119 cmpwi r0,0 // any chunks to xfer?
120 lswx w1,0,rs // move w4 bytes to align dest
121 add rs,rs,w4
122 stswx w1,0,rd
123 add rd,rd,w4
124 beq- 2f // pathologic case (33..34-byte operand), no chunks to xfer
125
126// Forward, unaligned loop.
127// Each iteration moves 32 bytes via 8 word loads/stores; dest is word
127// aligned, source may not be (PPC handles unaligned word accesses).
127
1281:
129 lwz w1,0(rs)
130 lwz w2,4(rs)
131 lwz w3,8(rs)
132 lwz w4,12(rs)
133 lwz w5,16(rs)
134 lwz w6,20(rs)
135 lwz w7,24(rs)
136 lwz w8,28(rs)
137 addi rs,rs,32
138 stw w1,0(rd)
139 stw w2,4(rd)
140 stw w3,8(rd)
141 stw w4,12(rd)
142 stw w5,16(rd)
143 stw w6,20(rd)
144 stw w7,24(rd)
145 stw w8,28(rd)
146 addi rd,rd,32
147 bdnz 1b
1482: // rc = remaining bytes (0-31)
149 mtxer rc // set up count for string ops
150 mr r0,rd // move dest ptr out of the way (lswx below clobbers r12==rd)
151 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
152 stswx r5,0,r0 // store them
153 blr
154
155
156
157// Forward, aligned loop. We use FPRs.
157// Taken when (rd-rs) is a multiple of 4: once rd is doubleword aligned,
157// rs is at least word aligned, so 8-byte FPR loads/stores are safe & fast.
158
159LLongFloat:
160 andi. w4,w2,7 // w4 <- #bytes to doubleword-align destination (w2 = -rd)
161 sub rc,rc,w4 // adjust count for alignment
162 srwi r0,rc,5 // number of 32-byte chunks to xfer (may be 0 after adjust)
163 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
164 mtctr r0 // set up loop count
165 beq 1f // dest already doubleword aligned (cr0 from andi.)
166
167// Doubleword align the destination.
168
169 mtxer w4 // byte count (1..7) to xer
170 cmpwi r0,0 // any chunks to xfer?
171 lswx w1,0,rs // move w4 bytes to align dest (uses w1,w2 = r6,r7)
172 add rs,rs,w4
173 stswx w1,0,rd
174 add rd,rd,w4
175 beq- 2f // pathologic case (<40-byte operand), no chunks to xfer
1761: // loop over 32-byte chunks
177 lfd f0,0(rs)
178 lfd f1,8(rs)
179 lfd f2,16(rs)
180 lfd f3,24(rs)
181 addi rs,rs,32
182 stfd f0,0(rd)
183 stfd f1,8(rd)
184 stfd f2,16(rd)
185 stfd f3,24(rd)
186 addi rd,rd,32
187 bdnz 1b
1882: // rc = remaining bytes (0-31)
189 mtxer rc // set up count for string ops
190 mr r0,rd // move dest ptr out of the way (lswx below clobbers r12==rd)
191 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
192 stswx r5,0,r0 // store them
193 blr
194
195
196// Long, reverse moves.
197// cr5 = beq if relatively word aligned
197// Operands overlap destructively (0 < rd-rs < rc), so copy from the top
197// down. The trailing (lowest-addressed) 0..31 bytes are done last, forward,
197// with string ops.
198
199LLongReverse:
200 add rd,rd,rc // point to end of operands + 1
201 add rs,rs,rc
202 beq cr5,LReverseFloat // aligned operands so can use FPRs
203 srwi r0,rc,5 // get chunk count (>=1 since rc>32 and no alignment adjust here)
204 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
205 mtctr r0 // set up loop count
206 mtxer rc // set up for trailing bytes now; lwz/stw below preserve XER
2071:
208 lwz w1,-4(rs)
209 lwz w2,-8(rs)
210 lwz w3,-12(rs)
211 lwz w4,-16(rs)
212 stw w1,-4(rd)
213 lwz w5,-20(rs)
214 stw w2,-8(rd)
215 lwz w6,-24(rs)
216 stw w3,-12(rd)
217 lwz w7,-28(rs)
218 stw w4,-16(rd)
219 lwzu w8,-32(rs)
220 stw w5,-20(rd)
221 stw w6,-24(rd)
222 stw w7,-28(rd)
223 stwu w8,-32(rd)
224 bdnz 1b
225
226 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); r4==rs anyway
227 sub r0,rd,rc // move dest ptr out of way (lswx below clobbers r12==rd)
228 lswx r5,0,r4 // load xer bytes into r5-r12
229 stswx r5,0,r0 // store them
230 blr
231
232
233// Long, reverse aligned moves. We use FPRs.
233// rd/rs point one past the end of the operands on entry.
234
235LReverseFloat:
236 andi. w4,rd,7 // w4 <- #bytes to doubleword-align destination (end ptr low bits)
237 sub rc,rc,w4 // adjust count for alignment
238 srwi r0,rc,5 // number of 32-byte chunks to xfer (may be 0 after adjust)
239 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
240 mtctr r0 // set up loop count
241 beq 1f // dest already doubleword aligned (cr0 from andi.)
242
243// Doubleword align the destination.
244
245 mtxer w4 // byte count (1..7) to xer
246 cmpwi r0,0 // any chunks to xfer?
247 sub rs,rs,w4 // point to 1st bytes to xfer
248 sub rd,rd,w4
249 lswx w1,0,rs // move w4 bytes to align dest (uses w1,w2 = r6,r7)
250 stswx w1,0,rd
251 beq- 2f // pathologic case (<40-byte operand), no chunks to xfer
2521: // loop over 32-byte chunks, moving downward
253 lfd f0,-8(rs)
254 lfd f1,-16(rs)
255 lfd f2,-24(rs)
256 lfdu f3,-32(rs)
257 stfd f0,-8(rd)
258 stfd f1,-16(rd)
259 stfd f2,-24(rd)
260 stfdu f3,-32(rd)
261 bdnz 1b
2622: // rc = remaining bytes (0-31)
263 mtxer rc // set up count for string ops
264 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31); r4==rs anyway
265 sub r0,rd,rc // move dest ptr out of way (lswx below clobbers r12==rd)
266 lswx r5,0,r4 // load xer bytes into r5-r12
267 stswx r5,0,r0 // store them
268 blr
269
270 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)