]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/commpage/bcopy_g3.s
xnu-517.3.15.tar.gz
[apple/xnu.git] / osfmk / ppc / commpage / bcopy_g3.s
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* =======================================
26 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
27 * =======================================
28 *
29 * Version of 2/20/2003, tuned for G3.
30 *
31 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
32 * environment.
33 *
34 * r0 = "w7" or temp
35 * r2 = "w8"
36 * r3 = not used, as memcpy and memmove return 1st parameter as a value
37 * r4 = source ptr ("rs")
38 * r5 = count of bytes to move ("rc")
39 * r6 = "w1"
40 * r7 = "w2"
41 * r8 = "w3"
42 * r9 = "w4"
43 * r10 = "w5"
44 * r11 = "w6"
45 * r12 = destination ptr ("rd")
46 * f0-f3 = used for moving 8-byte aligned data
47 */
48 #define rs r4 // source ptr. NB: we depend on rs==r4 in "lswx" instructions
49 #define rd r12 // destination ptr
50 #define rc r5 // count of bytes to move
51
52 #define w1 r6 // w1-w8 are the work registers
53 #define w2 r7
54 #define w3 r8
55 #define w4 r9
56 #define w5 r10
57 #define w6 r11
58 #define w7 r0 // r0 doubles as a general temp
59 #define w8 r2 // use of r2 is why this code will not run in a PEF/CFM environment
60
61 #define ASSEMBLER
62 #include <sys/appleapiopts.h>
63 #include <ppc/asm.h>
64 #include <machine/cpu_capabilities.h>
65 #include <machine/commpage.h>
66
67 .text
68 .globl EXT(bcopy_g3)
69
70
71 #define kLong 33 // operands of kLong bytes or more take the long path; 32 or fewer are moved by one lswx/stswx pair through r5-r12
72
73
74 // Main entry points.
// bcopy_g3: r3 = source, r4 = destination, r5 (rc) = length.
// Operands of 32 bytes or fewer are handled right here: the whole operand is
// loaded into registers by lswx before stswx stores any of it. Longer operands
// branch to LLong0 with w1 = (dest - source) and rd = dest already set up.
75
76 .align 5
77 bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
78 cmplwi rc,kLong // cr0 <- length vs kLong: bge means length > 32 bytes
79 sub w1,r4,r3 // w1 = (dst - src); long path must move in reverse if (rd-rs)<rc
80 mr rd,r4 // copy dest ptr to rd (r12), its canonic spot for the long path
81 bge LLong0 // skip if long operand (cr0 from cmplwi above)
82 mtxer rc // set length for string ops
83 lswx r5,0,r3 // load bytes from source into r5-r12 (as many registers as the length needs)
84 stswx r5,0,r4 // store them to destination
85 blr
86
87 // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
// memcpy and memmove share this single entry: r3 = destination, r4 (rs) = source,
// r5 (rc) = length. r3 is never written, so it is still the destination pointer
// when we return. Operands of 32 bytes or fewer are moved here by loading the
// whole operand with lswx before stswx stores it; longer ones go to LLong1.
88
89 .align 5
90 Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
91 Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
92 cmplwi rc,kLong // cr0 <- length vs kLong: bge means length > 32 bytes
93 sub w1,r3,rs // w1 = (dst - src); long path must move in reverse if (rd-rs)<rc
94 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
95 bge LLong1 // longer than 32 bytes (cr0 from cmplwi above)
96 mtxer rc // set length for string ops
97 lswx r5,0,r4 // load bytes from source into r5-r12 (as many registers as the length needs)
98 stswx r5,0,r3 // store them to destination
99 blr
100
101 // Long operands (more than 32 bytes.)
102 // w1 = (rd-rs), used to check for overlap (reverse move) and relative alignment
// At LLong1: rs (r4) = source, rd (r12) = destination, rc (r5) = length (>= kLong).
// This block dispatches to LLongReverse or LLongFloat, and otherwise performs the
// forward move for operands that are not relatively word aligned.
103
104 LLong0: // enter from bcopy()
105 mr rs,r3 // bcopy passes the source in r3: move it to rs (r4). rd was already set from r4.
106 LLong1: // enter from memcpy() and memmove()
107 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse: unsigned (rd-rs) < rc
108 rlwinm r0,w1,0,0x3 // r0 = (rd-rs) & 3: are operands relatively word-aligned?
109 neg w2,rd // w2 = -rd; its low bits are the #bytes needed to align destination
110 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
111 blt cr1,LLongReverse // handle reverse move
112 andi. w4,w2,3 // w4 <- #bytes to word align destination; cr0 beq if already aligned
113 beq cr5,LLongFloat // relatively aligned so use FPRs
114 sub rc,rc,w4 // adjust count for alignment
115 srwi r0,rc,5 // get #32-byte chunks to xfer (0 is possible when alignment bytes were removed, e.g. 33-3=30)
116 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
117 mtctr r0 // set up loop count
118 beq 1f // dest already word aligned (cr0 still from the andi. above; then chunk count is >=1)
119
120 // Word align the destination.
121
122 mtxer w4 // byte count to xer
123 cmpwi r0,0 // any chunks to xfer?
124 lswx w1,0,rs // move w4 bytes to align dest
125 add rs,rs,w4 // advance source past the alignment bytes
126 stswx w1,0,rd
127 add rd,rd,w4 // advance dest; it is now word aligned
128 beq- 2f // pathologic case, no chunks to xfer: ctr is 0, so the loop must not be entered
129
130 // Forward, unaligned loop.
131
132 1: // each pass loads 32 bytes into w1-w8, then stores them
133 lwz w1,0(rs)
134 lwz w2,4(rs)
135 lwz w3,8(rs)
136 lwz w4,12(rs)
137 lwz w5,16(rs)
138 lwz w6,20(rs)
139 lwz w7,24(rs)
140 lwz w8,28(rs)
141 addi rs,rs,32
142 stw w1,0(rd)
143 stw w2,4(rd)
144 stw w3,8(rd)
145 stw w4,12(rd)
146 stw w5,16(rd)
147 stw w6,20(rd)
148 stw w7,24(rd)
149 stw w8,28(rd)
150 addi rd,rd,32
151 bdnz 1b // loop until ctr (chunk count) reaches 0
152 2: // rc = remaining bytes (0-31)
153 mtxer rc // set up count for string ops
154 mr r0,rd // move dest ptr out of the way: rd is r12, which the lswx below may overwrite
155 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
156 stswx r5,0,r0 // store them
157 blr
158
159
160
161 // Forward, aligned loop. We use FPRs.
// Reached from LLong1 when (rd-rs)&3 == 0. On entry w2 = -rd (computed at LLong1),
// rc >= kLong, rs/rd point at the start of the operands.
// Only relative *word* alignment was tested, so once the destination is
// doubleword aligned the source is known to be word aligned, not necessarily
// doubleword aligned.
162
163 LLongFloat:
164 andi. w4,w2,7 // W4 <- #bytes to doubleword-align destination; cr0 beq if already aligned
165 sub rc,rc,w4 // adjust count for alignment
166 srwi r0,rc,5 // number of 32-byte chunks to xfer (0 is possible when alignment bytes were removed)
167 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
168 mtctr r0 // set up loop count
169 beq 1f // dest already doubleword aligned (cr0 still from the andi. above; then chunk count is >=1)
170
171 // Doubleword align the destination.
172
173 mtxer w4 // byte count to xer
174 cmpwi r0,0 // any chunks to xfer?
175 lswx w1,0,rs // move w4 bytes to align dest
176 add rs,rs,w4 // advance source past the alignment bytes
177 stswx w1,0,rd
178 add rd,rd,w4 // advance dest; it is now doubleword aligned
179 beq- 2f // pathologic case, no chunks to xfer: ctr is 0, so the loop must not be entered
180 1: // loop over 32-byte chunks
181 lfd f0,0(rs) // load 32 bytes into f0-f3
182 lfd f1,8(rs)
183 lfd f2,16(rs)
184 lfd f3,24(rs)
185 addi rs,rs,32
186 stfd f0,0(rd) // store them
187 stfd f1,8(rd)
188 stfd f2,16(rd)
189 stfd f3,24(rd)
190 addi rd,rd,32
191 bdnz 1b // loop until ctr (chunk count) reaches 0
192 2: // rc = remaining bytes (0-31)
193 mtxer rc // set up count for string ops
194 mr r0,rd // move dest ptr out of the way: rd is r12, which the lswx below may overwrite
195 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
196 stswx r5,0,r0 // store them
197 blr
198
199
200 // Long, reverse moves.
201 // cr5 = beq if relatively word aligned
// Reached from LLong1 when unsigned (rd-rs) < rc. On entry rs/rd point at the
// start of the operands and rc >= kLong. The move runs from the high addresses
// down; no destination alignment is done on this (relatively unaligned) path.
202
203 LLongReverse:
204 add rd,rd,rc // point to end of operands + 1
205 add rs,rs,rc
206 beq cr5,LReverseFloat // aligned operands so can use FPRs
207 srwi r0,rc,5 // get chunk count (>=1 here, since rc >= kLong and nothing was subtracted)
208 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
209 mtctr r0 // set up loop count
210 mtxer rc // set up for trailing bytes
211 1: // each pass moves the 32 bytes just below rs/rd; loads and stores are interleaved
212 lwz w1,-4(rs)
213 lwz w2,-8(rs)
214 lwz w3,-12(rs)
215 lwz w4,-16(rs)
216 stw w1,-4(rd)
217 lwz w5,-20(rs)
218 stw w2,-8(rd)
219 lwz w6,-24(rs)
220 stw w3,-12(rd)
221 lwz w7,-28(rs)
222 stw w4,-16(rd)
223 lwzu w8,-32(rs) // update form: rs -= 32
224 stw w5,-20(rd)
225 stw w6,-24(rd)
226 stw w7,-28(rd)
227 stwu w8,-32(rd) // update form: rd -= 32
228 bdnz 1b // loop until ctr (chunk count) reaches 0
229
230 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
231 sub r0,rd,rc // move dest ptr out of way: rd is r12, which the lswx below may overwrite
232 lswx r5,0,r4 // load xer bytes into r5-r12
233 stswx r5,0,r0 // store them
234 blr
235
236
237 // Long, reverse aligned moves. We use FPRs.
// Reached from LLongReverse when operands are relatively word aligned. On entry
// rs/rd point one byte past the end of the operands and rc >= kLong.
238
239 LReverseFloat:
240 andi. w4,rd,7 // w4 <- #bytes to doubleword-align the end of the destination; cr0 beq if already aligned
241 sub rc,rc,w4 // adjust count for alignment
242 srwi r0,rc,5 // number of 32-byte chunks to xfer (0 is possible when alignment bytes were removed)
243 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
244 mtctr r0 // set up loop count
245 beq 1f // dest already doubleword aligned (cr0 still from the andi. above; then chunk count is >=1)
246
247 // Doubleword align the destination.
248
249 mtxer w4 // byte count to xer
250 cmpwi r0,0 // any chunks to xfer?
251 sub rs,rs,w4 // point to 1st bytes to xfer
252 sub rd,rd,w4 // rd is now doubleword aligned
253 lswx w1,0,rs // move w4 bytes to align dest
254 stswx w1,0,rd
255 beq- 2f // pathologic case, no chunks to xfer: ctr is 0, so the loop must not be entered
256 1: // each pass moves the 32 bytes just below rs/rd through f0-f3
257 lfd f0,-8(rs)
258 lfd f1,-16(rs)
259 lfd f2,-24(rs)
260 lfdu f3,-32(rs) // update form: rs -= 32
261 stfd f0,-8(rd)
262 stfd f1,-16(rd)
263 stfd f2,-24(rd)
264 stfdu f3,-32(rd) // update form: rd -= 32
265 bdnz 1b // loop until ctr (chunk count) reaches 0
266 2: // rc = remaining bytes (0-31)
267 mtxer rc // set up count for string ops
268 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
269 sub r0,rd,rc // move dest ptr out of way: rd is r12, which the lswx below may overwrite
270 lswx r5,0,r4 // load xer bytes into r5-r12
271 stswx r5,0,r0 // store them
272 blr
273
// NOTE(review): COMMPAGE_DESCRIPTOR is defined outside this file (presumably in
// <machine/commpage.h>). The arguments look like (routine, comm page address,
// required capability bits, excluded capability bits, special bits), i.e. this
// routine would be used only on CPUs that are neither 64-bit nor Altivec-capable.
// Confirm against the macro definition.
274 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,0)