]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/commpage/bcopy_g3.s
xnu-1228.12.14.tar.gz
[apple/xnu.git] / osfmk / ppc / commpage / bcopy_g3.s
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* =======================================
29 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
30 * =======================================
31 *
32 * Version of 2/20/2003, tuned for G3.
33 *
34 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
35 * environment.
36 *
37 * r0 = "w7" or temp
38 * r2 = "w8"
39 * r3 = not used, as memcpy and memmove return 1st parameter as a value
40 * r4 = source ptr ("rs")
41 * r5 = count of bytes to move ("rc")
42 * r6 = "w1"
43 * r7 = "w2"
44 * r8 = "w3"
45 * r9 = "w4"
46 * r10 = "w5"
47 * r11 = "w6"
48 * r12 = destination ptr ("rd")
49 * f0-f3 = used for moving 8-byte aligned data
50 */
// Symbolic register names used throughout this file. rs/rd/rc are the
// canonical source pointer, destination pointer and byte count; w1-w8 are
// the eight work registers used by the 32-byte word-copy loops.
51 #define rs r4 // NB: we depend on rs==r4 in "lswx" instructions
52 #define rd r12 // destination ptr; r12 is the last register an "lswx r5" can load
53 #define rc r5 // byte count; r5 is the first register an "lswx r5" loads
54
55 #define w1 r6
56 #define w2 r7
57 #define w3 r8
58 #define w4 r9
59 #define w5 r10
60 #define w6 r11
61 #define w7 r0 // r0 is also used as a general temp
62 #define w8 r2 // use of r2 is why this code cannot run under PEF/CFM (see header)
63
64 #include <sys/appleapiopts.h>
65 #include <ppc/asm.h>
66 #include <machine/cpu_capabilities.h>
67 #include <machine/commpage.h>
68
69 .text
70
71
72 #define kLong 33 // operands of kLong bytes or more are too long for one lswx/stswx pair (r5-r12 hold at most 32 bytes)
73
74
75 // Main entry points.
76
// bcopy_g3: void bcopy(const void *src, void *dst, size_t len)
//   In:  r3 = src, r4 = dst, r5 (rc) = byte count
//   Operands shorter than kLong bytes are moved with a single lswx/stswx
//   pair: every byte is loaded into r5-r12 before any byte is stored.
//   Longer operands branch to LLong0 with w1 = dst - src and rd = dst
//   already set up.
77 .align 5
78 bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
79 cmplwi rc,kLong // length > 32 bytes?
80 sub w1,r4,r3 // w1 = dst - src, ie (rd-rs); must move in reverse if (rd-rs)<rc
81 mr rd,r4 // start to move source & dest to canonic spot
82 bge LLong0 // skip if long operand
83 mtxer rc // set length for string ops
84 lswx r5,0,r3 // load bytes into r5-r12 (count is already in XER, so rc may be overwritten)
85 stswx r5,0,r4 // store them
86 blr
87
88 // NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
89
// Lmemcpy_g3 / Lmemmove_g3: shared entry point for memcpy() and memmove().
//   In:  r3 = dst, r4 (rs) = src, r5 (rc) = byte count
//   Out: r3 is never written, so it is still dst on return.
//   Operands shorter than kLong bytes are moved with a single lswx/stswx
//   pair: every byte is loaded into r5-r12 before any byte is stored.
//   Longer operands branch to LLong1 with w1 = dst - src and rd = dst
//   already set up.
90 .align 5
91 Lmemcpy_g3: // void* memcpy(void *dst, void *src, size_t len)
92 Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
93 cmplwi rc,kLong // length > 32 bytes?
94 sub w1,r3,rs // w1 = dst - src, ie (rd-rs); must move in reverse if (rd-rs)<rc
95 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
96 bge LLong1 // longer than 32 bytes
97 mtxer rc // set length for string ops
98 lswx r5,0,r4 // load bytes into r5-r12 (count is already in XER, so rc may be overwritten)
99 stswx r5,0,r3 // store them
100 blr
101
102 // Long operands (more than 32 bytes.)
103 // w1 = (rd-rs), used to check for alignment
104
// Long-operand dispatch, followed by the forward copy for operands that are
// not relatively word aligned.
//   On entry at LLong1: rs = source, rd = destination, rc = byte count
//   (at least kLong), w1 = rd - rs. LLong0 first moves bcopy's source
//   pointer from r3 into rs.
//   Dispatch: (rd-rs) < rc unsigned     -> LLongReverse
//             (rd-rs) multiple of 4     -> LLongFloat
//             otherwise                 -> fall through to the code below
105 LLong0: // enter from bcopy()
106 mr rs,r3 // bcopy's source arrives in r3; move it to rs (its dest was already copied to rd)
107 LLong1: // enter from memcpy() and memmove()
108 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
109 rlwinm r0,w1,0,0x3 // are operands relatively word-aligned?
110 neg w2,rd // prepare to align destination
111 cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
112 blt cr1,LLongReverse // handle reverse move
113 andi. w4,w2,3 // w4 <- #bytes to word align destination (also sets cr0 for "beq 1f" below)
114 beq cr5,LLongFloat // relatively aligned so use FPRs
115 sub rc,rc,w4 // adjust count for alignment
116 srwi r0,rc,5 // get #chunks to xfer (can be 0 only when w4 != 0; see pathologic case below)
117 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
118 mtctr r0 // set up loop count
119 beq 1f // dest already word aligned (cr0 is still from the andi. above)
120
121 // Word align the destination.
122
123 mtxer w4 // byte count to xer
124 cmpwi r0,0 // any chunks to xfer?
125 lswx w1,0,rs // move w4 bytes to align dest
126 add rs,rs,w4
127 stswx w1,0,rd
128 add rd,rd,w4
129 beq- 2f // pathologic case, no chunks to xfer
130
131 // Forward, unaligned loop.
132
// Each pass moves 32 bytes through w1-w8; ctr holds the chunk count.
133 1:
134 lwz w1,0(rs)
135 lwz w2,4(rs)
136 lwz w3,8(rs)
137 lwz w4,12(rs)
138 lwz w5,16(rs)
139 lwz w6,20(rs)
140 lwz w7,24(rs)
141 lwz w8,28(rs)
142 addi rs,rs,32
143 stw w1,0(rd)
144 stw w2,4(rd)
145 stw w3,8(rd)
146 stw w4,12(rd)
147 stw w5,16(rd)
148 stw w6,20(rd)
149 stw w7,24(rd)
150 stw w8,28(rd)
151 addi rd,rd,32
152 bdnz 1b
153 2: // rc = remaining bytes (0-31)
154 mtxer rc // set up count for string ops
155 mr r0,rd // move dest ptr out of the way (rd is r12, which the lswx below may load)
156 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
157 stswx r5,0,r0 // store them
158 blr
159
160
161
162 // Forward, aligned loop. We use FPRs.
163
// LLongFloat: forward copy for long operands that are relatively word
// aligned, moving 32 bytes per pass through f0-f3.
//   On entry: rs = source, rd = destination, rc = byte count (at least
//   kLong), w2 = -rd (computed at LLong1).
164 LLongFloat:
165 andi. w4,w2,7 // w4 <- #bytes to doubleword-align destination (also sets cr0 for "beq 1f" below)
166 sub rc,rc,w4 // adjust count for alignment
167 srwi r0,rc,5 // number of 32-byte chunks to xfer (can be 0 only when w4 != 0)
168 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
169 mtctr r0 // set up loop count
170 beq 1f // dest already doubleword aligned (cr0 is still from the andi. above)
171
172 // Doubleword align the destination.
173
174 mtxer w4 // byte count to xer
175 cmpwi r0,0 // any chunks to xfer?
176 lswx w1,0,rs // move w4 bytes to align dest
177 add rs,rs,w4
178 stswx w1,0,rd
179 add rd,rd,w4
180 beq- 2f // pathologic case, no chunks to xfer
181 1: // loop over 32-byte chunks
182 lfd f0,0(rs)
183 lfd f1,8(rs)
184 lfd f2,16(rs)
185 lfd f3,24(rs)
186 addi rs,rs,32
187 stfd f0,0(rd)
188 stfd f1,8(rd)
189 stfd f2,16(rd)
190 stfd f3,24(rd)
191 addi rd,rd,32
192 bdnz 1b
193 2: // rc = remaining bytes (0-31)
194 mtxer rc // set up count for string ops
195 mr r0,rd // move dest ptr out of the way (rd is r12, which the lswx below may load)
196 lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
197 stswx r5,0,r0 // store them
198 blr
199
200
201 // Long, reverse moves.
202 // cr5 = beq if relatively word aligned
203
// LLongReverse: copy a long operand from its high end downward, used when
// (rd-rs) < rc. Operands that are not relatively word aligned are handled
// here with w1-w8; relatively aligned ones go to LReverseFloat.
//   On entry: rs = source, rd = destination, rc = byte count (at least
//   kLong, so the chunk count below is at least 1), cr5 = beq if the
//   operands are relatively word aligned.
204 LLongReverse:
205 add rd,rd,rc // point to end of operands + 1
206 add rs,rs,rc
207 beq cr5,LReverseFloat // aligned operands so can use FPRs
208 srwi r0,rc,5 // get chunk count
209 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
210 mtctr r0 // set up loop count
211 mtxer rc // set up for trailing bytes
// Each pass moves 32 bytes, highest word first; the final lwzu/stwu of the
// pass step rs and rd down by 32.
212 1:
213 lwz w1,-4(rs)
214 lwz w2,-8(rs)
215 lwz w3,-12(rs)
216 lwz w4,-16(rs)
217 stw w1,-4(rd)
218 lwz w5,-20(rs)
219 stw w2,-8(rd)
220 lwz w6,-24(rs)
221 stw w3,-12(rd)
222 lwz w7,-28(rs)
223 stw w4,-16(rd)
224 lwzu w8,-32(rs)
225 stw w5,-20(rd)
226 stw w6,-24(rd)
227 stw w7,-28(rd)
228 stwu w8,-32(rd)
229 bdnz 1b
230
231 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
232 sub r0,rd,rc // move dest ptr out of way (rd is r12, which the lswx below may load)
233 lswx r5,0,r4 // load xer bytes into r5-r12 (XER was set before the loop)
234 stswx r5,0,r0 // store them
235 blr
236
237
238 // Long, reverse aligned moves. We use FPRs.
239
// LReverseFloat: reverse copy for long operands that are relatively word
// aligned, moving 32 bytes per pass through f0-f3.
//   On entry: rs and rd point one byte past the end of the source and
//   destination (set up at LLongReverse), rc = byte count (at least kLong).
240 LReverseFloat:
241 andi. w4,rd,7 // w4 <- #bytes to doubleword-align destination (also sets cr0 for "beq 1f" below)
242 sub rc,rc,w4 // adjust count for alignment
243 srwi r0,rc,5 // number of 32-byte chunks to xfer (can be 0 only when w4 != 0)
244 rlwinm rc,rc,0,0x1F // mask down to leftover bytes
245 mtctr r0 // set up loop count
246 beq 1f // dest already doubleword aligned (cr0 is still from the andi. above)
247
248 // Doubleword align the destination.
249
250 mtxer w4 // byte count to xer
251 cmpwi r0,0 // any chunks to xfer?
252 sub rs,rs,w4 // point to 1st bytes to xfer
253 sub rd,rd,w4
254 lswx w1,0,rs // move w4 bytes to align dest
255 stswx w1,0,rd
256 beq- 2f // pathologic case, no chunks to xfer
// Each pass moves 32 bytes, highest doubleword first; the final lfdu/stfdu
// of the pass step rs and rd down by 32.
257 1:
258 lfd f0,-8(rs)
259 lfd f1,-16(rs)
260 lfd f2,-24(rs)
261 lfdu f3,-32(rs)
262 stfd f0,-8(rd)
263 stfd f1,-16(rd)
264 stfd f2,-24(rd)
265 stfdu f3,-32(rd)
266 bdnz 1b
267 2: // rc = remaining bytes (0-31)
268 mtxer rc // set up count for string ops
269 sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
270 sub r0,rd,rc // move dest ptr out of way (rd is r12, which the lswx below may load)
271 lswx r5,0,r4 // load xer bytes into r5-r12
272 stswx r5,0,r0 // store them
273 blr
274
// Descriptor for this routine, starting at bcopy_g3 and targeting
// _COMM_PAGE_BCOPY. NOTE(review): the macro is defined outside this file
// (presumably in machine/commpage.h); the 0 and k64Bit+kHasAltivec arguments
// look like "must have" / "must not have" CPU capability masks - confirm
// against the macro definition.
275 COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)