/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, tuned for G3.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
 *
 *   r0    = "w7" or temp
 *   r2    = "w8"
 *   r3    = not used, as memcpy and memmove return 1st parameter as a value
 *   r4    = source ptr ("rs")
 *   r5    = count of bytes to move ("rc")
 *   r6    = "w1"
 *   r7    = "w2"
 *   r8    = "w3"
 *   r9    = "w4"
 *   r10   = "w5"
 *   r11   = "w6"
 *   r12   = destination ptr ("rd")
 *   f0-f3 = used for moving 8-byte aligned data
 */
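
/*
 * Overview: this is the commpage bcopy/memcpy/memmove variant tuned for the
 * G3, which has no vector unit.  Operands of 32 bytes or less are moved with
 * a single lswx/stswx pair; longer operands are moved in 32-byte chunks,
 * forward or in reverse depending on overlap, using either GPRs or (when the
 * operands are mutually word-aligned) FPRs.
 */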
#define rs      r4      // NB: we depend on rs==r4 in "lswx" instructions
#define rd      r12
#define rc      r5

#define w1      r6
#define w2      r7
#define w3      r8
#define w4      r9
#define w5      r10
#define w6      r11
#define w7      r0
#define w8      r2

#define ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text


#define kLong   33      // too long for string ops
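
// Note on the string-op fast path: lswx/stswx move the byte count held in
// XER through consecutive GPRs, and r5..r12 give 8 registers * 4 bytes =
// 32 bytes, so anything up to 32 bytes fits in one lswx/stswx pair.  Because
// the load completes into registers before the store begins, this short path
// is also safe for overlapping operands, which is why bcopy and memmove can
// share it.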


// Main entry points.

        .align  5
bcopy_g3:                           // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong            // length > 32 bytes?
        sub     w1,r4,r3            // must move in reverse if (rd-rs)<rc
        mr      rd,r4               // start to move source & dest to canonic spot
        bge     LLong0              // skip if long operand
        mtxer   rc                  // set length for string ops
        lswx    r5,0,r3             // load bytes into r5-r12
        stswx   r5,0,r4             // store them
        blr

// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.

        .align  5
Lmemcpy_g3:                         // void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g3:                        // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong            // length > 32 bytes?
        sub     w1,r3,rs            // must move in reverse if (rd-rs)<rc
        mr      rd,r3               // must leave r3 alone, it is return value for memcpy etc
        bge     LLong1              // longer than 32 bytes
        mtxer   rc                  // set length for string ops
        lswx    r5,0,r4             // load bytes into r5-r12
        stswx   r5,0,r3             // store them
        blr

// Long operands (more than 32 bytes.)
//      w1 = (rd-rs), used to check for alignment
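//      Three cases are distinguished below: if the operands overlap such that
//      a forward copy would trash not-yet-read source bytes (cr1 blt), the
//      move is done in reverse; otherwise, if source and destination are
//      mutually word-aligned (cr5 beq), 32-byte chunks are moved through
//      FPRs; otherwise they are moved through GPRs in word-sized pieces.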

LLong0:                             // enter from bcopy()
        mr      rs,r3               // must leave r3 alone (it is return value for memcpy)
LLong1:                             // enter from memcpy() and memmove()
        cmplw   cr1,w1,rc           // set cr1 blt iff we must move reverse
        rlwinm  r0,w1,0,0x3         // are operands relatively word-aligned?
        neg     w2,rd               // prepare to align destination
        cmpwi   cr5,r0,0            // set cr5 beq if relatively word aligned
        blt     cr1,LLongReverse    // handle reverse move
        andi.   w4,w2,3             // w4 <- #bytes to word align destination
        beq     cr5,LLongFloat      // relatively aligned so use FPRs
        sub     rc,rc,w4            // adjust count for alignment
        srwi    r0,rc,5             // get #chunks to xfer (>=1)
        rlwinm  rc,rc,0,0x1F        // mask down to leftover bytes
        mtctr   r0                  // set up loop count
        beq     1f                  // dest already word aligned

// Word align the destination.

        mtxer   w4                  // byte count to xer
        cmpwi   r0,0                // any chunks to xfer?
        lswx    w1,0,rs             // move w4 bytes to align dest
        add     rs,rs,w4
        stswx   w1,0,rd
        add     rd,rd,w4
        beq-    2f                  // pathological case, no chunks to xfer

// Forward, unaligned loop.

1:
        lwz     w1,0(rs)
        lwz     w2,4(rs)
        lwz     w3,8(rs)
        lwz     w4,12(rs)
        lwz     w5,16(rs)
        lwz     w6,20(rs)
        lwz     w7,24(rs)
        lwz     w8,28(rs)
        addi    rs,rs,32
        stw     w1,0(rd)
        stw     w2,4(rd)
        stw     w3,8(rd)
        stw     w4,12(rd)
        stw     w5,16(rd)
        stw     w6,20(rd)
        stw     w7,24(rd)
        stw     w8,28(rd)
        addi    rd,rd,32
        bdnz    1b
2:                                  // rc = remaining bytes (0-31)
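        // lswx with a count of up to 31 fills r5..r12, which would overwrite
        // rd (r12), so the dest ptr is parked in r0 first.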
        mtxer   rc                  // set up count for string ops
        mr      r0,rd               // move dest ptr out of the way
        lswx    r5,0,rs             // load xer bytes into r5-r12 (rs==r4)
        stswx   r5,0,r0             // store them
        blr



// Forward, aligned loop.  We use FPRs.
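// (The G3 has no AltiVec, so 8-byte FPR loads/stores are the widest transfers
// available; this path is used only when the operands are mutually
// word-aligned, as checked via cr5 above.)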

LLongFloat:
        andi.   w4,w2,7             // w4 <- #bytes to doubleword-align destination
        sub     rc,rc,w4            // adjust count for alignment
        srwi    r0,rc,5             // number of 32-byte chunks to xfer
        rlwinm  rc,rc,0,0x1F        // mask down to leftover bytes
        mtctr   r0                  // set up loop count
        beq     1f                  // dest already doubleword aligned

// Doubleword align the destination.

        mtxer   w4                  // byte count to xer
        cmpwi   r0,0                // any chunks to xfer?
        lswx    w1,0,rs             // move w4 bytes to align dest
        add     rs,rs,w4
        stswx   w1,0,rd
        add     rd,rd,w4
        beq-    2f                  // pathological case, no chunks to xfer
1:                                  // loop over 32-byte chunks
        lfd     f0,0(rs)
        lfd     f1,8(rs)
        lfd     f2,16(rs)
        lfd     f3,24(rs)
        addi    rs,rs,32
        stfd    f0,0(rd)
        stfd    f1,8(rd)
        stfd    f2,16(rd)
        stfd    f3,24(rd)
        addi    rd,rd,32
        bdnz    1b
2:                                  // rc = remaining bytes (0-31)
        mtxer   rc                  // set up count for string ops
        mr      r0,rd               // move dest ptr out of the way
        lswx    r5,0,rs             // load xer bytes into r5-r12 (rs==r4)
        stswx   r5,0,r0             // store them
        blr


// Long, reverse moves.
//      cr5 = beq if relatively word aligned
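//      Here the destination overlaps the end of the source ((rd-rs) < rc), so
//      a forward copy would overwrite source bytes before they are read; both
//      pointers are advanced past the end of the operands and the copy runs
//      from high addresses to low.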

LLongReverse:
        add     rd,rd,rc            // point to end of operands + 1
        add     rs,rs,rc
        beq     cr5,LReverseFloat   // aligned operands so can use FPRs
        srwi    r0,rc,5             // get chunk count
        rlwinm  rc,rc,0,0x1F        // mask down to leftover bytes
        mtctr   r0                  // set up loop count
        mtxer   rc                  // set up for trailing bytes
1:
        lwz     w1,-4(rs)
        lwz     w2,-8(rs)
        lwz     w3,-12(rs)
        lwz     w4,-16(rs)
        stw     w1,-4(rd)
        lwz     w5,-20(rs)
        stw     w2,-8(rd)
        lwz     w6,-24(rs)
        stw     w3,-12(rd)
        lwz     w7,-28(rs)
        stw     w4,-16(rd)
        lwzu    w8,-32(rs)
        stw     w5,-20(rd)
        stw     w6,-24(rd)
        stw     w7,-28(rd)
        stwu    w8,-32(rd)
        bdnz    1b

        sub     r4,rs,rc            // point to 1st (leftmost) leftover byte (0..31)
        sub     r0,rd,rc            // move dest ptr out of way
        lswx    r5,0,r4             // load xer bytes into r5-r12
        stswx   r5,0,r0             // store them
        blr


// Long, reverse aligned moves.  We use FPRs.

LReverseFloat:
        andi.   w4,rd,7             // w4 <- #bytes to doubleword-align destination
        sub     rc,rc,w4            // adjust count for alignment
        srwi    r0,rc,5             // number of 32-byte chunks to xfer
        rlwinm  rc,rc,0,0x1F        // mask down to leftover bytes
        mtctr   r0                  // set up loop count
        beq     1f                  // dest already doubleword aligned

// Doubleword align the destination.

        mtxer   w4                  // byte count to xer
        cmpwi   r0,0                // any chunks to xfer?
        sub     rs,rs,w4            // point to 1st bytes to xfer
        sub     rd,rd,w4
        lswx    w1,0,rs             // move w4 bytes to align dest
        stswx   w1,0,rd
        beq-    2f                  // pathological case, no chunks to xfer
1:
        lfd     f0,-8(rs)
        lfd     f1,-16(rs)
        lfd     f2,-24(rs)
        lfdu    f3,-32(rs)
        stfd    f0,-8(rd)
        stfd    f1,-16(rd)
        stfd    f2,-24(rd)
        stfdu   f3,-32(rd)
        bdnz    1b
2:                                  // rc = remaining bytes (0-31)
        mtxer   rc                  // set up count for string ops
        sub     r4,rs,rc            // point to 1st (leftmost) leftover byte (0..31)
        sub     r0,rd,rc            // move dest ptr out of way
        lswx    r5,0,r4             // load xer bytes into r5-r12
        stswx   r5,0,r0             // store them
        blr

        COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32)