/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
 * This version might be used when bringing up new processors with known
 * Altivec bugs that need to be worked around.  It is not particularly well
 * optimized.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
 *   r0  = "w7" or temp
 *   r2  = "w8"
 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6  = "w1"
 *   r7  = "w2"
 *   r8  = "w3"
 *   r9  = "w4"
 *   r10 = "w5"
 *   r11 = "w6"
 *   r12 = destination ptr ("rd")
 */
#define	rs	r4
#define	rd	r12
#define	rc	r5
#define	rv	r2

#define	w1	r6
#define	w2	r7
#define	w3	r8
#define	w4	r9
#define	w5	r10
#define	w6	r11
#define	w7	r0
#define	w8	r2

#define	ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

	.text
	.globl	EXT(bcopy_64)

#define	kLong	64		// too long for inline loopless code
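
// Operands shorter than kLong bytes are handled by the loopless LShort code:
// since the length is then below 64 it fits in six bits, so each power-of-two
// piece (32, 16, 8, 4, 2, and 1 bytes) is moved at most once, selected by one
// bit of the length.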


// Main entry points.
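//
// Both entry points decide the copy direction with a single unsigned compare:
// (rd-rs) wraps around when the destination is below the source, so cr1 is
// "blt" (ie, (rd-rs) < rc) only when the destination lies within rc bytes
// above the source.  That is exactly the case in which a forward copy would
// overwrite source bytes before reading them; eg rd = rs+16 with rc = 64
// gives (rd-rs) = 16 < 64, so the move must be done in reverse.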

	.align	5
bcopy_64:				// void bcopy(const void *src, void *dst, size_t len)
	cmplwi	rc,kLong		// short or long?
	sub	w1,r4,r3		// must move in reverse if (rd-rs)<rc
	cmplw	cr1,w1,rc		// set cr1 blt iff we must move reverse
	mr	rd,r4			// start to move registers to canonical spot
	mr	rs,r3
	blt	LShort			// handle short operands
	dcbt	0,r3			// touch in destination
	b	LLong			// join medium/long operand code

// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
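// (The commpage entry points are reached through fixed addresses defined in
// cpu_capabilities.h, so the spacing between the bcopy and memcpy/memmove
// entries in this file has to match the offsets those definitions assume.)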

	.align	5
Lmemcpy_g4:				// void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g4:				// void* memmove(void *dst, const void *src, size_t len)
	cmplwi	rc,kLong		// short or long?
	sub	w1,r3,r4		// must move in reverse if (rd-rs)<rc
	dcbt	0,r4			// touch in the first line of source
	cmplw	cr1,w1,rc		// set cr1 blt iff we must move reverse
	mr	rd,r3			// must leave r3 alone, it is return value for memcpy etc
	bge	LLong			// handle medium or long operands

// Handle short operands.

LShort:
	mtcrf	0x02,rc			// put length bits 26-27 in cr6 (faster one cr at a time)
	mtcrf	0x01,rc			// put length bits 28-31 in cr7
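// At this point rc < kLong, so bits 26-31 of the length describe the entire
// remaining move (PPC bit numbering: bit 31 is the 1s place, bit 26 the 32s
// place).  Bit 26 selects a 32-byte move, bit 27 a 16-byte move, and bits
// 28-31 the 8-, 4-, 2-, and 1-byte moves; eg a length of 43 = 0b101011
// moves 32+8+2+1 bytes.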
	blt	cr1,LShortReverse

// Forward short operands.  This is the most frequent case, so it is inline.

LShort64:				// enter to xfer last 64 bytes
	bf	26,0f			// 32-byte chunk to xfer?
	ld	w1,0(rs)
	ld	w2,8(rs)
	ld	w3,16(rs)
	ld	w4,24(rs)
	addi	rs,rs,32
	std	w1,0(rd)
	std	w2,8(rd)
	std	w3,16(rd)
	std	w4,24(rd)
	addi	rd,rd,32
0:
	bf	27,1f			// quadword to move?
	ld	w1,0(rs)
	ld	w2,8(rs)
	addi	rs,rs,16
	std	w1,0(rd)
	std	w2,8(rd)
	addi	rd,rd,16
1:
	bf	28,2f			// doubleword?
	ld	w1,0(rs)
	addi	rs,rs,8
	std	w1,0(rd)
	addi	rd,rd,8
2:
	bf	29,3f			// word?
	lwz	w1,0(rs)
	addi	rs,rs,4
	stw	w1,0(rd)
	addi	rd,rd,4
3:
	bf	30,4f			// halfword to move?
	lhz	w1,0(rs)
	addi	rs,rs,2
	sth	w1,0(rd)
	addi	rd,rd,2
4:
	bflr	31			// skip if no odd byte
	lbz	w1,0(rs)
	stb	w1,0(rd)
	blr


// Handle short reverse operands.
//	cr6 = bits 26-27 of length
//	cr7 = bits 28-31 of length

LShortReverse:
	add	rs,rs,rc		// adjust ptrs for reverse move
	add	rd,rd,rc
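// rs and rd now point one byte past the end of their operands, so the code
// below works downward with negative offsets, using the update forms
// (ldu/stdu etc) to step the pointers back one chunk at a time.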
LShortReverse64:			// enter to xfer last 64 bytes
	bf	26,0f			// 32-byte chunk to xfer?
	ld	w1,-8(rs)
	ld	w2,-16(rs)
	ld	w3,-24(rs)
	ldu	w4,-32(rs)
	std	w1,-8(rd)
	std	w2,-16(rd)
	std	w3,-24(rd)
	stdu	w4,-32(rd)
0:
	bf	27,1f			// quadword to move?
	ld	w1,-8(rs)
	ldu	w2,-16(rs)
	std	w1,-8(rd)
	stdu	w2,-16(rd)
1:
	bf	28,2f			// doubleword?
	ldu	w1,-8(rs)
	stdu	w1,-8(rd)
2:
	bf	29,3f			// word?
	lwzu	w1,-4(rs)
	stwu	w1,-4(rd)
3:
	bf	30,4f			// halfword to move?
	lhzu	w1,-2(rs)
	sthu	w1,-2(rd)
4:
	bflr	31			// done if no odd byte
	lbz	w1,-1(rs)		// no update
	stb	w1,-1(rd)
	blr


// Long operands.
//	cr1 = blt iff we must move reverse

	.align	4
LLong:
	dcbtst	0,rd			// touch in destination
	neg	w3,rd			// start to compute #bytes to align destination
	andi.	w6,w3,7			// w6 <- #bytes to 8-byte align destination
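// (neg/andi. computes (-rd) mod 8, ie (8 - (rd mod 8)) mod 8, the number of
// leading bytes to move singly before rd reaches the next 8-byte boundary;
// eg a destination ending in ...5 gives w6 = 3.)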
	blt	cr1,LLongReverse	// handle reverse moves
	mtctr	w6			// set up for loop to align destination
	sub	rc,rc,w6		// adjust count
	beq	LAligned		// destination already 8-byte aligned
1:
	lbz	w1,0(rs)
	addi	rs,rs,1
	stb	w1,0(rd)
	addi	rd,rd,1
	bdnz	1b

// Destination is 8-byte aligned.

LAligned:
	srwi.	w2,rc,6			// w2 <- count of 64-byte chunks
	mtcrf	0x02,rc			// leftover byte count to cr (faster one cr at a time)
	mtcrf	0x01,rc			// put length bits 28-31 in cr7
	beq	LShort64		// no 64-byte chunks
	mtctr	w2
	b	1f

// Loop moving 64-byte chunks.

	.align	5
1:
	ld	w1,0(rs)
	ld	w2,8(rs)
	ld	w3,16(rs)
	ld	w4,24(rs)
	ld	w5,32(rs)
	ld	w6,40(rs)
	ld	w7,48(rs)
	ld	w8,56(rs)
	addi	rs,rs,64
	std	w1,0(rd)
	std	w2,8(rd)
	std	w3,16(rd)
	std	w4,24(rd)
	std	w5,32(rd)
	std	w6,40(rd)
	std	w7,48(rd)
	std	w8,56(rd)
	addi	rd,rd,64
	bdnz	1b

	b	LShort64		// handle leftover 0..63 bytes


// Handle reverse moves.

LLongReverse:
	add	rd,rd,rc		// point to end of operands
	add	rs,rs,rc
	andi.	r0,rd,7			// is destination 8-byte aligned?
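// (In the reverse case rd already points just past the end of the operand,
// so rd mod 8 is itself the number of trailing bytes to move singly before
// rd backs onto an 8-byte boundary.)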
	sub	rc,rc,r0		// adjust count
	mtctr	r0			// set up for byte loop
	beq	LRevAligned		// already aligned

1:
	lbzu	w1,-1(rs)
	stbu	w1,-1(rd)
	bdnz	1b

// Destination is 8-byte aligned.

LRevAligned:
	srwi.	w2,rc,6			// w2 <- count of 64-byte chunks
	mtcrf	0x02,rc			// leftover byte count to cr (faster one cr at a time)
	mtcrf	0x01,rc			// put length bits 28-31 in cr7
	beq	LShortReverse64		// no 64-byte chunks
	mtctr	w2
	b	1f

// Loop over 64-byte chunks (reverse).

	.align	5
1:
	ld	w1,-8(rs)
	ld	w2,-16(rs)
	ld	w3,-24(rs)
	ld	w4,-32(rs)
	ld	w5,-40(rs)
	ld	w6,-48(rs)
	ld	w7,-56(rs)
	ldu	w8,-64(rs)
	std	w1,-8(rd)
	std	w2,-16(rd)
	std	w3,-24(rd)
	std	w4,-32(rd)
	std	w5,-40(rd)
	std	w6,-48(rd)
	std	w7,-56(rd)
	stdu	w8,-64(rd)
	bdnz	1b

	b	LShortReverse64		// handle leftover 0..63 bytes

	COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,0)