]> git.saurik.com Git - apple/xnu.git/blame - osfmk/arm/bcopy.s
xnu-7195.60.75.tar.gz
[apple/xnu.git] / osfmk / arm / bcopy.s
CommitLineData
5ba3f43e
A
1/*
2 * Copyright (c) 2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <arm/proc_reg.h>
30
31.syntax unified
32.text
33.align 2
34
35 .globl _ovbcopy
36 .globl _memcpy
37 .globl _bcopy
38 .globl _memmove
39
_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
_ovbcopy:
	/*
	 * bcopy takes (src, dest); swap r0/r1 so we can fall straight
	 * through into memcpy/memmove's (dest, src) argument order.
	 */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3

_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
	/* return immediately on zero len, or if src and dest are the same */
	cmp	r2, #0
	cmpne	r0, r1
	bxeq	lr
52
	/* save r0 (return value), r4 (scratch), and r5 (scratch); r7/lr form the frame */
	stmfd	sp!, { r0, r4, r5, r7, lr }
	add	r7, sp, #12

	/*
	 * check for overlap. r3 <- |dest - src|.
	 * The condition flags are still live from the "cmpne r0, r1" above:
	 * hs means dest >= src, lo means dest < src.
	 */
	subhs	r3, r0, r1
	sublo	r3, r1, r0
	cmp	r3, r2		/* if distance(src, dest) < len, we have overlap */
	blo	Loverlap
62
Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? (compare the low 2 bits) */
	mov	r12, r0, lsl #30
	cmp	r12, r1, lsl #30
	bne	Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp	r2, #64
	blt	Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment (src and dest share the low nibble here) */
	tst	r0, #0xf
	bne	Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32
80
Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop:
	/* copy 64 bytes per iteration: two 32-byte ldm/stm bursts, prefetching ahead */
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	pld	[r1, #32]
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
	pld	[r1, #32]
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp	r2, #16
	ldmiage	r1!, { r3, r4, r5, r12 }
	stmiage	r0!, { r3, r4, r5, r12 }
	subsge	r2, r2, #16
	bgt	Llessthan64_aligned
	beq	Lexit
114
Llessthan16_aligned:
	/*
	 * 0..15 bytes remain. Shift len bits 3..0 into the N/Z/C/V flag
	 * positions so each conditional transfer below tests one bit of
	 * the residue:
	 *   mi (N = len bit 3) -> 8 bytes,  eq (Z = len bit 2) -> 4 bytes,
	 *   cs (C = len bit 1) -> 2 bytes,  vs (V = len bit 0) -> 1 byte.
	 */
	mov	r2, r2, lsl #28
	msr	cpsr_f, r2

	ldmiami	r1!, { r2, r3 }
	ldreq	r4, [r1], #4
	ldrhcs	r5, [r1], #2
	ldrbvs	r12, [r1], #1

	stmiami	r0!, { r2, r3 }
	streq	r4, [r0], #4
	strhcs	r5, [r0], #2
	strbvs	r12, [r0], #1
	b	Lexit
129
Lsimilarlyunaligned:
	/*
	 * Both src and dest are unaligned in similar ways; copy the 1-15
	 * leading bytes needed to bring dest up to a 16-byte boundary.
	 * r12 = -(dest low nibble) << 28: the negation turns "current
	 * misalignment" into "bytes needed to align", encoded in the flags
	 * (vs: 1 byte, cs: 2, eq: 4, mi: 8) just as in Llessthan16_aligned.
	 */
	mov	r12, r0, lsl #28
	rsb	r12, r12, #0
	msr	cpsr_f, r12

	ldrbvs	r3, [r1], #1
	ldrhcs	r4, [r1], #2
	ldreq	r5, [r1], #4

	strbvs	r3, [r0], #1
	strhcs	r4, [r0], #2
	streq	r5, [r0], #4

	ldmiami	r1!, { r3, r4 }
	stmiami	r0!, { r3, r4 }

	/* r12 lsr #28 = number of alignment bytes just copied */
	subs	r2, r2, r12, lsr #28
	beq	Lexit
149
Lunaligned_32:
	/* bring dest up to 32 byte alignment by copying one 16-byte chunk if needed */
	tst	r0, #(1 << 4)
	ldmiane	r1!, { r3, r4, r5, r12 }
	stmiane	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned
	b	Llessthan64_aligned
161
Lbytewise2:
	/*
	 * copy 2 bytes at a time; the second byte is conditional (pl, i.e.
	 * the subs did not go negative) so an odd trailing byte is handled
	 * by the same loop.
	 */
	subs	r2, r2, #2

	ldrb	r3, [r1], #1
	ldrbpl	r4, [r1], #1

	strb	r3, [r0], #1
	strbpl	r4, [r0], #1

	bhi	Lbytewise2
	b	Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bne	Lbytewise
	b	Lexit
182
Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp	r2, #4
	blt	Lbytewise2

	/* bytewise copy until src (and therefore dest) is word aligned */
	tst	r1, #3
Lwordalignloop:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop

	cmp	r2, #16
	bge	Llessthan64_aligned
	blt	Llessthan16_aligned
200
Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp	r0, r1			/* if dest > src */
	bhi	Loverlap_srclower

Loverlap_destlower:
	/*
	 * dest < src: a forward copy is safe. Use the fast path when the
	 * overlap distance (r3) is at least one 64-byte copy stride,
	 * otherwise fall back to a slow forward copy.
	 */
	cmp	r3, #64
	bge	Lnormalforwardcopy	/* overlap is greater than one stride of the copy, use normal copy */

	cmp	r3, #2
	bge	Lbytewise2
	b	Lbytewise
214
	/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap: must copy backwards from the end */

	/* point both pointers one past the end: dest += len; src += len; (r0 = dest, r1 = src) */
	add	r0, r0, r2
	add	r1, r1, r2

	/* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
	cmp	r2, #64			/* less than 64 bytes to copy? */
	cmpgt	r3, #64			/* less than 64 bytes of nonoverlap? */
	blt	Lbytewise_reverse

	/* test if src and dest are nonword aligned differently (low 2 bits) */
	mov	r3, r0, lsl #30
	cmp	r3, r1, lsl #30
	bne	Lbytewise_reverse

	/* test if src and dest are non word aligned or dest is non 16 byte aligned */
	tst	r0, #0xf
	bne	Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32_reverse_similarly
240
	/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time, descending (ldmdb/stmdb decrement before each access) */
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#if ARCH_ARMv5 || ARCH_ARMv5e || ARCH_ARMv6
	/*
	 * NOTE(review): this pld is arch-guarded while the matching pld
	 * below (and both in the forward loop) are unconditional — looks
	 * inconsistent; confirm intent before changing.
	 */
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
	pld	[r1, #-32]
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Lbytewise_reverse:
	/* simple bytewise reverse copy (pointers pre-decremented by writeback) */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	subs	r2, r2, #1
	bne	Lbytewise_reverse
	b	Lexit
275
Lunaligned_reverse_similarly:
	/*
	 * Both src and dest are unaligned in similar ways; copy the trailing
	 * bytes down to a 16-byte dest boundary. Unlike the forward path
	 * there is no negation: copying backwards, the low nibble of dest
	 * IS the byte count above the boundary, encoded in the flags
	 * (vs: 1 byte, cs: 2, eq: 4, mi: 8).
	 */
	mov	r12, r0, lsl #28
	msr	cpsr_f, r12

	ldrbvs	r3, [r1, #-1]!
	ldrhcs	r4, [r1, #-2]!
	ldreq	r5, [r1, #-4]!

	strbvs	r3, [r0, #-1]!
	strhcs	r4, [r0, #-2]!
	streq	r5, [r0, #-4]!

	ldmdbmi	r1!, { r3, r4 }
	stmdbmi	r0!, { r3, r4 }

	/* r12 lsr #28 = number of alignment bytes just copied */
	subs	r2, r2, r12, lsr #28
	beq	Lexit
294
Lunaligned_32_reverse_similarly:
	/* bring dest down to 32 byte alignment by copying one 16-byte chunk if needed */
	tst	r0, #(1 << 4)
	ldmdbne	r1!, { r3, r4, r5, r12 }
	stmdbne	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned_reverse
	b	Lbytewise_reverse
306
	/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp	r2, #8
	blt	Lbytewise2		/* fewer than 8 bytes: shifted word copy setup not worth it */

	/* bytewise copy until src word aligned */
	tst	r1, #3
Lwordalignloop2:
	ldrbne	r3, [r1], #1
	strbne	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop2

	/*
	 * src is now word aligned; dispatch on dest's low 2 bits (the
	 * relative misalignment). 0 is impossible here — similarly-aligned
	 * copies were routed to Lnormalforwardcopy's fast paths earlier.
	 */
	and	r3, r0, #3
	cmp	r3, #2
	blt	Lalign1_forward
	beq	Lalign2_forward
	bgt	Lalign3_forward
327
Lalign1_forward:
	/*
	 * dest is 1 byte past a word boundary. Back dest up to that
	 * boundary and do aligned word stores, shifting src words so the
	 * byte already at dest-1 is rewritten with its own value.
	 */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #1

	/* prime the copy: preload the existing byte at the word boundary */
	ldrb	r4, [r0]		/* load D[7:0] */

Lalign1_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #24		/* D[7:0] = S[31:24] */
	subs	r12, r12, #1
	bne	Lalign1_forward_loop

	/* finish the copy off: one carried byte remains */
	strb	r4, [r0], #1		/* save D[7:0] */

	ands	r2, r2, #3		/* 0-3 residual bytes */
	beq	Lexit
	b	Lbytewise2
350
Lalign2_forward:
	/*
	 * dest is 2 bytes past a word boundary. Back dest up to the
	 * boundary and do aligned word stores, carrying a halfword of src
	 * between iterations.
	 */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #2

	/* prime the copy: preload the existing halfword at the word boundary */
	ldrh	r4, [r0]		/* load D[15:0] */

Lalign2_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #16		/* D[15:0] = S[31:16] */
	subs	r12, r12, #1
	bne	Lalign2_forward_loop

	/* finish the copy off: one carried halfword remains */
	strh	r4, [r0], #2		/* save D[15:0] */

	ands	r2, r2, #3		/* 0-3 residual bytes */
	beq	Lexit
	b	Lbytewise2
373
Lalign3_forward:
	/*
	 * dest is 3 bytes past a word boundary. Back dest up to the
	 * boundary and do aligned word stores, carrying 3 bytes of src
	 * between iterations.
	 */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #3

	/* prime the copy: preload the existing 3 bytes at the word boundary */
	ldr	r4, [r0]
	and	r4, r4, #0x00ffffff	/* load D[23:0] */

Lalign3_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #8		/* D[23:0] = S[31:8] */
	subs	r12, r12, #1
	bne	Lalign3_forward_loop

	/* finish the copy off: three carried bytes remain */
	strh	r4, [r0], #2		/* save D[15:0] */
	mov	r4, r4, lsr #16
	strb	r4, [r0], #1		/* save D[23:16] */

	ands	r2, r2, #3		/* 0-3 residual bytes */
	beq	Lexit
	b	Lbytewise2
399
Lexit:
	/* restore saved registers and return; r0 (original dest, saved in the prologue) is the return value */
	ldmfd	sp!, { r0, r4, r5, r7, pc }
402