/*
 * Copyright (c) 2006, 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*****************************************************************************
 * ARMv5 and ARMv6 implementation, also used in dyld on later archs         *
 *****************************************************************************/

#include <arm/arch.h>
#if !defined _ARM_ARCH_7 || defined VARIANT_DYLD

.text
.align 2

	.globl _memcpy
	.globl _bcopy
	.globl _memmove

_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
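	/*
	 * bcopy takes (src, dest, len) while memcpy/memmove take
	 * (dest, src, len); swap r0 and r1 through r3 (free as a
	 * scratch register here) and fall through to the shared body.
	 */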
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3

_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
	/* check for zero len or if the pointers are the same */
	cmp	r2, #0
	cmpne	r0, r1
	bxeq	lr
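	/*
	 * cmpne only executes when the first compare was not equal, so the
	 * bxeq above returns for either len == 0 or dest == src.  On fall
	 * through, the flags still hold the unsigned dest-vs-src compare;
	 * the stmfd/add below leave them untouched, which is what lets
	 * the subhs/sublo pair further down compute |dest - src|.
	 */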

	/* save r0 (the return value), r4 and r5 (scratch), and r7 and lr for the frame */
	stmfd	sp!, { r0, r4, r5, r7, lr }
	add	r7, sp, #12

	/* check for overlap. r3 <- distance between src & dest */
	subhs	r3, r0, r1
	sublo	r3, r1, r0
	cmp	r3, r2			/* if distance(src, dest) < len, we have overlap */
	blo	Loverlap

Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? */
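	/*
	 * lsl #30 keeps only bits 1:0 of each address, so the compare
	 * below checks whether src and dest sit at the same offset
	 * within a word and can be word-aligned by the same byte prefix.
	 */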
	mov	r12, r0, lsl #30
	cmp	r12, r1, lsl #30
	bne	Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp	r2, #64
	blt	Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment */
	tst	r0, #0xf
	bne	Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop:
	/* copy 64 bytes at a time */
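	/*
	 * Each ldmia/stmia pair moves eight registers (32 bytes); two
	 * pairs per pass give 64 bytes, with pld (ARMv6 only) hinting
	 * the cache to fetch source bytes 32 beyond the advanced pointer.
	 */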
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
#ifdef _ARM_ARCH_6
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp	r2, #16
	ldmgeia	r1!, { r3, r4, r5, r12 }
	stmgeia	r0!, { r3, r4, r5, r12 }
	subges	r2, r2, #16
	bgt	Llessthan64_aligned
	beq	Lexit

Llessthan16_aligned:
	mov	r2, r2, lsl #28
	msr	cpsr_f, r2
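	/*
	 * With len < 16, lsl #28 parks its four low bits in N,Z,C,V:
	 *   bit 3 -> N (mi: copy 8 bytes)   bit 2 -> Z (eq: copy 4)
	 *   bit 1 -> C (cs: copy 2)         bit 0 -> V (vs: copy 1)
	 * so the conditional loads/stores below finish the tail without
	 * branches.  r2 is dead as a counter and is reused as scratch.
	 */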

	ldmmiia	r1!, { r2, r3 }
	ldreq	r4, [r1], #4
	ldrcsh	r5, [r1], #2
	ldrvsb	r12, [r1], #1

	stmmiia	r0!, { r2, r3 }
	streq	r4, [r0], #4
	strcsh	r5, [r0], #2
	strvsb	r12, [r0], #1
	b	Lexit

Lsimilarlyunaligned:
	/* src and dest are unaligned in similar ways; align dest to a 16 byte boundary (the fall-through below finishes 32) */
	mov	r12, r0, lsl #28
	rsb	r12, r12, #0
	msr	cpsr_f, r12
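	/*
	 * After the negate, r12[31:28] holds 16 - (dest & 0xf), the byte
	 * count needed to reach 16-byte alignment, and those same four
	 * bits now sit in N,Z,C,V.  The conditional ops below copy 1, 2,
	 * 4, then 8 bytes as flagged, and the subs afterwards deducts
	 * exactly r12 >> 28 from the length.
	 */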

	ldrvsb	r3, [r1], #1
	ldrcsh	r4, [r1], #2
	ldreq	r5, [r1], #4

	strvsb	r3, [r0], #1
	strcsh	r4, [r0], #2
	streq	r5, [r0], #4

	ldmmiia	r1!, { r3, r4 }
	stmmiia	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28
	beq	Lexit

Lunaligned_32:
	/* bring dest up to 32 byte alignment */
	tst	r0, #(1 << 4)
	ldmneia	r1!, { r3, r4, r5, r12 }
	stmneia	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned
	b	Llessthan64_aligned

Lbytewise2:
	/* copy 2 bytes at a time */
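	/*
	 * The condition codes absorb an odd count: with one byte left,
	 * subs goes negative, so the pl-conditional second ldrb/strb are
	 * skipped, and hi (C set, Z clear) fails, ending the loop.
	 */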
	subs	r2, r2, #2

	ldrb	r3, [r1], #1
	ldrplb	r4, [r1], #1

	strb	r3, [r0], #1
	strplb	r4, [r0], #1

	bhi	Lbytewise2
	b	Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bne	Lbytewise
	b	Lexit

Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp	r2, #4
	blt	Lbytewise2

	/* bytewise copy until word aligned */
	tst	r1, #3
Lwordalignloop:
	ldrneb	r3, [r1], #1
	strneb	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop

	cmp	r2, #16
	bge	Llessthan64_aligned
	blt	Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp	r0, r1			/* if dest > src */
	bhi	Loverlap_srclower

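	/*
	 * With dest below src, a forward pass can never overwrite source
	 * bytes that have not been read yet, so copying forward is always
	 * safe here; the distance check below appears to exist only to
	 * pick the fast block strategy when the regions are at least one
	 * 64-byte stride apart.
	 */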
Loverlap_destlower:
	/* dest < src; see if we can still do a fast forward copy, or fall back to a slow forward copy */
	cmp	r3, #64
	bge	Lnormalforwardcopy	/* overlap is greater than one stride of the copy, use normal copy */

	cmp	r3, #2
	bge	Lbytewise2
	b	Lbytewise

	/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap */

	/* src += len; dest += len; */
	add	r0, r0, r2
	add	r1, r1, r2

	/* we have to copy in reverse no matter what; test whether we can use a large block reverse copy */
	cmp	r2, #64			/* less than 64 bytes to copy? */
	cmpgt	r3, #64			/* less than 64 bytes of nonoverlap? */
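	/*
	 * cmpgt only runs when len > 64, so the blt below takes the
	 * bytewise path either when len < 64, or when a longer copy has
	 * less than one 64-byte stride of non-overlap between the regions.
	 */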
	blt	Lbytewise_reverse

	/* test if src and dest are word aligned dissimilarly */
	mov	r3, r0, lsl #30
	cmp	r3, r1, lsl #30
	bne	Lbytewise_reverse

	/* test if src and dest are not word aligned, or dest is not 16 byte aligned */
	tst	r0, #0xf
	bne	Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32_reverse_similarly

	/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time */
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
#ifdef _ARM_ARCH_6
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Lbytewise_reverse:
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	subs	r2, r2, #1
	bne	Lbytewise_reverse
	b	Lexit

Lunaligned_reverse_similarly:
	/* src and dest are unaligned in similar ways; align dest down to a 16 byte boundary (the fall-through below finishes 32) */
	mov	r12, r0, lsl #28
	msr	cpsr_f, r12
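	/*
	 * No negate this time: copying downwards, the distance to the
	 * previous 16-byte boundary is just dest & 0xf, so those four
	 * bits go straight into N,Z,C,V, and the subs below deducts
	 * r12 >> 28 once the 1/2/4/8-byte pieces have been copied.
	 */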

	ldrvsb	r3, [r1, #-1]!
	ldrcsh	r4, [r1, #-2]!
	ldreq	r5, [r1, #-4]!

	strvsb	r3, [r0, #-1]!
	strcsh	r4, [r0, #-2]!
	streq	r5, [r0, #-4]!

	ldmmidb	r1!, { r3, r4 }
	stmmidb	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28
	beq	Lexit

Lunaligned_32_reverse_similarly:
	/* bring dest down to 32 byte alignment */
	tst	r0, #(1 << 4)
	ldmnedb	r1!, { r3, r4, r5, r12 }
	stmnedb	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned_reverse
	b	Lbytewise_reverse

	/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp	r2, #8
	blt	Lbytewise2		/* not worth the effort with fewer than 8 bytes */

	/* bytewise copy until src word aligned */
	tst	r1, #3
Lwordalignloop2:
	ldrneb	r3, [r1], #1
	strneb	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop2

	/* figure out how the src and dest are unaligned */
	and	r3, r0, #3
	cmp	r3, #2
	blt	Lalign1_forward
	beq	Lalign2_forward
	bgt	Lalign3_forward

Lalign1_forward:
	/* the dest pointer is 1 byte off from src */
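	/*
	 * Here and in the 2- and 3-byte cases below: back dest up to its
	 * word boundary, preload the byte(s) the align loop just wrote
	 * there, then merge each aligned source word with the carried
	 * bytes so every store is a full word-aligned str.  The carry
	 * left after the loop is flushed with the trailing strb/strh.
	 */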
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #1

	/* prime the copy */
	ldrb	r4, [r0]		/* load D[7:0] */

Lalign1_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #24		/* D[7:0] = S[31:24] */
	subs	r12, r12, #1
	bne	Lalign1_forward_loop

	/* finish the copy off */
	strb	r4, [r0], #1		/* save D[7:0] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign2_forward:
	/* the dest pointer is 2 bytes off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #2

	/* prime the copy */
	ldrh	r4, [r0]		/* load D[15:0] */

Lalign2_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #16		/* D[15:0] = S[31:16] */
	subs	r12, r12, #1
	bne	Lalign2_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lalign3_forward:
	/* the dest pointer is 3 bytes off from src */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #3

	/* prime the copy */
	ldr	r4, [r0]
	and	r4, r4, #0x00ffffff	/* load D[23:0] */

Lalign3_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #8		/* D[23:0] = S[31:8] */
	subs	r12, r12, #1
	bne	Lalign3_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */
	mov	r4, r4, lsr #16
	strb	r4, [r0], #1		/* save D[23:16] */

	ands	r2, r2, #3
	beq	Lexit
	b	Lbytewise2

Lexit:
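	/* pop the saved r0 (original dest, memcpy/memmove's return value),
	   scratch r4/r5, and r7, returning by loading the saved lr into pc */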
	ldmfd	sp!, {r0, r4, r5, r7, pc}

#endif	// !defined _ARM_ARCH_7 || defined VARIANT_DYLD