]>
Commit | Line | Data |
---|---|---|
7b00c0c4 A |
1 | /* |
2 | * Copyright (c) 2006, 2009 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
23 | ||
ad3c9f2a A |
/*****************************************************************************
 * ARMv5 and ARMv6 implementation, also used in dyld on later archs         *
 *****************************************************************************/

/* Built only for pre-ARMv7 targets, or for dyld's own copy on later archs. */
#include <arm/arch.h>
#if !defined _ARM_ARCH_7 || defined VARIANT_DYLD

	.text
	.align 2

	.globl _memcpy
	.globl _bcopy
	.globl _memmove
37 | ||
_bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
	/*
	 * bcopy passes (src, dest) while memcpy takes (dest, src): exchange
	 * r0 and r1 through the intraprocedural scratch register ip (r12),
	 * then fall straight through into _memcpy below.
	 */
	mov r12, r0             /* ip = src */
	mov r0, r1              /* r0 = dest */
	mov r1, r12             /* r1 = src */
42 | ||
_memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
_memmove: /* void *memmove(void *dest, const void *src, size_t len); */
	/* return immediately on zero len or when the pointers are the same */
	cmp r2, #0
	cmpne r0, r1
	bxeq lr

	/* save r0 (return value), r4 (scratch), and r5 (scratch); build frame in r7 */
	stmfd sp!, { r0, r4, r5, r7, lr }
	add r7, sp, #12

	/*
	 * Check for overlap: r3 <- |dest - src|.  The hs/lo conditions reuse
	 * the flags of the "cmpne r0, r1" above (stmfd and a flag-less add
	 * do not alter them); exactly one of the two subtractions executes.
	 */
	subhs r3, r0, r1
	sublo r3, r1, r0
	cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
	blo Loverlap
59 | ||
Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? (compare low 2 bits) */
	mov r12, r0, lsl #30
	cmp r12, r1, lsl #30
	bne Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp r2, #64
	blt Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment */
	tst r0, #0xf
	bne Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst r0, #(1<<4)
	bne Lunaligned_32
77 | ||
Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub r2, r2, #64

L64loop:
	/* copy 64 bytes at a time: two 32-byte ldm/stm pairs through 8 registers */
	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld [r1, #32] /* prefetch the next source cache line */
#endif
	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs r2, r2, #64
#ifdef _ARM_ARCH_6
	pld [r1, #32]
#endif
	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge L64loop

	/* restore the scratch registers we just saved */
	ldmfd sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds r2, r2, #64
	beq Lexit
106 | ||
Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes, then fall through */
	cmp r2, #16
	ldmgeia r1!, { r3, r4, r5, r12 }
	stmgeia r0!, { r3, r4, r5, r12 }
	subges r2, r2, #16
	bgt Llessthan64_aligned
	beq Lexit
115 | ||
Llessthan16_aligned:
	/*
	 * Fewer than 16 bytes remain.  Shift the low 4 bits of the count into
	 * the flags field: N <- len&8, Z <- len&4, C <- len&2, V <- len&1;
	 * then one conditional transfer per set flag copies the remainder.
	 */
	mov r2, r2, lsl #28
	msr cpsr_f, r2

	ldmmiia r1!, { r2, r3 }  /* N set: 8 bytes */
	ldreq r4, [r1], #4       /* Z set: 4 bytes */
	ldrcsh r5, [r1], #2      /* C set: 2 bytes */
	ldrvsb r12, [r1], #1     /* V set: 1 byte */

	stmmiia r0!, { r2, r3 }
	streq r4, [r0], #4
	strcsh r5, [r0], #2
	strvsb r12, [r0], #1
	b Lexit
130 | ||
Lsimilarlyunaligned:
	/* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
	/*
	 * r12 <- -(dest << 28).  Writing it to the flags selects the 1/2/4/8
	 * byte transfers that bring dest up to 16-byte alignment, and
	 * (r12 lsr #28) recovers the number of bytes copied here.
	 */
	mov r12, r0, lsl #28
	rsb r12, r12, #0
	msr cpsr_f, r12

	ldrvsb r3, [r1], #1
	ldrcsh r4, [r1], #2
	ldreq r5, [r1], #4

	strvsb r3, [r0], #1
	strcsh r4, [r0], #2
	streq r5, [r0], #4

	ldmmiia r1!, { r3, r4 }
	stmmiia r0!, { r3, r4 }

	subs r2, r2, r12, lsr #28 /* len -= bytes consumed by the alignment copy */
	beq Lexit
150 | ||
Lunaligned_32:
	/* bring up to dest 32 byte alignment (one extra 16-byte chunk if needed) */
	tst r0, #(1 << 4)
	ldmneia r1!, { r3, r4, r5, r12 }
	stmneia r0!, { r3, r4, r5, r12 }
	subne r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp r2, #64
	bge Lmorethan64_aligned
	b Llessthan64_aligned
162 | ||
Lbytewise2:
	/* copy 2 bytes at a time; handles odd trailing byte via the pl condition */
	subs r2, r2, #2

	ldrb r3, [r1], #1
	ldrplb r4, [r1], #1 /* second byte only when at least 2 bytes remained */

	strb r3, [r0], #1
	strplb r4, [r0], #1

	bhi Lbytewise2
	b Lexit
175 | ||
Lbytewise:
	/* simple bytewise forward copy; assumes len > 0 on entry */
	ldrb r3, [r1], #1
	subs r2, r2, #1
	strb r3, [r0], #1
	bne Lbytewise
	b Lexit
183 | ||
Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp r2, #4
	blt Lbytewise2

	/* bytewise copy until word aligned */
	tst r1, #3
Lwordalignloop:
	ldrneb r3, [r1], #1
	strneb r3, [r0], #1
	subne r2, r2, #1
	tstne r1, #3
	bne Lwordalignloop

	cmp r2, #16
	bge Llessthan64_aligned
	blt Llessthan16_aligned
201 | ||
Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp r0, r1 /* if dest > src we must copy in reverse */
	bhi Loverlap_srclower

Loverlap_destlower:
	/* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
	cmp r3, #64 /* r3 = distance(src, dest) from the entry overlap test */
	bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */

	cmp r3, #2
	bge Lbytewise2
	b Lbytewise
215 | ||
/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap */

	/* src += len; dest += len; copy runs downward from the ends */
	add r0, r0, r2
	add r1, r1, r2

	/* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
	cmp r2, #64 /* less than 64 bytes to copy? */
	cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
	blt Lbytewise_reverse

	/* test if src and dest are nonword aligned differently */
	mov r3, r0, lsl #30
	cmp r3, r1, lsl #30
	bne Lbytewise_reverse

	/* test if src and dest are non word aligned or dest is non 16 byte aligned */
	tst r0, #0xf
	bne Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst r0, #(1<<4)
	bne Lunaligned_32_reverse_similarly
241 | ||
/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time, descending (pointers pre-advanced past the end) */
	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld [r1, #-32] /* prefetch the preceding source cache line */
#endif
	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs r2, r2, #64
#ifdef _ARM_ARCH_6
	pld [r1, #-32]
#endif
	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds r2, r2, #64
	beq Lexit
271 | ||
Lbytewise_reverse:
	/* bytewise reverse copy with pre-decrement; assumes len > 0 on entry */
	ldrb r3, [r1, #-1]!
	strb r3, [r0, #-1]!
	subs r2, r2, #1
	bne Lbytewise_reverse
	b Lexit
278 | ||
Lunaligned_reverse_similarly:
	/* both src and dest are unaligned in similar ways, align to dest on 32 byte boundary */
	/*
	 * Move dest's low nibble into the flags; the conditional pre-decrement
	 * transfers below peel off 1/2/4/8 bytes from the top so dest becomes
	 * 16-byte aligned for the descending block copy.
	 */
	mov r12, r0, lsl #28
	msr cpsr_f, r12

	ldrvsb r3, [r1, #-1]!
	ldrcsh r4, [r1, #-2]!
	ldreq r5, [r1, #-4]!

	strvsb r3, [r0, #-1]!
	strcsh r4, [r0, #-2]!
	streq r5, [r0, #-4]!

	ldmmidb r1!, { r3, r4 }
	stmmidb r0!, { r3, r4 }

	subs r2, r2, r12, lsr #28 /* len -= bytes consumed by the alignment copy */
	beq Lexit
297 | ||
Lunaligned_32_reverse_similarly:
	/* bring up to dest 32 byte alignment (one extra descending 16-byte chunk) */
	tst r0, #(1 << 4)
	ldmnedb r1!, { r3, r4, r5, r12 }
	stmnedb r0!, { r3, r4, r5, r12 }
	subne r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp r2, #64
	bge Lmorethan64_aligned_reverse
	b Lbytewise_reverse
309 | ||
/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp r2, #8
	blt Lbytewise2 /* not worth the effort with less than 8 bytes total */

	/* bytewise copy until src word aligned */
	tst r1, #3
Lwordalignloop2:
	ldrneb r3, [r1], #1
	strneb r3, [r0], #1
	subne r2, r2, #1
	tstne r1, #3
	bne Lwordalignloop2

	/* figure out how the src and dest are unaligned (dest low 2 bits) */
	and r3, r0, #3
	cmp r3, #2
	blt Lalign1_forward
	beq Lalign2_forward
	bgt Lalign3_forward
330 | ||
Lalign1_forward:
	/* the dest pointer is 1 byte off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #1      /* back dest up to a word boundary */

	/* prime the copy: reload the byte the align loop just stored at [r0] */
	ldrb r4, [r0] /* load D[7:0] */

Lalign1_forward_loop:
	/* shift each source word by one byte through the D accumulator */
	ldr r3, [r1], #4        /* load S */
	orr r4, r4, r3, lsl #8  /* D[31:8] = S[23:0] */
	str r4, [r0], #4        /* save D */
	mov r4, r3, lsr #24     /* D[7:0] = S[31:24] */
	subs r12, r12, #1
	bne Lalign1_forward_loop

	/* finish the copy off */
	strb r4, [r0], #1 /* save D[7:0] */

	ands r2, r2, #3 /* 0-3 tail bytes remain */
	beq Lexit
	b Lbytewise2
353 | ||
Lalign2_forward:
	/* the dest pointer is 2 bytes off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #2      /* back dest up to a word boundary */

	/* prime the copy: reload the halfword already present at [r0] */
	ldrh r4, [r0] /* load D[15:0] */

Lalign2_forward_loop:
	/* shift each source word by two bytes through the D accumulator */
	ldr r3, [r1], #4        /* load S */
	orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
	str r4, [r0], #4        /* save D */
	mov r4, r3, lsr #16     /* D[15:0] = S[31:16] */
	subs r12, r12, #1
	bne Lalign2_forward_loop

	/* finish the copy off */
	strh r4, [r0], #2 /* save D[15:0] */

	ands r2, r2, #3 /* 0-3 tail bytes remain */
	beq Lexit
	b Lbytewise2
376 | ||
Lalign3_forward:
	/* the dest pointer is 3 bytes off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #3      /* back dest up to a word boundary */

	/* prime the copy: keep the 3 bytes already present at [r0] */
	ldr r4, [r0]
	and r4, r4, #0x00ffffff /* load D[23:0] */

Lalign3_forward_loop:
	/* shift each source word by three bytes through the D accumulator */
	ldr r3, [r1], #4        /* load S */
	orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
	str r4, [r0], #4        /* save D */
	mov r4, r3, lsr #8      /* D[23:0] = S[31:8] */
	subs r12, r12, #1
	bne Lalign3_forward_loop

	/* finish the copy off: write the 3 pending bytes */
	strh r4, [r0], #2 /* save D[15:0] */
	mov r4, r4, lsr #16
	strb r4, [r0], #1 /* save D[23:16] */

	ands r2, r2, #3 /* 0-3 tail bytes remain */
	beq Lexit
	b Lbytewise2
402 | ||
Lexit:
	/* pop the original dest back into r0 (the return value), restore
	 * r4/r5/r7, and return by loading the saved lr straight into pc */
	ldmfd sp!, {r0, r4, r5, r7, pc}

#endif // !defined _ARM_ARCH_7 || defined VARIANT_DYLD
407 |