/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
23 | ||
#include <arm/arch.h>

/*
 * 32-bit ARM (pre-UAL syntax) implementation of:
 *
 *     void  bcopy(const void *src, void *dest, size_t len);
 *     void *memcpy(void *dest, const void *src, size_t len);
 *     void *memmove(void *dest, const void *src, size_t len);
 *
 * memcpy and memmove share one body that detects overlap at run time and
 * chooses a forward or reverse copy.  bcopy swaps its arguments into
 * memcpy order and falls through.
 *
 * Register roles throughout:
 *   r0  = dest cursor (original dest saved on stack, restored at Lexit
 *         as the return value)
 *   r1  = src cursor
 *   r2  = bytes remaining
 *   r3  = |dest - src| on entry paths; otherwise scratch
 *   r4, r5, r12 = scratch
 *   r6, r8, r10, r11 = extra scratch, saved/restored around the
 *         64-byte block loops only
 */

	.text
	.align 2

	.globl _memcpy
	.globl _bcopy
	.globl _memmove

_bcopy:		/* void bcopy(const void *src, void *dest, size_t len); */
	/* bcopy takes (src, dest); swap into (dest, src) and fall through */
	mov	r3, r0
	mov	r0, r1
	mov	r1, r3

_memcpy:	/* void *memcpy(void *dest, const void *src, size_t len); */
_memmove:	/* void *memmove(void *dest, const void *src, size_t len); */
	/* return immediately for zero len or src == dest */
	cmp	r2, #0
	cmpne	r0, r1
	bxeq	lr

	/* save r0 (return value), r4 (scratch), and r5 (scratch) */
	stmfd	sp!, { r0, r4, r5, r7, lr }
	add	r7, sp, #12		/* establish frame pointer */

	/*
	 * Check for overlap.  r3 <- distance between src & dest.
	 * The flags here are still those of the "cmpne r0, r1" above
	 * (len is known nonzero), so hs/lo select |dest - src|.
	 */
	subhs	r3, r0, r1		/* dest >= src (unsigned) */
	sublo	r3, r1, r0		/* dest <  src (unsigned) */
	cmp	r3, r2			/* if distance(src, dest) < len, we have overlap */
	blo	Loverlap

Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? (compare low 2 bits) */
	mov	r12, r0, lsl #30
	cmp	r12, r1, lsl #30
	bne	Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp	r2, #64
	blt	Lsmallforwardcopy

	/* check for 16 byte dest unalignment (src is off by the same amount) */
	tst	r0, #0xf
	bne	Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop:
	/* copy 64 bytes at a time: two 8-register (32-byte) bursts */
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld	[r1, #32]		/* prefetch the next source line */
#endif
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
#ifdef _ARM_ARCH_6
	pld	[r1, #32]
#endif
	stmia	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop			/* loop while >= 64 bytes remain (biased) */

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp	r2, #16
	ldmgeia	r1!, { r3, r4, r5, r12 }
	stmgeia	r0!, { r3, r4, r5, r12 }
	subges	r2, r2, #16
	bgt	Llessthan64_aligned
	beq	Lexit

Llessthan16_aligned:
	/*
	 * 0..15 bytes remain.  Shift the low 4 bits of the count into the
	 * CPSR flag field: bit3 -> N, bit2 -> Z, bit1 -> C, bit0 -> V.
	 * Each conditional below then fires exactly when its count bit is
	 * set: mi = 8 bytes, eq = 4 bytes, cs = 2 bytes, vs = 1 byte.
	 */
	mov	r2, r2, lsl #28
	msr	cpsr_f, r2

	ldmmiia	r1!, { r2, r3 }		/* 8 bytes */
	ldreq	r4, [r1], #4		/* 4 bytes */
	ldrcsh	r5, [r1], #2		/* 2 bytes */
	ldrvsb	r12, [r1], #1		/* 1 byte  */

	stmmiia	r0!, { r2, r3 }
	streq	r4, [r0], #4
	strcsh	r5, [r0], #2
	strvsb	r12, [r0], #1
	b	Lexit

Lsimilarlyunaligned:
	/*
	 * src and dest are unaligned by the same amount; copy up to the
	 * next 16-byte dest boundary.  r12 = -(dest & 0xf) << 28, i.e.
	 * (16 - (dest & 0xf)) << 28, so the flag trick (see
	 * Llessthan16_aligned) copies exactly 16 - (dest & 0xf) bytes:
	 * vs = 1, cs = 2, eq = 4, mi = 8.
	 */
	mov	r12, r0, lsl #28
	rsb	r12, r12, #0
	msr	cpsr_f, r12

	ldrvsb	r3, [r1], #1
	ldrcsh	r4, [r1], #2
	ldreq	r5, [r1], #4

	strvsb	r3, [r0], #1
	strcsh	r4, [r0], #2
	streq	r5, [r0], #4

	ldmmiia	r1!, { r3, r4 }
	stmmiia	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28	/* len -= bytes just copied */
	beq	Lexit

Lunaligned_32:
	/* dest is 16-byte aligned; bring it up to 32-byte alignment */
	tst	r0, #(1 << 4)
	ldmneia	r1!, { r3, r4, r5, r12 }
	stmneia	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned
	b	Llessthan64_aligned

Lbytewise2:
	/*
	 * Copy 2 bytes at a time; handles odd counts.  After subs:
	 *   pl (count didn't go negative) -> a second byte exists;
	 *   hi (count still > 0 unsigned)  -> more pairs remain.
	 */
	subs	r2, r2, #2

	ldrb	r3, [r1], #1
	ldrplb	r4, [r1], #1

	strb	r3, [r0], #1
	strplb	r4, [r0], #1

	bhi	Lbytewise2
	b	Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb	r3, [r1], #1
	subs	r2, r2, #1
	strb	r3, [r0], #1
	bne	Lbytewise
	b	Lexit

Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp	r2, #4
	blt	Lbytewise2

	/* bytewise copy until word aligned (src and dest align together) */
	tst	r1, #3
Lwordalignloop:
	ldrneb	r3, [r1], #1
	strneb	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop

	cmp	r2, #16
	bge	Llessthan64_aligned
	blt	Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp	r0, r1			/* if dest > src, must copy backwards */
	bhi	Loverlap_srclower

Loverlap_destlower:
	/* dest < src: forward copy is safe; pick fast or slow variant */
	cmp	r3, #64
	bge	Lnormalforwardcopy	/* overlap distance exceeds one 64-byte stride: block copy is safe */

	cmp	r3, #2
	bge	Lbytewise2
	b	Lbytewise

	/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap; copy downward from the end */

	/* src += len; dest += len; */
	add	r0, r0, r2
	add	r1, r1, r2

	/*
	 * Use the 64-byte block reverse copy only when len > 64 AND the
	 * nonoverlap distance (r3) > 64; blt takes its flags from
	 * whichever cmp executed last.
	 */
	cmp	r2, #64			/* less than 64 bytes to copy? */
	cmpgt	r3, #64			/* less than 64 bytes of nonoverlap? */
	blt	Lbytewise_reverse

	/* test whether src and dest are word aligned differently */
	mov	r3, r0, lsl #30
	cmp	r3, r1, lsl #30
	bne	Lbytewise_reverse

	/* test if dest is non 16 byte aligned */
	tst	r0, #0xf
	bne	Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst	r0, #(1<<4)
	bne	Lunaligned_32_reverse_similarly

	/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd	sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub	r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time, descending (mirror of L64loop) */
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#ifdef _ARM_ARCH_6
	pld	[r1, #-32]		/* prefetch the next (lower) source line */
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb	r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs	r2, r2, #64
#ifdef _ARM_ARCH_6
	pld	[r1, #-32]
#endif
	stmdb	r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge	L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd	sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds	r2, r2, #64
	beq	Lexit

Lbytewise_reverse:
	/* simple bytewise reverse copy (pre-decrement addressing) */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	subs	r2, r2, #1
	bne	Lbytewise_reverse
	b	Lexit

Lunaligned_reverse_similarly:
	/*
	 * src and dest unaligned by the same amount; copy down to the
	 * previous 16-byte dest boundary.  No negation here (unlike
	 * Lsimilarlyunaligned): copying downward, the byte count needed
	 * is dest & 0xf itself, placed in the flags as vs=1/cs=2/eq=4/mi=8.
	 */
	mov	r12, r0, lsl #28
	msr	cpsr_f, r12

	ldrvsb	r3, [r1, #-1]!
	ldrcsh	r4, [r1, #-2]!
	ldreq	r5, [r1, #-4]!

	strvsb	r3, [r0, #-1]!
	strcsh	r4, [r0, #-2]!
	streq	r5, [r0, #-4]!

	ldmmidb	r1!, { r3, r4 }
	stmmidb	r0!, { r3, r4 }

	subs	r2, r2, r12, lsr #28	/* len -= (dest & 0xf) bytes just copied */
	beq	Lexit

Lunaligned_32_reverse_similarly:
	/* dest is 16-byte aligned; bring it down to 32-byte alignment */
	tst	r0, #(1 << 4)
	ldmnedb	r1!, { r3, r4, r5, r12 }
	stmnedb	r0!, { r3, r4, r5, r12 }
	subne	r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp	r2, #64
	bge	Lmorethan64_aligned_reverse
	b	Lbytewise_reverse

	/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp	r2, #8
	blt	Lbytewise2		/* too small to justify the alignment setup */

	/* bytewise copy until src word aligned */
	tst	r1, #3
Lwordalignloop2:
	ldrneb	r3, [r1], #1
	strneb	r3, [r0], #1
	subne	r2, r2, #1
	tstne	r1, #3
	bne	Lwordalignloop2

	/* src is now word aligned; dispatch on dest misalignment (1/2/3) */
	and	r3, r0, #3
	cmp	r3, #2
	blt	Lalign1_forward
	beq	Lalign2_forward
	bgt	Lalign3_forward

Lalign1_forward:
	/*
	 * dest & 3 == 1.  Word-align dest one byte back, then assemble
	 * each output word D from the carried low byte plus the next
	 * source word S, shifting by 8 each round.
	 */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #1

	/* prime the copy with the byte already stored at dest-1 */
	ldrb	r4, [r0]		/* load D[7:0] */

Lalign1_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #8	/* D[31:8] = S[23:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #24		/* carry D[7:0] = S[31:24] */
	subs	r12, r12, #1
	bne	Lalign1_forward_loop

	/* finish the copy off */
	strb	r4, [r0], #1		/* save carried D[7:0] */

	ands	r2, r2, #3		/* 0..3 tail bytes */
	beq	Lexit
	b	Lbytewise2

Lalign2_forward:
	/* dest & 3 == 2: same scheme as Lalign1_forward with 16-bit shifts */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #2

	/* prime the copy */
	ldrh	r4, [r0]		/* load D[15:0] */

Lalign2_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #16	/* D[31:16] = S[15:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #16		/* carry D[15:0] = S[31:16] */
	subs	r12, r12, #1
	bne	Lalign2_forward_loop

	/* finish the copy off */
	strh	r4, [r0], #2		/* save D[15:0] */

	ands	r2, r2, #3		/* 0..3 tail bytes */
	beq	Lexit
	b	Lbytewise2

Lalign3_forward:
	/* dest & 3 == 3: same scheme with 24-bit shifts */
	mov	r12, r2, lsr #2		/* number of words we should copy */
	sub	r0, r0, #3

	/* prime the copy */
	ldr	r4, [r0]
	and	r4, r4, #0x00ffffff	/* keep D[23:0] */

Lalign3_forward_loop:
	ldr	r3, [r1], #4		/* load S */
	orr	r4, r4, r3, lsl #24	/* D[31:24] = S[7:0] */
	str	r4, [r0], #4		/* save D */
	mov	r4, r3, lsr #8		/* carry D[23:0] = S[31:8] */
	subs	r12, r12, #1
	bne	Lalign3_forward_loop

	/* finish the copy off: store the 3 carried bytes */
	strh	r4, [r0], #2		/* save D[15:0] */
	mov	r4, r4, lsr #16
	strb	r4, [r0], #1		/* save D[23:16] */

	ands	r2, r2, #3		/* 0..3 tail bytes */
	beq	Lexit
	b	Lbytewise2

Lexit:
	/* restore saved registers; r0 comes back as the original dest */
	ldmfd	sp!, {r0, r4, r5, r7, pc}
400 | ||
401 |