/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/proc_reg.h>

.syntax unified
.text
.align 2

.globl _ovbcopy
.globl _memcpy
.globl _bcopy
.globl _memmove

_bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
_ovbcopy:
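	/*
	 * bcopy takes (src, dest, len) while the memcpy/memmove code below
	 * expects (dest, src, len), so swap r0 and r1 through r3 and fall
	 * through.
	 */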
	mov r3, r0
	mov r0, r1
	mov r1, r3

_memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
_memmove: /* void *memmove(void *dest, const void *src, size_t len); */
	/* check for zero len or if the pointers are the same */
	cmp r2, #0
	cmpne r0, r1
	bxeq lr

	/* save r0 (the return value), r4 and r5 (scratch), and r7/lr for the frame */
	stmfd sp!, { r0, r4, r5, r7, lr }
	add r7, sp, #12

	/* check for overlap. r3 <- distance between src & dest */
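	/*
	 * The flags still hold the cmpne r0, r1 result from above (stmfd and
	 * add do not touch them): hs means dest >= src, lo means dest < src.
	 */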
	subhs r3, r0, r1
	sublo r3, r1, r0
	cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
	blo Loverlap

Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? */
	mov r12, r0, lsl #30
	cmp r12, r1, lsl #30
	bne Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp r2, #64
	blt Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment */
	tst r0, #0xf
	bne Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst r0, #(1<<4)
	bne Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub r2, r2, #64

L64loop:
	/* copy 64 bytes at a time */
	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	pld [r1, #32]
	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs r2, r2, #64
	pld [r1, #32]
	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge L64loop

	/* restore the scratch registers we just saved */
	ldmfd sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds r2, r2, #64
	beq Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp r2, #16
	ldmiage r1!, { r3, r4, r5, r12 }
	stmiage r0!, { r3, r4, r5, r12 }
	subsge r2, r2, #16
	bgt Llessthan64_aligned
	beq Lexit

Llessthan16_aligned:
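	/*
	 * Fewer than 16 bytes remain. Shift the low four bits of the length
	 * into the NZCV flags (bit 3 -> N, bit 2 -> Z, bit 1 -> C, bit 0 -> V)
	 * so each of the 8/4/2/1 byte tails below is a single conditional copy.
	 */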
	mov r2, r2, lsl #28
	msr cpsr_f, r2

	ldmiami r1!, { r2, r3 }
	ldreq r4, [r1], #4
	ldrhcs r5, [r1], #2
	ldrbvs r12, [r1], #1

	stmiami r0!, { r2, r3 }
	streq r4, [r0], #4
	strhcs r5, [r0], #2
	strbvs r12, [r0], #1
	b Lexit

Lsimilarlyunaligned:
	/* both src and dest are unaligned in similar ways, align dest to a 32 byte boundary */
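	/*
	 * Move the negated low nibble of dest into the NZCV flags; the
	 * conditional 1/2/4/8 byte copies below then advance both pointers by
	 * exactly 16 - (dest & 0xf) bytes, leaving dest 16 byte aligned.
	 * r12, lsr #28 recovers that byte count for the length update.
	 */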
	mov r12, r0, lsl #28
	rsb r12, r12, #0
	msr cpsr_f, r12

	ldrbvs r3, [r1], #1
	ldrhcs r4, [r1], #2
	ldreq r5, [r1], #4

	strbvs r3, [r0], #1
	strhcs r4, [r0], #2
	streq r5, [r0], #4

	ldmiami r1!, { r3, r4 }
	stmiami r0!, { r3, r4 }

	subs r2, r2, r12, lsr #28
	beq Lexit

Lunaligned_32:
	/* bring dest up to 32 byte alignment */
	tst r0, #(1 << 4)
	ldmiane r1!, { r3, r4, r5, r12 }
	stmiane r0!, { r3, r4, r5, r12 }
	subne r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp r2, #64
	bge Lmorethan64_aligned
	b Llessthan64_aligned

Lbytewise2:
	/* copy 2 bytes at a time */
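	/*
	 * subs leaves the flags describing the remainder: with only one byte
	 * left the result goes negative (mi), so the pl-conditional second
	 * load/store pair is skipped; bhi loops only while more than two
	 * bytes remained.
	 */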
	subs r2, r2, #2

	ldrb r3, [r1], #1
	ldrbpl r4, [r1], #1

	strb r3, [r0], #1
	strbpl r4, [r0], #1

	bhi Lbytewise2
	b Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb r3, [r1], #1
	subs r2, r2, #1
	strb r3, [r0], #1
	bne Lbytewise
	b Lexit

Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp r2, #4
	blt Lbytewise2

	/* bytewise copy until word aligned */
	tst r1, #3
Lwordalignloop:
	ldrbne r3, [r1], #1
	strbne r3, [r0], #1
	subne r2, r2, #1
	tstne r1, #3
	bne Lwordalignloop

	cmp r2, #16
	bge Llessthan64_aligned
	blt Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp r0, r1 /* if dest > src */
	bhi Loverlap_srclower

Loverlap_destlower:
	/* dest < src, see if we can still do a fast forward copy or fall back to a slow forward copy */
	cmp r3, #64
	bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */

	cmp r3, #2
	bge Lbytewise2
	b Lbytewise

/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap */

	/* src += len; dest += len; */
	add r0, r0, r2
	add r1, r1, r2

	/* we have to copy in reverse no matter what; test whether we can use a large block reverse copy */
	cmp r2, #64 /* less than 64 bytes to copy? */
	cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
	blt Lbytewise_reverse

	/* test whether src and dest are word aligned dissimilarly */
	mov r3, r0, lsl #30
	cmp r3, r1, lsl #30
	bne Lbytewise_reverse

	/* test whether dest is not 16 byte aligned (this also covers non word alignment) */
	tst r0, #0xf
	bne Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst r0, #(1<<4)
	bne Lunaligned_32_reverse_similarly

/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time */
	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#if ARCH_ARMv5 || ARCH_ARMv5e || ARCH_ARMv6
	pld [r1, #-32]
#endif
	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs r2, r2, #64
	pld [r1, #-32]
	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds r2, r2, #64
	beq Lexit

Lbytewise_reverse:
	ldrb r3, [r1, #-1]!
	strb r3, [r0, #-1]!
	subs r2, r2, #1
	bne Lbytewise_reverse
	b Lexit

Lunaligned_reverse_similarly:
	/* both src and dest are unaligned in similar ways, align dest to a 32 byte boundary */
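	/*
	 * Same flag trick as the forward path, but without the negate: a
	 * descending copy reaches a 16 byte boundary after exactly
	 * (dest & 0xf) bytes, so the low nibble of dest is moved into the
	 * NZCV flags as is and the conditional 1/2/4/8 byte copies below
	 * peel off that many bytes.
	 */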
	mov r12, r0, lsl #28
	msr cpsr_f, r12

	ldrbvs r3, [r1, #-1]!
	ldrhcs r4, [r1, #-2]!
	ldreq r5, [r1, #-4]!

	strbvs r3, [r0, #-1]!
	strhcs r4, [r0, #-2]!
	streq r5, [r0, #-4]!

	ldmdbmi r1!, { r3, r4 }
	stmdbmi r0!, { r3, r4 }

	subs r2, r2, r12, lsr #28
	beq Lexit

Lunaligned_32_reverse_similarly:
	/* bring dest down to 32 byte alignment */
	tst r0, #(1 << 4)
	ldmdbne r1!, { r3, r4, r5, r12 }
	stmdbne r0!, { r3, r4, r5, r12 }
	subne r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp r2, #64
	bge Lmorethan64_aligned_reverse
	b Lbytewise_reverse

/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp r2, #8
	blt Lbytewise2 /* not worth the word-merge setup for copies this small */

	/* bytewise copy until src word aligned */
	tst r1, #3
Lwordalignloop2:
	ldrbne r3, [r1], #1
	strbne r3, [r0], #1
	subne r2, r2, #1
	tstne r1, #3
	bne Lwordalignloop2

	/* figure out how the src and dest are unaligned */
	and r3, r0, #3
	cmp r3, #2
	blt Lalign1_forward
	beq Lalign2_forward
	bgt Lalign3_forward

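/*
 * The three Lalign*_forward loops below handle the case where src is now
 * word aligned but dest sits 1, 2, or 3 bytes past a word boundary. Each
 * iteration loads one aligned word from src and splices it with the bytes
 * left over from the previous word using shifts and orr, so every store to
 * dest is a single aligned word store. The priming load reads the existing
 * byte(s) just below dest and the first word store writes them back
 * unchanged.
 */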
Lalign1_forward:
	/* the dest pointer is 1 byte off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #1

	/* prime the copy */
	ldrb r4, [r0] /* load D[7:0] */

Lalign1_forward_loop:
	ldr r3, [r1], #4 /* load S */
	orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
	str r4, [r0], #4 /* save D */
	mov r4, r3, lsr #24 /* D[7:0] = S[31:24] */
	subs r12, r12, #1
	bne Lalign1_forward_loop

	/* finish the copy off */
	strb r4, [r0], #1 /* save D[7:0] */

	ands r2, r2, #3
	beq Lexit
	b Lbytewise2

Lalign2_forward:
	/* the dest pointer is 2 bytes off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #2

	/* prime the copy */
	ldrh r4, [r0] /* load D[15:0] */

Lalign2_forward_loop:
	ldr r3, [r1], #4 /* load S */
	orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
	str r4, [r0], #4 /* save D */
	mov r4, r3, lsr #16 /* D[15:0] = S[31:16] */
	subs r12, r12, #1
	bne Lalign2_forward_loop

	/* finish the copy off */
	strh r4, [r0], #2 /* save D[15:0] */

	ands r2, r2, #3
	beq Lexit
	b Lbytewise2

Lalign3_forward:
	/* the dest pointer is 3 bytes off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #3

	/* prime the copy */
	ldr r4, [r0]
	and r4, r4, #0x00ffffff /* load D[23:0] */

Lalign3_forward_loop:
	ldr r3, [r1], #4 /* load S */
	orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
	str r4, [r0], #4 /* save D */
	mov r4, r3, lsr #8 /* D[23:0] = S[31:8] */
	subs r12, r12, #1
	bne Lalign3_forward_loop

	/* finish the copy off */
	strh r4, [r0], #2 /* save D[15:0] */
	mov r4, r4, lsr #16
	strb r4, [r0], #1 /* save D[23:16] */

	ands r2, r2, #3
	beq Lexit
	b Lbytewise2

Lexit:
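	/*
	 * Pop the original dest pointer back into r0 (the memcpy/memmove
	 * return value) along with the saved registers, and return.
	 */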
	ldmfd sp!, { r0, r4, r5, r7, pc }