git.saurik.com Git - apple/xnu.git/blame

Commit	Line	Data
5ba3f43e A	1	/*
	2	* Copyright (c) 2007 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28
	29	#include <arm/proc_reg.h>
	30
	31	.syntax unified
	32	.text
	33	.align 2
	34
	35	.globl _ovbcopy
	36	.globl _memcpy
	37	.globl _bcopy
	38	.globl _memmove
	39
	40	_bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
	41	_ovbcopy: /* shares the _bcopy entry: swap r0/r1 into memcpy argument order (r0 = dest, r1 = src) */
	42	mov r3, r0
	43	mov r0, r1
	44	mov r1, r3
	45
	46	_memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
	47	_memmove: /* void *memmove(void *dest, const void *src, size_t len); */
	48	/* return at once if len == 0 or dest == src; otherwise the flags are left set by the dest/src compare */
	49	cmp r2, #0
	50	cmpne r0, r1
	51	bxeq lr
	52
	53	/* save r0 (return value), r4 and r5 (scratch), plus r7 and lr; r7 <- address of the saved r7 slot */
	54	stmfd sp!, { r0, r4, r5, r7, lr }
	55	add r7, sp, #12
	56
	57	/* check for overlap. r3 <- |dest - src|; hs/lo still refer to `cmpne r0, r1` because stmfd/add leave the flags alone */
	58	subhs r3, r0, r1
	59	sublo r3, r1, r0
	60	cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
	61	blo Loverlap
	62
	63	Lnormalforwardcopy:
	64	/* forward copy; first test whether src and dest differ in their low two address bits */
65	mov r12, r0, lsl #30 /* r12 = (dest & 3) moved up to bits 31:30 */
66	cmp r12, r1, lsl #30 /* compare with (src & 3) in the same position */
67	bne Lnonwordaligned_forward
68
69	/* if len < 64, do a quick forward copy */
70	cmp r2, #64
71	blt Lsmallforwardcopy
72
73	/* is dest off a 16 byte boundary? (src shares dest's low two address bits on this path) */
74	tst r0, #0xf
75	bne Lsimilarlyunaligned
76
77	/* dest is 16 byte aligned here; is it off a 32 byte boundary? */
78	tst r0, #(1<<4)
79	bne Lunaligned_32
80
81	Lmorethan64_aligned:
82	/* save some more registers to use in the copy */
83	stmfd sp!, { r6, r8, r10, r11 }
84
85	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
86	sub r2, r2, #64
87
88	L64loop:
89	/* copy 64 bytes per iteration as two 32 byte (eight register) load/store bursts */
90	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
91	pld [r1, #32] /* prefetch hint for upcoming source data */
92	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
93	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
94	subs r2, r2, #64 /* the only flag-setting instruction in the loop; feeds the bge below */
95	pld [r1, #32]
96	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
97	bge L64loop /* loop while at least 64 more bytes remain */
98
99	/* restore the scratch registers we just saved */
100	ldmfd sp!, { r6, r8, r10, r11 }
101
102	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
103	adds r2, r2, #64
104	beq Lexit
105
106	Llessthan64_aligned:
107	/* copy 16 bytes at a time until we have < 16 bytes; when len < 16 on entry nothing below executes and we fall through */
108	cmp r2, #16
109	ldmiage r1!, { r3, r4, r5, r12 }
110	stmiage r0!, { r3, r4, r5, r12 }
111	subsge r2, r2, #16
112	bgt Llessthan64_aligned
113	beq Lexit
114
115	Llessthan16_aligned:
116	mov r2, r2, lsl #28 /* move the low four bits of len up to bits 31:28 */
117	msr cpsr_f, r2 /* ...and into the flags: N = 8, Z = 4, C = 2, V = 1 byte(s) still to copy */
118
119	ldmiami r1!, { r2, r3 } /* r2 is reused as data scratch; the count now lives in the flags */
120	ldreq r4, [r1], #4
121	ldrhcs r5, [r1], #2
122	ldrbvs r12, [r1], #1
123
124	stmiami r0!, { r2, r3 }
125	streq r4, [r0], #4
126	strhcs r5, [r0], #2
127	strbvs r12, [r0], #1
128	b Lexit
129
130	Lsimilarlyunaligned:
131	/* src and dest share their low two address bits; copy 1, 2, 4 and/or 8 bytes to bring dest to a 16 byte boundary (32 byte alignment is finished in Lunaligned_32 below) */
132	mov r12, r0, lsl #28
133	rsb r12, r12, #0 /* top nibble of r12 = bytes needed to reach the next 16 byte boundary */
134	msr cpsr_f, r12 /* N = 8, Z = 4, C = 2, V = 1 byte(s) to copy */
135
136	ldrbvs r3, [r1], #1
137	ldrhcs r4, [r1], #2
138	ldreq r5, [r1], #4
139
140	strbvs r3, [r0], #1
141	strhcs r4, [r0], #2
142	streq r5, [r0], #4
143
144	ldmiami r1!, { r3, r4 }
145	stmiami r0!, { r3, r4 }
146
147	subs r2, r2, r12, lsr #28 /* len -= number of bytes just copied */
148	beq Lexit
149
150	Lunaligned_32:
151	/* bring up to dest 32 byte alignment: copy one 16 byte chunk if bit 4 of dest is set */
152	tst r0, #(1 << 4)
153	ldmiane r1!, { r3, r4, r5, r12 }
154	stmiane r0!, { r3, r4, r5, r12 }
155	subne r2, r2, #16
156
157	/* we should now be aligned, see what copy method we should use */
158	cmp r2, #64
159	bge Lmorethan64_aligned
160	b Llessthan64_aligned
161
162	Lbytewise2:
163	/* forward copy, up to 2 bytes per pass; the second byte is skipped when only one byte was left */
164	subs r2, r2, #2 /* N set => only one byte was left; the flags also drive the bhi below */
165
166	ldrb r3, [r1], #1
167	ldrbpl r4, [r1], #1
168
169	strb r3, [r0], #1
170	strbpl r4, [r0], #1
171
172	bhi Lbytewise2 /* continue only if the subtraction did not borrow and left a nonzero count */
173	b Lexit
174
175	Lbytewise:
176	/* simple bytewise forward copy */
177	ldrb r3, [r1], #1
178	subs r2, r2, #1
179	strb r3, [r0], #1
180	bne Lbytewise
181	b Lexit
182
183	Lsmallforwardcopy:
184	/* src and dest are word aligned similarly, less than 64 bytes to copy */
185	cmp r2, #4
186	blt Lbytewise2
187
188	/* bytewise copy until src (and therefore dest, which shares its low two bits) is word aligned */
189	tst r1, #3
190	Lwordalignloop:
191	ldrbne r3, [r1], #1
192	strbne r3, [r0], #1
193	subne r2, r2, #1
194	tstne r1, #3 /* re-test only after a byte was copied; once aligned the Z flag ends the loop */
195	bne Lwordalignloop
196
197	cmp r2, #16 /* pick the 16-byte-chunk copy or the sub-16-byte tail copy */
198	bge Llessthan64_aligned
199	blt Llessthan16_aligned
200
201	Loverlap:
202	/* src and dest overlap in some way, len > 0; r3 = distance between them */
203	cmp r0, r1 /* if dest > src */
204	bhi Loverlap_srclower
205
206	Loverlap_destlower:
207	/* dest < src, see if we can still do a fast forward copy or fallback to slow forward copy */
208	cmp r3, #64
209	bge Lnormalforwardcopy /* distance is at least 64 bytes (one stride of the block copy), use normal copy */
210
211	cmp r3, #2
212	bge Lbytewise2 /* distance >= 2: use the two-bytes-per-pass copy */
213	b Lbytewise /* distance == 1: copy single bytes */
214
215	/* the following routines deal with having to copy in the reverse direction */
216	Loverlap_srclower:
217	/* src < dest, with overlap: copy from the end of the buffers towards the start */
218
219	/* src += len; dest += len; (both now point one past the last byte) */
220	add r0, r0, r2
221	add r1, r1, r2
222
223	/* we have to copy in reverse no matter what, test if we can use a large block reverse copy */
224	cmp r2, #64 /* less than 64 bytes to copy? */
225	cmpgt r3, #64 /* evaluated only when len > 64: less than 64 bytes of nonoverlap? */
226	blt Lbytewise_reverse
227
228	/* test if src and dest are nonword aligned differently */
229	mov r3, r0, lsl #30
230	cmp r3, r1, lsl #30
231	bne Lbytewise_reverse
232
233	/* test if src and dest are non word aligned or dest is non 16 byte aligned */
234	tst r0, #0xf
235	bne Lunaligned_reverse_similarly
236
237	/* test for dest 32 byte alignment */
238	tst r0, #(1<<4)
239	bne Lunaligned_32_reverse_similarly
240
241	/* 64 byte reverse block copy, src and dest aligned */
242	Lmorethan64_aligned_reverse:
243	/* save some more registers to use in the copy */
244	stmfd sp!, { r6, r8, r10, r11 }
245
246	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
247	sub r2, r2, #64
248
249	L64loop_reverse:
250	/* copy 64 bytes per iteration as two 32 byte (eight register) decrement-before bursts */
251	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
252	#if ARCH_ARMv5 || ARCH_ARMv5e || ARCH_ARMv6
253	pld [r1, #-32]
254	#endif
255	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
256	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
257	subs r2, r2, #64 /* the only flag-setting instruction in the loop; feeds the bge below */
258	pld [r1, #-32]
259	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
260	bge L64loop_reverse
261
262	/* restore the scratch registers we just saved */
263	ldmfd sp!, { r6, r8, r10, r11 }
264
265	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
266	adds r2, r2, #64
267	beq Lexit
268
269	Lbytewise_reverse:
270	ldrb r3, [r1, #-1]! /* step src back one byte, then load it */
271	strb r3, [r0, #-1]! /* step dest back one byte, then store */
272	subs r2, r2, #1
273	bne Lbytewise_reverse
274	b Lexit
275
276	Lunaligned_reverse_similarly:
277	/* src and dest share their low two address bits; copy 1, 2, 4 and/or 8 bytes downwards to bring dest to a 16 byte boundary (32 byte alignment is finished in the next section) */
278	mov r12, r0, lsl #28 /* low four bits of the dest end pointer = bytes above the 16 byte boundary */
279	msr cpsr_f, r12 /* N = 8, Z = 4, C = 2, V = 1 byte(s) to copy */
280
281	ldrbvs r3, [r1, #-1]!
282	ldrhcs r4, [r1, #-2]!
283	ldreq r5, [r1, #-4]!
284
285	strbvs r3, [r0, #-1]!
286	strhcs r4, [r0, #-2]!
287	streq r5, [r0, #-4]!
288
289	ldmdbmi r1!, { r3, r4 }
290	stmdbmi r0!, { r3, r4 }
291
292	subs r2, r2, r12, lsr #28 /* len -= number of bytes just copied */
293	beq Lexit
294
295	Lunaligned_32_reverse_similarly:
296	/* bring up to dest 32 byte alignment: copy one 16 byte chunk downwards if bit 4 of dest is set */
297	tst r0, #(1 << 4)
298	ldmdbne r1!, { r3, r4, r5, r12 }
299	stmdbne r0!, { r3, r4, r5, r12 }
300	subne r2, r2, #16
301
302	/* we should now be aligned, see what copy method we should use */
303	cmp r2, #64
304	bge Lmorethan64_aligned_reverse
305	b Lbytewise_reverse
306
307	/* the following routines deal with non word aligned copies (src and dest differ in their low two address bits) */
308	Lnonwordaligned_forward:
309	cmp r2, #8
310	blt Lbytewise2 /* fewer than 8 bytes: not worth the effort (NOTE(review): the original comment said 24 bytes, but the compare is against 8) */
311
312	/* bytewise copy until src word aligned */
313	tst r1, #3
314	Lwordalignloop2:
315	ldrbne r3, [r1], #1
316	strbne r3, [r0], #1
317	subne r2, r2, #1
318	tstne r1, #3 /* re-test only after a byte was copied; once aligned the Z flag ends the loop */
319	bne Lwordalignloop2
320
321	/* figure out how the src and dest are unaligned: r3 = dest & 3 now that src is word aligned */
322	and r3, r0, #3
323	cmp r3, #2
324	blt Lalign1_forward
325	beq Lalign2_forward
326	bgt Lalign3_forward
327
328	Lalign1_forward:
329	/* the dest pointer is 1 byte off from src; words are rebuilt by shifting (NOTE(review): shift directions look little-endian specific — confirm) */
330	mov r12, r2, lsr #2 /* number of words we should copy */
331	sub r0, r0, #1 /* round dest down to its word boundary */
332
333	/* prime the copy */
334	ldrb r4, [r0] /* load D[7:0], the existing byte just below dest, so it is rewritten unchanged */
335
336	Lalign1_forward_loop:
337	ldr r3, [r1], #4 /* load S */
338	orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
339	str r4, [r0], #4 /* save D */
340	mov r4, r3, lsr #24 /* D[7:0] = S[31:24], carried into the next word */
341	subs r12, r12, #1
342	bne Lalign1_forward_loop
343
344	/* finish the copy off */
345	strb r4, [r0], #1 /* save D[7:0] */
346
347	ands r2, r2, #3 /* bytes left over after the whole words */
348	beq Lexit
349	b Lbytewise2
350
351	Lalign2_forward:
352	/* the dest pointer is 2 bytes off from src */
353	mov r12, r2, lsr #2 /* number of words we should copy */
354	sub r0, r0, #2 /* round dest down to its word boundary */
355
356	/* prime the copy */
357	ldrh r4, [r0] /* load D[15:0], the existing halfword just below dest, so it is rewritten unchanged */
358
359	Lalign2_forward_loop:
360	ldr r3, [r1], #4 /* load S */
361	orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
362	str r4, [r0], #4 /* save D */
363	mov r4, r3, lsr #16 /* D[15:0] = S[31:16], carried into the next word */
364	subs r12, r12, #1
365	bne Lalign2_forward_loop
366
367	/* finish the copy off */
368	strh r4, [r0], #2 /* save D[15:0] */
369
370	ands r2, r2, #3 /* bytes left over after the whole words */
371	beq Lexit
372	b Lbytewise2
373
374	Lalign3_forward:
375	/* the dest pointer is 3 bytes off from src */
376	mov r12, r2, lsr #2 /* number of words we should copy */
377	sub r0, r0, #3 /* round dest down to its word boundary */
378
379	/* prime the copy */
380	ldr r4, [r0]
381	and r4, r4, #0x00ffffff /* keep D[23:0], the three existing bytes just below dest */
382
383	Lalign3_forward_loop:
384	ldr r3, [r1], #4 /* load S */
385	orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
386	str r4, [r0], #4 /* save D */
387	mov r4, r3, lsr #8 /* D[23:0] = S[31:8], carried into the next word */
388	subs r12, r12, #1
389	bne Lalign3_forward_loop
390
391	/* finish the copy off */
392	strh r4, [r0], #2 /* save D[15:0] */
393	mov r4, r4, lsr #16
394	strb r4, [r0], #1 /* save D[23:16] */
395
396	ands r2, r2, #3 /* bytes left over after the whole words */
397	beq Lexit
398	b Lbytewise2
399
400	Lexit: /* common return: restore r0 (the saved dest, i.e. the return value), r4, r5 and r7, and load the saved lr into pc */
401	ldmfd sp!, { r0, r4, r5, r7, pc }
402