/*
 * Copyright (c) 2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/proc_reg.h>

.syntax unified
.text
.align 2

.globl _ovbcopy
.globl _memcpy
.globl _bcopy
.globl _memmove

_bcopy: /* void bcopy(const void *src, void *dest, size_t len); */
_ovbcopy:
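	/*
	 * bcopy takes (src, dest, len) while the memcpy/memmove code below
	 * expects (dest, src, len), so swap r0 and r1 through r3 and fall
	 * through.
	 */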
	mov r3, r0
	mov r0, r1
	mov r1, r3

_memcpy: /* void *memcpy(void *dest, const void *src, size_t len); */
_memmove: /* void *memmove(void *dest, const void *src, size_t len); */
	/* check for zero len or if the pointers are the same */
	cmp r2, #0
	cmpne r0, r1
	bxeq lr

	/* save r0 (the return value), r4 and r5 (scratch), and r7/lr for the frame */
	stmfd sp!, { r0, r4, r5, r7, lr }
	add r7, sp, #12

	/* check for overlap. r3 <- distance between src & dest */
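	/*
	 * The flags still hold the cmpne r0, r1 result from above (stmfd and
	 * add do not touch them): hs means dest >= src, lo means dest < src.
	 */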
	subhs r3, r0, r1
	sublo r3, r1, r0
	cmp r3, r2 /* if distance(src, dest) < len, we have overlap */
	blo Loverlap

Lnormalforwardcopy:
	/* are src and dest dissimilarly word aligned? */
	mov r12, r0, lsl #30
	cmp r12, r1, lsl #30
	bne Lnonwordaligned_forward

	/* if len < 64, do a quick forward copy */
	cmp r2, #64
	blt Lsmallforwardcopy

	/* check for 16 byte src/dest unalignment */
	tst r0, #0xf
	bne Lsimilarlyunaligned

	/* check for 32 byte dest unalignment */
	tst r0, #(1<<4)
	bne Lunaligned_32

Lmorethan64_aligned:
	/* save some more registers to use in the copy */
	stmfd sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub r2, r2, #64

L64loop:
	/* copy 64 bytes at a time */
	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	pld [r1, #32]
	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmia r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs r2, r2, #64
	pld [r1, #32]
	stmia r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge L64loop

	/* restore the scratch registers we just saved */
	ldmfd sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds r2, r2, #64
	beq Lexit

Llessthan64_aligned:
	/* copy 16 bytes at a time until we have < 16 bytes */
	cmp r2, #16
	ldmiage r1!, { r3, r4, r5, r12 }
	stmiage r0!, { r3, r4, r5, r12 }
	subsge r2, r2, #16
	bgt Llessthan64_aligned
	beq Lexit

Llessthan16_aligned:
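	/*
	 * Fewer than 16 bytes remain. Shift the low four bits of the length
	 * into the NZCV flags (bit 3 -> N, bit 2 -> Z, bit 1 -> C, bit 0 -> V)
	 * so each of the 8/4/2/1 byte tails below is a single conditional copy.
	 */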
	mov r2, r2, lsl #28
	msr cpsr_f, r2

	ldmiami r1!, { r2, r3 }
	ldreq r4, [r1], #4
	ldrhcs r5, [r1], #2
	ldrbvs r12, [r1], #1

	stmiami r0!, { r2, r3 }
	streq r4, [r0], #4
	strhcs r5, [r0], #2
	strbvs r12, [r0], #1
	b Lexit

Lsimilarlyunaligned:
	/* both src and dest are unaligned in similar ways, align dest to a 32 byte boundary */
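	/*
	 * Move the negated low nibble of dest into the NZCV flags; the
	 * conditional 1/2/4/8 byte copies below then advance both pointers by
	 * exactly 16 - (dest & 0xf) bytes, leaving dest 16 byte aligned.
	 * r12, lsr #28 recovers that byte count for the length update.
	 */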
	mov r12, r0, lsl #28
	rsb r12, r12, #0
	msr cpsr_f, r12

	ldrbvs r3, [r1], #1
	ldrhcs r4, [r1], #2
	ldreq r5, [r1], #4

	strbvs r3, [r0], #1
	strhcs r4, [r0], #2
	streq r5, [r0], #4

	ldmiami r1!, { r3, r4 }
	stmiami r0!, { r3, r4 }

	subs r2, r2, r12, lsr #28
	beq Lexit

Lunaligned_32:
	/* bring dest up to 32 byte alignment */
	tst r0, #(1 << 4)
	ldmiane r1!, { r3, r4, r5, r12 }
	stmiane r0!, { r3, r4, r5, r12 }
	subne r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp r2, #64
	bge Lmorethan64_aligned
	b Llessthan64_aligned

Lbytewise2:
	/* copy 2 bytes at a time */
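	/*
	 * subs leaves the flags describing the remainder: with only one byte
	 * left the result goes negative (mi), so the pl-conditional second
	 * load/store pair is skipped; bhi loops only while more than two
	 * bytes remained.
	 */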
	subs r2, r2, #2

	ldrb r3, [r1], #1
	ldrbpl r4, [r1], #1

	strb r3, [r0], #1
	strbpl r4, [r0], #1

	bhi Lbytewise2
	b Lexit

Lbytewise:
	/* simple bytewise forward copy */
	ldrb r3, [r1], #1
	subs r2, r2, #1
	strb r3, [r0], #1
	bne Lbytewise
	b Lexit

Lsmallforwardcopy:
	/* src and dest are word aligned similarly, less than 64 bytes to copy */
	cmp r2, #4
	blt Lbytewise2

	/* bytewise copy until word aligned */
	tst r1, #3
Lwordalignloop:
	ldrbne r3, [r1], #1
	strbne r3, [r0], #1
	subne r2, r2, #1
	tstne r1, #3
	bne Lwordalignloop

	cmp r2, #16
	bge Llessthan64_aligned
	blt Llessthan16_aligned

Loverlap:
	/* src and dest overlap in some way, len > 0 */
	cmp r0, r1 /* if dest > src */
	bhi Loverlap_srclower

Loverlap_destlower:
	/* dest < src, see if we can still do a fast forward copy or fall back to a slow forward copy */
	cmp r3, #64
	bge Lnormalforwardcopy /* overlap is greater than one stride of the copy, use normal copy */

	cmp r3, #2
	bge Lbytewise2
	b Lbytewise

/* the following routines deal with having to copy in the reverse direction */
Loverlap_srclower:
	/* src < dest, with overlap */

	/* src += len; dest += len; */
	add r0, r0, r2
	add r1, r1, r2

	/* we have to copy in reverse no matter what; test whether we can use a large block reverse copy */
	cmp r2, #64 /* less than 64 bytes to copy? */
	cmpgt r3, #64 /* less than 64 bytes of nonoverlap? */
	blt Lbytewise_reverse

	/* test whether src and dest are word aligned dissimilarly */
	mov r3, r0, lsl #30
	cmp r3, r1, lsl #30
	bne Lbytewise_reverse

	/* test whether dest is not 16 byte aligned (this also covers non word alignment) */
	tst r0, #0xf
	bne Lunaligned_reverse_similarly

	/* test for dest 32 byte alignment */
	tst r0, #(1<<4)
	bne Lunaligned_32_reverse_similarly

/* 64 byte reverse block copy, src and dest aligned */
Lmorethan64_aligned_reverse:
	/* save some more registers to use in the copy */
	stmfd sp!, { r6, r8, r10, r11 }

	/* pre-subtract 64 from the len counter to avoid an extra compare in the loop */
	sub r2, r2, #64

L64loop_reverse:
	/* copy 64 bytes at a time */
	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
#if ARCH_ARMv5 || ARCH_ARMv5e || ARCH_ARMv6
	pld [r1, #-32]
#endif
	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	ldmdb r1!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	subs r2, r2, #64
	pld [r1, #-32]
	stmdb r0!, { r3, r4, r5, r6, r8, r10, r11, r12 }
	bge L64loop_reverse

	/* restore the scratch registers we just saved */
	ldmfd sp!, { r6, r8, r10, r11 }

	/* fix up the len counter (previously subtracted an extra 64 from it) and test for completion */
	adds r2, r2, #64
	beq Lexit

Lbytewise_reverse:
	ldrb r3, [r1, #-1]!
	strb r3, [r0, #-1]!
	subs r2, r2, #1
	bne Lbytewise_reverse
	b Lexit

Lunaligned_reverse_similarly:
	/* both src and dest are unaligned in similar ways, align dest to a 32 byte boundary */
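	/*
	 * Same flag trick as the forward path, but without the negate: a
	 * descending copy reaches a 16 byte boundary after exactly
	 * (dest & 0xf) bytes, so the low nibble of dest is moved into the
	 * NZCV flags as is and the conditional 1/2/4/8 byte copies below
	 * peel off that many bytes.
	 */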
	mov r12, r0, lsl #28
	msr cpsr_f, r12

	ldrbvs r3, [r1, #-1]!
	ldrhcs r4, [r1, #-2]!
	ldreq r5, [r1, #-4]!

	strbvs r3, [r0, #-1]!
	strhcs r4, [r0, #-2]!
	streq r5, [r0, #-4]!

	ldmdbmi r1!, { r3, r4 }
	stmdbmi r0!, { r3, r4 }

	subs r2, r2, r12, lsr #28
	beq Lexit

Lunaligned_32_reverse_similarly:
	/* bring dest down to 32 byte alignment */
	tst r0, #(1 << 4)
	ldmdbne r1!, { r3, r4, r5, r12 }
	stmdbne r0!, { r3, r4, r5, r12 }
	subne r2, r2, #16

	/* we should now be aligned, see what copy method we should use */
	cmp r2, #64
	bge Lmorethan64_aligned_reverse
	b Lbytewise_reverse

/* the following routines deal with non word aligned copies */
Lnonwordaligned_forward:
	cmp r2, #8
	blt Lbytewise2 /* not worth the word-merge setup for copies this small */

	/* bytewise copy until src word aligned */
	tst r1, #3
Lwordalignloop2:
	ldrbne r3, [r1], #1
	strbne r3, [r0], #1
	subne r2, r2, #1
	tstne r1, #3
	bne Lwordalignloop2

	/* figure out how the src and dest are unaligned */
	and r3, r0, #3
	cmp r3, #2
	blt Lalign1_forward
	beq Lalign2_forward
	bgt Lalign3_forward

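/*
 * The three Lalign*_forward loops below handle the case where src is now
 * word aligned but dest sits 1, 2, or 3 bytes past a word boundary. Each
 * iteration loads one aligned word from src and splices it with the bytes
 * left over from the previous word using shifts and orr, so every store to
 * dest is a single aligned word store. The priming load reads the existing
 * byte(s) just below dest and the first word store writes them back
 * unchanged.
 */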
Lalign1_forward:
	/* the dest pointer is 1 byte off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #1

	/* prime the copy */
	ldrb r4, [r0] /* load D[7:0] */

Lalign1_forward_loop:
	ldr r3, [r1], #4 /* load S */
	orr r4, r4, r3, lsl #8 /* D[31:8] = S[23:0] */
	str r4, [r0], #4 /* save D */
	mov r4, r3, lsr #24 /* D[7:0] = S[31:24] */
	subs r12, r12, #1
	bne Lalign1_forward_loop

	/* finish the copy off */
	strb r4, [r0], #1 /* save D[7:0] */

	ands r2, r2, #3
	beq Lexit
	b Lbytewise2

Lalign2_forward:
	/* the dest pointer is 2 bytes off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #2

	/* prime the copy */
	ldrh r4, [r0] /* load D[15:0] */

Lalign2_forward_loop:
	ldr r3, [r1], #4 /* load S */
	orr r4, r4, r3, lsl #16 /* D[31:16] = S[15:0] */
	str r4, [r0], #4 /* save D */
	mov r4, r3, lsr #16 /* D[15:0] = S[31:16] */
	subs r12, r12, #1
	bne Lalign2_forward_loop

	/* finish the copy off */
	strh r4, [r0], #2 /* save D[15:0] */

	ands r2, r2, #3
	beq Lexit
	b Lbytewise2

Lalign3_forward:
	/* the dest pointer is 3 bytes off from src */
	mov r12, r2, lsr #2 /* number of words we should copy */
	sub r0, r0, #3

	/* prime the copy */
	ldr r4, [r0]
	and r4, r4, #0x00ffffff /* load D[23:0] */

Lalign3_forward_loop:
	ldr r3, [r1], #4 /* load S */
	orr r4, r4, r3, lsl #24 /* D[31:24] = S[7:0] */
	str r4, [r0], #4 /* save D */
	mov r4, r3, lsr #8 /* D[23:0] = S[31:8] */
	subs r12, r12, #1
	bne Lalign3_forward_loop

	/* finish the copy off */
	strh r4, [r0], #2 /* save D[15:0] */
	mov r4, r4, lsr #16
	strb r4, [r0], #1 /* save D[23:16] */

	ands r2, r2, #3
	beq Lexit
	b Lbytewise2

Lexit:
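	/*
	 * Pop the original dest pointer back into r0 (the memcpy/memmove
	 * return value) along with the saved registers, and return.
	 */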
	ldmfd sp!, { r0, r4, r5, r7, pc }