/*
 * Provenance: apple/xnu.git, osfmk/ppc/commpage/bcopy_64.s
 * (release xnu-792.22.5; recovered from the git.saurik.com blame view).
 */
1/*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* =======================================
29 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
30 * =======================================
31 *
32 * Version of 2/20/2003, for a hypothetic 64-bit processor without Altivec.
33 * This version might be used bringing up new processors, with known
34 * Altivec bugs that need to be worked around. It is not particularly well
35 * optimized.
36 *
37 * For 64-bit processors with a 128-byte cache line, running in either
38 * 32- or 64-bit mode. This is written for 32-bit execution, the kernel
39 * will translate to 64-bit code when it compiles the 64-bit commpage.
40 *
41 * Register usage. Note we use R2, so this code will not run in a PEF/CFM
42 * environment.
43 * r0 = "w7" or temp
44 * r2 = "w8"
45 * r3 = not used, as memcpy and memmove return 1st parameter as a value
46 * r4 = source ptr ("rs")
47 * r5 = count of bytes to move ("rc")
48 * r6 = "w1"
49 * r7 = "w2"
50 * r8 = "w3"
51 * r9 = "w4"
52 * r10 = "w5"
53 * r11 = "w6"
54 * r12 = destination ptr ("rd")
55 */
56#define rs r4
57#define rd r12
58#define rc r5
59#define rv r2
60
61#define w1 r6
62#define w2 r7
63#define w3 r8
64#define w4 r9
65#define w5 r10
66#define w6 r11
67#define w7 r0
68#define w8 r2
69
70#define ASSEMBLER
71#include <sys/appleapiopts.h>
72#include <ppc/asm.h>
73#include <machine/cpu_capabilities.h>
74#include <machine/commpage.h>
75
76 .text
77
78#define kLong 64 // too long for inline loopless code
79
80
81// Main entry points.
82
83 .align 5
84bcopy_64: // void bcopy(const void *src, void *dst, size_t len)
85 cmplwi rc,kLong // short or long?
86 sub w1,r4,r3 // must move in reverse if (rd-rs)<rc
87 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
88 mr rd,r4 // start to move registers to canonic spot
89 mr rs,r3
90 blt LShort // handle short operands
91 dcbt 0,r3 // touch in destination
92 b LLong // join medium/long operand code
93
94// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
95
96 .align 5
97Lmemcpy_g4: // void* memcpy(void *dst, void *src, size_t len)
98Lmemmove_g4: // void* memmove(void *dst, const void *src, size_t len)
99 cmplwi rc,kLong // short or long?
100 sub w1,r3,r4 // must move in reverse if (rd-rs)<rc
101 dcbt 0,r4 // touch in the first line of source
102 cmplw cr1,w1,rc // set cr1 blt iff we must move reverse
103 mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
104 bge LLong // handle medium or long operands
105
106// Handle short operands.
107
108LShort:
109 mtcrf 0x02,rc // put length bits 26-27 in cr6 (faster one cr at a time)
110 mtcrf 0x01,rc // put length bits 28-31 in cr7
111 blt cr1,LShortReverse
112
113// Forward short operands. This is the most frequent case, so it is inline.
114
115LShort64: // enter to xfer last 64 bytes
116 bf 26,0f // 64-byte chunk to xfer?
117 ld w1,0(rs)
118 ld w2,8(rs)
119 ld w3,16(rs)
120 ld w4,24(rs)
121 addi rs,rs,32
122 std w1,0(rd)
123 std w2,8(rd)
124 std w3,16(rd)
125 std w4,24(rd)
126 addi rd,rd,32
1270:
128 bf 27,1f // quadword to move?
129 ld w1,0(rs)
130 ld w2,8(rs)
131 addi rs,rs,16
132 std w1,0(rd)
133 std w2,8(rd)
134 addi rd,rd,16
1351:
136 bf 28,2f // doubleword?
137 ld w1,0(rs)
138 addi rs,rs,8
139 std w1,0(rd)
140 addi rd,rd,8
1412:
142 bf 29,3f // word?
143 lwz w1,0(rs)
144 addi rs,rs,4
145 stw w1,0(rd)
146 addi rd,rd,4
1473:
148 bf 30,4f // halfword to move?
149 lhz w1,0(rs)
150 addi rs,rs,2
151 sth w1,0(rd)
152 addi rd,rd,2
1534:
154 bflr 31 // skip if no odd byte
155 lbz w1,0(rs)
156 stb w1,0(rd)
157 blr
158
159
160// Handle short reverse operands.
161// cr6 = bits 26-27 of length
162// cr7 = bits 28-31 of length
163
164LShortReverse:
165 add rs,rs,rc // adjust ptrs for reverse move
166 add rd,rd,rc
167LShortReverse64: // enter to xfer last 64 bytes
168 bf 26,0f // 64-byte chunk to xfer?
169 ld w1,-8(rs)
170 ld w2,-16(rs)
171 ld w3,-24(rs)
172 ldu w4,-32(rs)
173 std w1,-8(rd)
174 std w2,-16(rd)
175 std w3,-24(rd)
176 stdu w4,-32(rd)
1770:
178 bf 27,1f // quadword to move?
179 ld w1,-8(rs)
180 ldu w2,-16(rs)
181 std w1,-8(rd)
182 stdu w2,-16(rd)
1831:
184 bf 28,2f // doubleword?
185 ldu w1,-8(rs)
186 stdu w1,-8(rd)
1872:
188 bf 29,3f // word?
189 lwzu w1,-4(rs)
190 stwu w1,-4(rd)
1913:
192 bf 30,4f // halfword to move?
193 lhzu w1,-2(rs)
194 sthu w1,-2(rd)
1954:
196 bflr 31 // done if no odd byte
197 lbz w1,-1(rs) // no update
198 stb w1,-1(rd)
199 blr
200
201
202// Long operands.
203// cr1 = blt iff we must move reverse
204
205 .align 4
206LLong:
207 dcbtst 0,rd // touch in destination
208 neg w3,rd // start to compute #bytes to align destination
209 andi. w6,w3,7 // w6 <- #bytes to 8-byte align destination
210 blt cr1,LLongReverse // handle reverse moves
211 mtctr w6 // set up for loop to align destination
212 sub rc,rc,w6 // adjust count
213 beq LAligned // destination already 8-byte aligned
2141:
215 lbz w1,0(rs)
216 addi rs,rs,1
217 stb w1,0(rd)
218 addi rd,rd,1
219 bdnz 1b
220
221// Destination is 8-byte aligned.
222
223LAligned:
224 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
225 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
226 mtcrf 0x01,rc // put length bits 28-31 in cr7
227 beq LShort64 // no 64-byte chunks
228 mtctr w2
229 b 1f
230
231// Loop moving 64-byte chunks.
232
233 .align 5
2341:
235 ld w1,0(rs)
236 ld w2,8(rs)
237 ld w3,16(rs)
238 ld w4,24(rs)
239 ld w5,32(rs)
240 ld w6,40(rs)
241 ld w7,48(rs)
242 ld w8,56(rs)
243 addi rs,rs,64
244 std w1,0(rd)
245 std w2,8(rd)
246 std w3,16(rd)
247 std w4,24(rd)
248 std w5,32(rd)
249 std w6,40(rd)
250 std w7,48(rd)
251 std w8,56(rd)
252 addi rd,rd,64
253 bdnz 1b
254
255 b LShort64
256
257
258// Handle reverse moves.
259
260LLongReverse:
261 add rd,rd,rc // point to end of operands
262 add rs,rs,rc
263 andi. r0,rd,7 // is destination 8-byte aligned?
264 sub rc,rc,r0 // adjust count
265 mtctr r0 // set up for byte loop
266 beq LRevAligned // already aligned
267
2681:
269 lbzu w1,-1(rs)
270 stbu w1,-1(rd)
271 bdnz 1b
272
273// Destination is 8-byte aligned.
274
275LRevAligned:
276 srwi. w2,rc,6 // w2 <- count of 64-byte chunks
277 mtcrf 0x02,rc // leftover byte count to cr (faster one cr at a time)
278 mtcrf 0x01,rc // put length bits 28-31 in cr7
279 beq LShortReverse64 // no 64-byte chunks
280 mtctr w2
281 b 1f
282
283// Loop over 64-byte chunks (reverse).
284
285 .align 5
2861:
287 ld w1,-8(rs)
288 ld w2,-16(rs)
289 ld w3,-24(rs)
290 ld w4,-32(rs)
291 ld w5,-40(rs)
292 ld w6,-48(rs)
293 ld w7,-56(rs)
294 ldu w8,-64(rs)
295 std w1,-8(rd)
296 std w2,-16(rd)
297 std w3,-24(rd)
298 std w4,-32(rd)
299 std w5,-40(rd)
300 std w6,-48(rd)
301 std w7,-56(rd)
302 stdu w8,-64(rd)
303 bdnz 1b
304
305 b LShortReverse64
306
307 COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)