/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* =======================================
 * BCOPY, MEMCPY, and MEMMOVE for Mac OS X
 * =======================================
 *
 * Version of 2/20/2003, for a hypothetical 64-bit processor without Altivec.
 * This version might be used when bringing up new processors with known
 * Altivec bugs that need to be worked around.  It is not particularly well
 * optimized.
 *
 * For 64-bit processors with a 128-byte cache line, running in either
 * 32- or 64-bit mode.  This is written for 32-bit execution; the kernel
 * will translate it to 64-bit code when it compiles the 64-bit commpage.
 *
 * Register usage.  Note we use R2, so this code will not run in a PEF/CFM
 * environment.
 *   r0  = "w7" or temp
 *   r2  = "w8"
 *   r3  = not used, as memcpy and memmove return 1st parameter as a value
 *   r4  = source ptr ("rs")
 *   r5  = count of bytes to move ("rc")
 *   r6  = "w1"
 *   r7  = "w2"
 *   r8  = "w3"
 *   r9  = "w4"
 *   r10 = "w5"
 *   r11 = "w6"
 *   r12 = destination ptr ("rd")
 */
#define rs      r4
#define rd      r12
#define rc      r5
#define rv      r2

#define w1      r6
#define w2      r7
#define w3      r8
#define w4      r9
#define w5      r10
#define w6      r11
#define w7      r0
#define w8      r2

#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text

#define kLong   64              // too long for inline loopless code


// Main entry points.
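// Note that bcopy() takes (src,dst,len) while memcpy() and memmove() take
// (dst,src,len); all three entry points normalize their arguments into
// rs/rd/rc.  The unsigned compare of (rd-rs) against the length detects the
// overlap case in which the destination starts inside the source operand;
// those moves are done in descending (reverse) order.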

        .align  5
bcopy_64:                       // void bcopy(const void *src, void *dst, size_t len)
        cmplwi  rc,kLong        // short or long?
        sub     w1,r4,r3        // must move in reverse if (rd-rs)<rc
        cmplw   cr1,w1,rc       // set cr1 blt iff we must move reverse
        mr      rd,r4           // start to move registers to canonical spot
        mr      rs,r3
        blt     LShort          // handle short operands
        dcbt    0,r3            // touch in the first line of source
        b       LLong           // join medium/long operand code

// NB: memmove() must be 8 words past bcopy(), to agree with comm page addresses.
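// (The commpage exports these entry points at fixed user-mode addresses, so
// their relative placement is part of the user ABI.)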

        .align  5
Lmemcpy_g4:                     // void* memcpy(void *dst, const void *src, size_t len)
Lmemmove_g4:                    // void* memmove(void *dst, const void *src, size_t len)
        cmplwi  rc,kLong        // short or long?
        sub     w1,r3,r4        // must move in reverse if (rd-rs)<rc
        dcbt    0,r4            // touch in the first line of source
        cmplw   cr1,w1,rc       // set cr1 blt iff we must move reverse
        mr      rd,r3           // must leave r3 alone, it is return value for memcpy etc
        bge     LLong           // handle medium or long operands

// Handle short operands.

LShort:
        mtcrf   0x02,rc         // put length bits 26-27 in cr6 (faster one cr at a time)
        mtcrf   0x01,rc         // put length bits 28-31 in cr7
        blt     cr1,LShortReverse

// Forward short operands.  This is the most frequent case, so it is inline.
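//
// The two mtcrf instructions above copied the low six bits of the length into
// cr6 and cr7, so each "bf" below tests one power-of-two residue (32, 16, 8,
// 4, 2, or 1 bytes) and moves it if the corresponding bit is set.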

LShort64:                       // enter to xfer last 64 bytes
        bf      26,0f           // 32-byte chunk to xfer?
        ld      w1,0(rs)
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        addi    rs,rs,32
        std     w1,0(rd)
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        addi    rd,rd,32
0:
        bf      27,1f           // quadword to move?
        ld      w1,0(rs)
        ld      w2,8(rs)
        addi    rs,rs,16
        std     w1,0(rd)
        std     w2,8(rd)
        addi    rd,rd,16
1:
        bf      28,2f           // doubleword?
        ld      w1,0(rs)
        addi    rs,rs,8
        std     w1,0(rd)
        addi    rd,rd,8
2:
        bf      29,3f           // word?
        lwz     w1,0(rs)
        addi    rs,rs,4
        stw     w1,0(rd)
        addi    rd,rd,4
3:
        bf      30,4f           // halfword to move?
        lhz     w1,0(rs)
        addi    rs,rs,2
        sth     w1,0(rd)
        addi    rd,rd,2
4:
        bflr    31              // skip if no odd byte
        lbz     w1,0(rs)
        stb     w1,0(rd)
        blr


// Handle short reverse operands.
// cr6 = bits 26-27 of length
// cr7 = bits 28-31 of length
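//
// The reverse paths first advance rs and rd to one byte past the end of each
// operand, then use the load/store-with-update forms (ldu, stdu, lwzu, etc.)
// so the pointers walk downward as the residues are moved.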

LShortReverse:
        add     rs,rs,rc        // adjust ptrs for reverse move
        add     rd,rd,rc
LShortReverse64:                // enter to xfer last 64 bytes
        bf      26,0f           // 32-byte chunk to xfer?
        ld      w1,-8(rs)
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ldu     w4,-32(rs)
        std     w1,-8(rd)
        std     w2,-16(rd)
        std     w3,-24(rd)
        stdu    w4,-32(rd)
0:
        bf      27,1f           // quadword to move?
        ld      w1,-8(rs)
        ldu     w2,-16(rs)
        std     w1,-8(rd)
        stdu    w2,-16(rd)
1:
        bf      28,2f           // doubleword?
        ldu     w1,-8(rs)
        stdu    w1,-8(rd)
2:
        bf      29,3f           // word?
        lwzu    w1,-4(rs)
        stwu    w1,-4(rd)
3:
        bf      30,4f           // halfword to move?
        lhzu    w1,-2(rs)
        sthu    w1,-2(rd)
4:
        bflr    31              // done if no odd byte
        lbz     w1,-1(rs)       // no update
        stb     w1,-1(rd)
        blr


// Long operands.
// cr1 = blt iff we must move reverse
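//
// Forward long moves copy 0-7 bytes one at a time to 8-byte align the
// destination, move 64-byte chunks with the unrolled doubleword loop below,
// and finally branch into LShort64 to handle the remaining 0-63 bytes.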

        .align  4
LLong:
        dcbtst  0,rd            // touch in destination
        neg     w3,rd           // start to compute #bytes to align destination
        andi.   w6,w3,7         // w6 <- #bytes to 8-byte align destination
        blt     cr1,LLongReverse // handle reverse moves
        mtctr   w6              // set up for loop to align destination
        sub     rc,rc,w6        // adjust count
        beq     LAligned        // destination already 8-byte aligned
1:
        lbz     w1,0(rs)
        addi    rs,rs,1
        stb     w1,0(rd)
        addi    rd,rd,1
        bdnz    1b

// Destination is 8-byte aligned.

LAligned:
        srwi.   w2,rc,6         // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc         // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc         // put length bits 28-31 in cr7
        beq     LShort64        // no 64-byte chunks
        mtctr   w2
        b       1f

// Loop moving 64-byte chunks.

        .align  5
1:
        ld      w1,0(rs)
        ld      w2,8(rs)
        ld      w3,16(rs)
        ld      w4,24(rs)
        ld      w5,32(rs)
        ld      w6,40(rs)
        ld      w7,48(rs)
        ld      w8,56(rs)
        addi    rs,rs,64
        std     w1,0(rd)
        std     w2,8(rd)
        std     w3,16(rd)
        std     w4,24(rd)
        std     w5,32(rd)
        std     w6,40(rd)
        std     w7,48(rd)
        std     w8,56(rd)
        addi    rd,rd,64
        bdnz    1b

        b       LShort64


// Handle reverse moves.
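//
// The reverse long path mirrors the forward one: rs and rd are pointed just
// past the ends of the operands, 0-7 bytes are moved to 8-byte align the
// destination, 64-byte chunks are copied in descending order, and the
// remaining 0-63 bytes are handled by LShortReverse64.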

LLongReverse:
        add     rd,rd,rc        // point to end of operands
        add     rs,rs,rc
        andi.   r0,rd,7         // is destination 8-byte aligned?
        sub     rc,rc,r0        // adjust count
        mtctr   r0              // set up for byte loop
        beq     LRevAligned     // already aligned

1:
        lbzu    w1,-1(rs)
        stbu    w1,-1(rd)
        bdnz    1b

// Destination is 8-byte aligned.

LRevAligned:
        srwi.   w2,rc,6         // w2 <- count of 64-byte chunks
        mtcrf   0x02,rc         // leftover byte count to cr (faster one cr at a time)
        mtcrf   0x01,rc         // put length bits 28-31 in cr7
        beq     LShortReverse64 // no 64-byte chunks
        mtctr   w2
        b       1f

// Loop over 64-byte chunks (reverse).

        .align  5
1:
        ld      w1,-8(rs)
        ld      w2,-16(rs)
        ld      w3,-24(rs)
        ld      w4,-32(rs)
        ld      w5,-40(rs)
        ld      w6,-48(rs)
        ld      w7,-56(rs)
        ldu     w8,-64(rs)
        std     w1,-8(rd)
        std     w2,-16(rd)
        std     w3,-24(rd)
        std     w4,-32(rd)
        std     w5,-40(rd)
        std     w6,-48(rd)
        std     w7,-56(rd)
        stdu    w8,-64(rd)
        bdnz    1b

        b       LShortReverse64

        COMMPAGE_DESCRIPTOR(bcopy_64,_COMM_PAGE_BCOPY,k64Bit,kHasAltivec,kCommPageBoth+kPort32to64)
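
// The descriptor above registers this routine for the _COMM_PAGE_BCOPY slot.
// Per the commpage descriptor conventions, k64Bit is a capability the
// processor must have and kHasAltivec one it must not have, so this version
// is chosen only for 64-bit processors whose Altivec is absent or avoided;
// kCommPageBoth+kPort32to64 places it in both the 32- and 64-bit commpages,
// with the 32-bit source translated for the latter (see header comment above).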