git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
	6	* The contents of this file constitute Original Code as defined in and
	7	* are subject to the Apple Public Source License Version 1.1 (the
	8	* "License"). You may not use this file except in compliance with the
	9	* License. Please obtain a copy of the License at
	10	* http://www.apple.com/publicsource and read it before using this file.
	11	*
	12	* This Original Code and all software distributed under the License are
	13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
	17	* License for the specific language governing rights and limitations
	18	* under the License.
	19	*
	20	* @APPLE_LICENSE_HEADER_END@
	21	*/
	22	/* =======================================
	23	* BCOPY, MEMCPY, and MEMMOVE for Mac OS X
	24	* =======================================
	25	*
	26	* Version of 2/20/2003, tuned for G3.
	27	*
	28	* Register usage. Note we use R2, so this code will not run in a PEF/CFM
	29	* environment.
	30	*
	31	* r0 = "w7" or temp
	32	* r2 = "w8"
	33	* r3 = not used, as memcpy and memmove return 1st parameter as a value
	34	* r4 = source ptr ("rs")
	35	* r5 = count of bytes to move ("rc")
	36	* r6 = "w1"
	37	* r7 = "w2"
	38	* r8 = "w3"
	39	* r9 = "w4"
	40	* r10 = "w5"
	41	* r11 = "w6"
	42	* r12 = destination ptr ("rd")
	43	* f0-f3 = used for moving 8-byte aligned data
	44	*/
	45	#define rs r4 // source ptr. NB: we depend on rs==r4 in "lswx" instructions
	46	#define rd r12 // destination ptr; r12 is inside the r5-r12 range filled by the final lswx, so it is copied to r0 first
	47	#define rc r5 // count of bytes to move
	48
	49	#define w1 r6 // w1-w8: work registers holding one 32-byte chunk in the word loops
	50	#define w2 r7 // also holds -rd while the destination alignment is computed
	51	#define w3 r8
	52	#define w4 r9 // also holds the #bytes needed to align the destination
	53	#define w5 r10
	54	#define w6 r11
	55	#define w7 r0 // r0 doubles as a general temp (chunk count, saved dest ptr)
	56	#define w8 r2 // use of r2 is why this code cannot run in a PEF/CFM environment
	57
	58	#define ASSEMBLER
	59	#include <sys/appleapiopts.h>
	60	#include <ppc/asm.h>
	61	#include <machine/cpu_capabilities.h>
	62	#include <machine/commpage.h>
	63
	64	.text
	65
	66
	67	#define kLong 33 // shortest length (bytes) that is too long for string ops: one lswx/stswx pair moves at most 32 bytes (r5-r12)
	68
	69
	70	// Main entry points. bcopy_g3 receives src in r3, dst in r4 and the byte count in r5 (rc).
	71	// Operands shorter than kLong are moved by a single lswx/stswx pair; longer ones branch to LLong0.
	72	.align 5
	73	bcopy_g3: // void bcopy(const void *src, void *dst, size_t len)
	74	cmplwi rc,kLong // length > 32 bytes?
	75	sub w1,r4,r3 // w1 = dst - src; the long path must move in reverse if (rd-rs)<rc
	76	mr rd,r4 // start to move source & dest to canonic spot (dest -> rd; source is moved at LLong0)
	77	bge LLong0 // skip if long operand
	78	mtxer rc // set length for string ops
	79	lswx r5,0,r3 // load all rc bytes from src into r5-r12 (may overwrite rc and rd, both dead from here on)
	80	stswx r5,0,r4 // store them at dst; everything was loaded first, so overlapping operands are safe
	81	blr
	82
	83	// NB: memcpy() and memmove() must follow bcopy() by 32 bytes, for comm page.
	84	// Both labels share one body: dst arrives in r3, src in r4 (rs), byte count in r5 (rc); r3 is never written.
	85	.align 5
	86	Lmemcpy_g3: // void* memcpy(void *dst, const void *src, size_t len)
	87	Lmemmove_g3: // void* memmove(void *dst, const void *src, size_t len)
	88	cmplwi rc,kLong // length > 32 bytes?
	89	sub w1,r3,rs // w1 = dst - src; must move in reverse if (rd-rs)<rc
	90	mr rd,r3 // must leave r3 alone, it is return value for memcpy etc
	91	bge LLong1 // longer than 32 bytes
	92	mtxer rc // set length for string ops
	93	lswx r5,0,r4 // load all rc bytes from src into r5-r12
	94	stswx r5,0,r3 // store them at dst; everything was loaded first, so overlapping operands are safe
	95	blr
	96
	97	// Long operands (more than 32 bytes.)
	98	// w1 = (rd-rs), used to check for overlap (forward vs. reverse move) and for relative alignment
	99	// At LLong1: rs = source ptr, rd = destination ptr, rc = byte count (>= kLong).
	100	LLong0: // enter from bcopy()
	101	mr rs,r3 // bcopy passes the source in r3; move it to the canonical source register rs
	102	LLong1: // enter from memcpy() and memmove()
	103	cmplw cr1,w1,rc // set cr1 blt iff we must move reverse, i.e. unsigned (rd-rs) < rc
	104	rlwinm r0,w1,0,0x3 // r0 = (rd-rs) & 3: are operands relatively word-aligned?
	105	neg w2,rd // prepare to align destination: low bits of -rd = #bytes up to the next boundary
	106	cmpwi cr5,r0,0 // set cr5 beq if relatively word aligned
	107	blt cr1,LLongReverse // handle reverse move
	108	andi. w4,w2,3 // w4 <- #bytes to word align destination (cr0 beq if none needed)
	109	beq cr5,LLongFloat // relatively aligned so use FPRs
	110	sub rc,rc,w4 // adjust count for alignment
	111	srwi r0,rc,5 // get #32-byte chunks to xfer (>=1 if w4==0; can be 0 once alignment bytes are deducted)
	112	rlwinm rc,rc,0,0x1F // mask down to leftover bytes
	113	mtctr r0 // set up loop count
	114	beq 1f // dest already word aligned (cr0 still holds the andi. result from above)
	115
	116	// Word align the destination.
	117
	118	mtxer w4 // byte count to xer
	119	cmpwi r0,0 // any chunks to xfer? (consumed by the beq- below)
	120	lswx w1,0,rs // move w4 bytes to align dest
	121	add rs,rs,w4 // step source past the alignment bytes
	122	stswx w1,0,rd
	123	add rd,rd,w4 // step dest past them; rd is now word aligned
	124	beq- 2f // pathologic case, no chunks to xfer
	125
	126	// Forward, unaligned loop.
	127	// Each pass loads a whole 32-byte chunk into w1-w8 before storing any of it.
	128	1:
	129	lwz w1,0(rs)
	130	lwz w2,4(rs)
	131	lwz w3,8(rs)
	132	lwz w4,12(rs)
	133	lwz w5,16(rs)
	134	lwz w6,20(rs)
	135	lwz w7,24(rs)
	136	lwz w8,28(rs)
	137	addi rs,rs,32 // source advances one chunk
	138	stw w1,0(rd)
	139	stw w2,4(rd)
	140	stw w3,8(rd)
	141	stw w4,12(rd)
	142	stw w5,16(rd)
	143	stw w6,20(rd)
	144	stw w7,24(rd)
	145	stw w8,28(rd)
	146	addi rd,rd,32 // dest advances one chunk
	147	bdnz 1b // repeat until the chunk count in ctr is exhausted
	148	2: // rc = remaining bytes (0-31)
	149	mtxer rc // set up count for string ops
	150	mr r0,rd // move dest ptr out of the way (rd is r12, which the lswx below may overwrite)
	151	lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
	152	stswx r5,0,r0 // store them
	153	blr
	154
	155
	156
	157	// Forward, aligned loop. We use FPRs.
	158	// Entered with w2 = -rd and the operands relatively word aligned; chunks move through f0-f3.
	159	LLongFloat:
	160	andi. w4,w2,7 // w4 <- #bytes to doubleword-align destination (cr0 beq if none needed)
	161	sub rc,rc,w4 // adjust count for alignment
	162	srwi r0,rc,5 // number of 32-byte chunks to xfer (can be 0 when w4 != 0)
	163	rlwinm rc,rc,0,0x1F // mask down to leftover bytes
	164	mtctr r0 // set up loop count
	165	beq 1f // dest already doubleword aligned (tests cr0 from the andi. above)
	166
	167	// Doubleword align the destination.
	168
	169	mtxer w4 // byte count to xer
	170	cmpwi r0,0 // any chunks to xfer? (consumed by the beq- below)
	171	lswx w1,0,rs // move w4 bytes to align dest (1-7 bytes, landing in w1/w2)
	172	add rs,rs,w4 // step source past the alignment bytes
	173	stswx w1,0,rd
	174	add rd,rd,w4 // step dest past them; rd is now doubleword aligned
	175	beq- 2f // pathologic case, no chunks to xfer
	176	1: // loop over 32-byte chunks
	177	lfd f0,0(rs)
	178	lfd f1,8(rs)
	179	lfd f2,16(rs)
	180	lfd f3,24(rs)
	181	addi rs,rs,32 // source advances one chunk
	182	stfd f0,0(rd)
	183	stfd f1,8(rd)
	184	stfd f2,16(rd)
	185	stfd f3,24(rd)
	186	addi rd,rd,32 // dest advances one chunk
	187	bdnz 1b // repeat until the chunk count in ctr is exhausted
	188	2: // rc = remaining bytes (0-31)
	189	mtxer rc // set up count for string ops
	190	mr r0,rd // move dest ptr out of the way (rd is r12, which the lswx below may overwrite)
	191	lswx r5,0,rs // load xer bytes into r5-r12 (rs==r4)
	192	stswx r5,0,r0 // store them
	193	blr
	194
	195
	196	// Long, reverse moves.
	197	// cr5 = beq if relatively word aligned
	198	// Reached when unsigned (rd-rs) < rc; the copy then runs from the high-address end downward.
	199	LLongReverse:
	200	add rd,rd,rc // point to end of operands + 1
	201	add rs,rs,rc
	202	beq cr5,LReverseFloat // aligned operands so can use FPRs
	203	srwi r0,rc,5 // get chunk count (rc >= kLong here, so at least 1)
	204	rlwinm rc,rc,0,0x1F // mask down to leftover bytes
	205	mtctr r0 // set up loop count
	206	mtxer rc // set up for trailing bytes (consumed by the lswx/stswx after the loop)
	207	1:
	208	lwz w1,-4(rs)
	209	lwz w2,-8(rs)
	210	lwz w3,-12(rs)
	211	lwz w4,-16(rs)
	212	stw w1,-4(rd)
	213	lwz w5,-20(rs)
	214	stw w2,-8(rd)
	215	lwz w6,-24(rs)
	216	stw w3,-12(rd)
	217	lwz w7,-28(rs)
	218	stw w4,-16(rd)
	219	lwzu w8,-32(rs) // last load of the chunk; update form also steps rs down by 32
	220	stw w5,-20(rd)
	221	stw w6,-24(rd)
	222	stw w7,-28(rd)
	223	stwu w8,-32(rd) // last store of the chunk; update form also steps rd down by 32
	224	bdnz 1b // repeat until the chunk count in ctr is exhausted
	225
	226	sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
	227	sub r0,rd,rc // move dest ptr out of way (rd is r12, which the lswx below may overwrite)
	228	lswx r5,0,r4 // load xer bytes into r5-r12
	229	stswx r5,0,r0 // store them
	230	blr
	231
	232
	233	// Long, reverse aligned moves. We use FPRs.
	234	// On entry rs and rd point one past the end of the operands (set up at LLongReverse).
	235	LReverseFloat:
	236	andi. w4,rd,7 // w4 <- #bytes to doubleword-align destination (trailing bytes above the last doubleword boundary)
	237	sub rc,rc,w4 // adjust count for alignment
	238	srwi r0,rc,5 // number of 32-byte chunks to xfer (can be 0 when w4 != 0)
	239	rlwinm rc,rc,0,0x1F // mask down to leftover bytes
	240	mtctr r0 // set up loop count
	241	beq 1f // dest already doubleword aligned (tests cr0 from the andi. above)
	242
	243	// Doubleword align the destination.
	244
	245	mtxer w4 // byte count to xer
	246	cmpwi r0,0 // any chunks to xfer? (consumed by the beq- below)
	247	sub rs,rs,w4 // point to 1st bytes to xfer
	248	sub rd,rd,w4 // rd is now doubleword aligned
	249	lswx w1,0,rs // move w4 bytes to align dest (1-7 bytes, landing in w1/w2)
	250	stswx w1,0,rd
	251	beq- 2f // pathologic case, no chunks to xfer
	252	1:
	253	lfd f0,-8(rs)
	254	lfd f1,-16(rs)
	255	lfd f2,-24(rs)
	256	lfdu f3,-32(rs) // last load of the chunk; update form also steps rs down by 32
	257	stfd f0,-8(rd)
	258	stfd f1,-16(rd)
	259	stfd f2,-24(rd)
	260	stfdu f3,-32(rd) // last store of the chunk; update form also steps rd down by 32
	261	bdnz 1b // repeat until the chunk count in ctr is exhausted
	262	2: // rc = remaining bytes (0-31)
	263	mtxer rc // set up count for string ops
	264	sub r4,rs,rc // point to 1st (leftmost) leftover byte (0..31)
	265	sub r0,rd,rc // move dest ptr out of way (rd is r12, which the lswx below may overwrite)
	266	lswx r5,0,r4 // load xer bytes into r5-r12
	267	stswx r5,0,r0 // store them
	268	blr
	269
	270	COMMPAGE_DESCRIPTOR(bcopy_g3,_COMM_PAGE_BCOPY,0,k64Bit+kHasAltivec,kCommPage32) // NOTE(review): macro is defined outside this file; presumably registers bcopy_g3 at _COMM_PAGE_BCOPY for CPUs without the k64Bit/kHasAltivec features - confirm against machine/commpage.h