[apple/xnu.git] / osfmk / ppc / commpage / memset_64.s

/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 * 
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */

#define	ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

/*
 * WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
 * to 64-bit mode for use in the 64-bit commpage.  This "port" consists of the following
 * simple transformations:
 *      - all word compares are changed to doubleword
 *      - all "srwi[.]" opcodes are changed to "srdi[.]"                      
 * Nothing else is done.  For this to work, the following rules must be
 * carefully followed:
 *      - do not use carry or overflow
 *      - only use record mode if you are sure the results are mode-invariant
 *        for example, all "andi." and almost all "rlwinm." are fine
 *      - do not use "slwi", "slw", or "srw"
 * An imaginative programmer could break the porting model in other ways, but the above
 * are the most likely problem areas.  It is perhaps surprising how well in practice
 * this simple method works.
 */        

        .text
        .align	2


/* *********************
 * * M E M S E T _ 6 4 *
 * *********************
 *
 * Bulk-store helper that Libc memset and _memset_pattern call for large nonzero
 * operands (zero fills are routed to bzero instead).  This variant targets a
 * hypothetical processor that is 64-bit but has no Altivec.  It is deliberately
 * unoptimized, since it would only be used during bringup.
 *
 * The 16-byte pattern is loaded once into r10/r11 and written twice per loop
 * pass, so every pass fills 32 bytes.  A tail of fewer than 32 bytes is not
 * stored here; its length is handed back in r4.
 *
 * On entry:
 *      r4 = number of bytes to store (must be >= 32; a smaller count would
 *           load CTR with 0, and bdnz decrements before it tests)
 *      r8 = address of the first byte to store (16-byte aligned)
 *      r9 = address of the 16-byte pattern to store (16-byte aligned)
 * On exit:
 *      r3 = untouched, since memset returns it
 *      r4 = bytes still to be stored (always < 32)
 *      r7 = untouched
 *      r8 = address of the next byte to store (still 16-byte aligned)
 *     r12 = untouched (holds return value for memset)
 * Also modified: r0, r10, r11, ctr
 */

memset_64:
        ld      r10,0(r9)               // r10 = first 8 bytes of the pattern
        ld      r11,8(r9)               // r11 = last 8 bytes of the pattern
        srwi    r0,r4,5                 // r0 = count / 32 = whole chunks (>0)
        mtctr   r0                      // CTR counts the chunks left to write
        rlwinm  r4,r4,0,0x1F            // r4 = count % 32, the tail for the caller

        // Each pass writes the pattern twice, i.e. one 32-byte chunk.
1:
        std     r10,0(r8)
        std     r11,8(r8)
        std     r10,16(r8)
        std     r11,24(r8)
        addi    r8,r8,32                // step to the next chunk
        bdnz++  1b                      // repeat until CTR reaches zero

        blr


	// Commpage descriptor for memset_64, associated with _COMM_PAGE_MEMSET_PATTERN.
	// NOTE(review): k64Bit and kHasAltivec look like the "required" and
	// "forbidden" capability masks, which would match the routine's header
	// comment (64-bit, no Altivec); kPort32to64 presumably requests the
	// 32-to-64-bit port described in the WARNING comment at the top of this
	// file.  Confirm against the COMMPAGE_DESCRIPTOR definition in
	// <machine/commpage.h>.
	COMMPAGE_DESCRIPTOR(memset_64,_COMM_PAGE_MEMSET_PATTERN,k64Bit,kHasAltivec, \
				kCommPageBoth+kPort32to64)
Commit	Line	Data
91447636 A	1	/*
	2	* Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
	6	* The contents of this file constitute Original Code as defined in and
	7	* are subject to the Apple Public Source License Version 1.1 (the
	8	* "License"). You may not use this file except in compliance with the
	9	* License. Please obtain a copy of the License at
	10	* http://www.apple.com/publicsource and read it before using this file.
	11	*
	12	* This Original Code and all software distributed under the License are
	13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
	17	* License for the specific language governing rights and limitations
	18	* under the License.
	19	*
	20	* @APPLE_LICENSE_HEADER_END@
	21	*/
	22
	23	#define ASSEMBLER
	24	#include <sys/appleapiopts.h>
	25	#include <ppc/asm.h>
	26	#include <machine/cpu_capabilities.h>
	27	#include <machine/commpage.h>
	28
	29	/*
	30	* WARNING: this code is written for 32-bit mode, and ported by the kernel if necessary
	31	* to 64-bit mode for use in the 64-bit commpage. This "port" consists of the following
	32	* simple transformations:
	33	* - all word compares are changed to doubleword
	34	* - all "srwi[.]" opcodes are changed to "srdi[.]"
	35	* Nothing else is done. For this to work, the following rules must be
	36	* carefully followed:
	37	* - do not use carry or overflow
	38	* - only use record mode if you are sure the results are mode-invariant
	39	* for example, all "andi." and almost all "rlwinm." are fine
	40	* - do not use "slwi", "slw", or "srw"
	41	* An imaginative programmer could break the porting model in other ways, but the above
	42	* are the most likely problem areas. It is perhaps surprising how well in practice
	43	* this simple method works.
	44	*/
	45
	46	.text
	47	.align 2
	48
	49
	50	/* *********************
	51	* * M E M S E T _ 6 4 *
	52	* *********************
	53	*
	54	* This is a subroutine called by Libc memset and _memset_pattern for large nonzero
	55	* operands (zero operands are funneled into bzero.) This version is for a
	56	* hypothetic processor that is 64-bit but not Altivec.
	57	* It is not optimized, since it would only be used during bringup.
	58	*
	59	* Registers at entry:
	60	* r4 = count of bytes to store (must be >= 32)
	61	* r8 = ptr to the 1st byte to store (16-byte aligned)
	62	* r9 = ptr to 16-byte pattern to store (16-byte aligned)
	63	* When we return:
	64	* r3 = not changed, since memset returns it
65	* r4 = bytes remaining to store (will be <32)
66	* r7 = not changed
67	* r8 = ptr to next byte to store (still 16-byte aligned)
68	* r12 = not changed (holds return value for memset)
69	*/
70
71	memset_64:
72	srwi r0,r4,5 // get number of 32-byte chunks (>0)
73	ld r10,0(r9) // load pattern
74	ld r11,8(r9)
75	rlwinm r4,r4,0,0x1F // mask down count
76	mtctr r0 // set up loop count
77
78	// Loop over 32-byte chunks.
79	1:
80	std r10,0(r8)
81	std r11,8(r8)
82	std r10,16(r8)
83	std r11,24(r8)
84	addi r8,r8,32
85	bdnz++ 1b
86
87	blr
88
89
90	COMMPAGE_DESCRIPTOR(memset_64,_COMM_PAGE_MEMSET_PATTERN,k64Bit,kHasAltivec, \
91	kCommPageBoth+kPort32to64)