/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text
        .align  2
/* *********************
 * * M E M S E T _ G 3 *
 * *********************
 *
 * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
 * operands (zero operands are funneled into bzero.)  This version is for
 * 32-bit processors with a 32-byte cache line and no Altivec.
 *
 * Registers at entry:
 *      r4 = count of bytes to store (must be >= 32)
 *      r8 = ptr to the 1st byte to store (16-byte aligned)
 *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
 * When we return:
 *      r3 = not changed, since memset returns it
 *      r4 = bytes remaining to store (will be <32)
 *      r7 = not changed
 *      r8 = ptr to next byte to store (still 16-byte aligned)
 *      r12 = not changed (holds return value for memset)
 */
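// Rough C equivalent of the contract above (an illustrative sketch only;
// "dst", "pattern", and "count" stand for r8, r9, and r4):
//
//      do {                                // count >= 32 on entry
//          memcpy(dst, pattern, 16);       // replicate the 16-byte pattern
//          dst += 16;
//          count -= 16;
//      } while (count >= 16);
//      // on return count < 16 here (the contract guarantees < 32);
//      // the caller stores the remaining tail bytes itself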
        .align  5
memset_g3:
        andi.   r0,r8,16            // cache line aligned?
        lfd     f0,0(r9)            // pick up the pattern in two FPRs
        lfd     f1,8(r9)
        beq     1f                  // skip if already aligned

        // cache line align

        stfd    f0,0(r8)            // no, store another 16 bytes to align
        stfd    f1,8(r8)
        subi    r4,r4,16            // skip past the 16 bytes we just stored
        addi    r8,r8,16
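// Example: if r8 is 16-byte but not 32-byte aligned (its low 5 bits are
// 0x10), the extra 16-byte store above advances it to a cache line boundary.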
// Loop over cache lines.  This code uses a private protocol with the kernel:
// when the kernel emulates an alignment exception on a DCBZ that occurs in the
// commpage, it zeroes CR7.  We use this to detect the case where we are operating on
// uncached memory, and do not use DCBZ again in this code.  We assume that either
// all the operand is cacheable or none of it is, so we only check the first DCBZ.
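// Concretely: the cmpw below sets cr7_eq; if the kernel must emulate the
// first dcbz (e.g. the destination is uncached), its emulation clears
// cr7_eq, so the beq+ falls through into the store-only loop and no
// further dcbz is issued for this call.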
1:
        srwi.   r0,r4,6             // get count of 64-byte chunks
        cmpw    cr7,r0,r0           // set cr7_eq (kernel turns off on alignment exception)
        rlwinm  r4,r4,0,0x3F        // mask down to residual count (0..63)
        beq     Lleftover           // no chunks
        dcbz    0,r8                // zero first cache line (clearing cr7 if alignment exception)
        mtctr   r0                  // set up count of 64-byte chunks for the loops below
        li      r6,32               // get an offset for DCBZ
        beq+    cr7,LDcbzEnter      // enter DCBZ loop (we didn't get an alignment exception)
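// Worked example (illustrative): for r4 = 200, the srwi above yields
// r0 = 3 chunks (192 bytes) and the rlwinm leaves r4 = 8; after the chunk
// loops, Lleftover finds no 16-byte chunk and returns with r4 = 8 for the
// caller to finish.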
        // Loop over 64-byte chunks without DCBZ.
LNoDcbz:
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        stfd    f0,16(r8)
        stfd    f1,24(r8)
        stfd    f0,32(r8)
        stfd    f1,40(r8)
        stfd    f0,48(r8)
        stfd    f1,56(r8)
        addi    r8,r8,64
        bdnz    LNoDcbz

        b       Lleftover
        // Loop over 64-byte chunks using DCBZ.
LDcbz:
        dcbz    0,r8                // zero 1st cache line of chunk (avoids reading dest from memory)
LDcbzEnter:
        dcbz    r6,r8               // zero 2nd 32-byte line of the 64-byte chunk
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        stfd    f0,16(r8)
        stfd    f1,24(r8)
        stfd    f0,32(r8)
        stfd    f1,40(r8)
        stfd    f0,48(r8)
        stfd    f1,56(r8)
        addi    r8,r8,64
        bdnz    LDcbz
        // Handle leftovers (0..63 bytes)
Lleftover:
        srwi.   r0,r4,4             // get count of 16-byte chunks
        rlwinm  r4,r4,0,0xF         // mask down to residuals
        beqlr                       // no 16-byte chunks so done
        mtctr   r0
2:
        stfd    f0,0(r8)            // store 16-byte chunks of pattern
        stfd    f1,8(r8)
        addi    r8,r8,16
        bdnz    2b

        blr
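// The descriptor below registers this routine with the commpage mechanism:
// kCache32 is the "must have" capability mask (32-byte cache lines) and
// kHasAltivec the "can't have" mask, matching the header comment above.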
        COMMPAGE_DESCRIPTOR(memset_g3,_COMM_PAGE_MEMSET_PATTERN,kCache32,kHasAltivec, \
                kCommPage32)