/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text
        .align  2

/* *********************
 * * M E M S E T _ G 3 *
 * *********************
 *
 * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
 * operands (zero operands are funneled into bzero.)  This version is for
 * 32-bit processors with a 32-byte cache line and no Altivec.
 *
 * Registers at entry:
 *      r4 = count of bytes to store (must be >= 32)
 *      r8 = ptr to the 1st byte to store (16-byte aligned)
 *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
 * When we return:
 *      r3 = not changed, since memset returns it
 *      r4 = bytes remaining to store (will be <32)
 *      r7 = not changed
 *      r8 = ptr to next byte to store (still 16-byte aligned)
 *      r12 = not changed (holds return value for memset)
 */
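
// A minimal C sketch of the contract documented above, assuming the caller passes a
// 16-byte pattern and a 16-byte aligned destination.  The names memset_g3_model,
// dst, pattern and count are hypothetical; the real routine keeps the pattern in
// f0/f1, cache-line aligns, and unrolls into 64-byte chunks, but the stores it
// performs are equivalent to the loop below.
//
//      #include <stddef.h>
//      #include <string.h>
//
//      // Store the 16-byte pattern until fewer than 16 bytes remain and report
//      // the leftover count for the caller to finish (the documented contract
//      // only promises that fewer than 32 bytes are left in r4).
//      static size_t memset_g3_model(unsigned char *dst,
//                                    const unsigned char pattern[16],
//                                    size_t count)       // >= 32 on entry
//      {
//          while (count >= 16) {
//              memcpy(dst, pattern, 16);                 // two 8-byte stfd stores
//              dst   += 16;
//              count -= 16;
//          }
//          return count;                                 // 0..15 bytes left (r4)
//      }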

        .align  4
memset_g3:
        andi.   r0,r8,16            // cache line aligned?
        lfd     f0,0(r9)            // pick up the pattern in two FPRs
        lfd     f1,8(r9)
        beq     1f                  // skip if already aligned

// cache line align

        stfd    f0,0(r8)            // no, store another 16 bytes to align
        stfd    f1,8(r8)
        subi    r4,r4,16            // skip past the 16 bytes we just stored
        addi    r8,r8,16

// Loop over cache lines.  This code uses a private protocol with the kernel:
// when the kernel emulates an alignment exception on a DCBZ that occurs in the
// commpage, it zeroes CR7.  We use this to detect the case where we are operating on
// uncached memory, and do not use DCBZ again in this code.  We assume that either
// all the operand is cacheable or none of it is, so we only check the first DCBZ.
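//
// A hedged C sketch of the probe-and-dispatch control flow that follows; the names
// dcbz_probe(), dcbz_loop(), store_loop() and bulk_chunks() are hypothetical
// stand-ins for the "beq+ cr7" test and the LDcbz/LNoDcbz loops below, not kernel
// or Libc interfaces.
//
//      #include <stddef.h>
//
//      extern int  dcbz_probe(unsigned char *line);            // 0 => kernel cleared cr7
//      extern void dcbz_loop(unsigned char *dst, size_t n);    // LDcbz path
//      extern void store_loop(unsigned char *dst, size_t n);   // LNoDcbz path
//
//      static unsigned char *bulk_chunks(unsigned char *dst, size_t *count)
//      {
//          size_t chunks = *count >> 6;        // srwi   r0,r4,6      (64-byte chunks)
//          *count &= 0x3F;                     // rlwinm r4,r4,0,0x3F (0..63 left over)
//          if (chunks != 0) {
//              if (dcbz_probe(dst))            // first DCBZ not emulated: cacheable
//                  dcbz_loop(dst, chunks);     // LDcbz: dcbz offsets 0 and 32, 8 stfd
//              else
//                  store_loop(dst, chunks);    // LNoDcbz: 8 stfd only, no dcbz
//              dst += chunks << 6;
//          }
//          return dst;                         // Lleftover handles the remainder
//      }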
1:
        srwi.   r0,r4,6             // get count of 64-byte chunks
        cmpw    cr7,r0,r0           // set cr7_eq (kernel turns off on alignment exception)
        rlwinm  r4,r4,0,0x3F        // mask down to residual count (0..63)
        beq     Lleftover           // no chunks
        dcbz    0,r8                // zero first cache line (clearing cr7 if alignment exception)
        mtctr   r0
        li      r6,32               // get an offset for DCBZ
        beq+    cr7,LDcbzEnter      // enter DCBZ loop (we didn't get an alignment exception)

// Loop over 64-byte chunks without DCBZ.
LNoDcbz:
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        stfd    f0,16(r8)
        stfd    f1,24(r8)
        stfd    f0,32(r8)
        stfd    f1,40(r8)
        stfd    f0,48(r8)
        stfd    f1,56(r8)
        addi    r8,r8,64
        bdnz    LNoDcbz

        b       Lleftover

// Loop over 64-byte chunks using DCBZ.
LDcbz:
        dcbz    0,r8
LDcbzEnter:
        dcbz    r6,r8
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        stfd    f0,16(r8)
        stfd    f1,24(r8)
        stfd    f0,32(r8)
        stfd    f1,40(r8)
        stfd    f0,48(r8)
        stfd    f1,56(r8)
        addi    r8,r8,64
        bdnz    LDcbz

// Handle leftovers (0..63 bytes)
Lleftover:
        srwi.   r0,r4,4             // get count of 16-byte chunks
        rlwinm  r4,r4,0,0xF         // mask down to residuals
        beqlr                       // no 16-byte chunks so done
        mtctr   r0
2:
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        addi    r8,r8,16
        bdnz    2b

        blr
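
// Worked example of the count decomposition above: with r4 = 200 at label 1:,
// srwi gives 200>>6 = 3 chunks of 64 bytes (192 bytes stored by the chunk loop)
// and rlwinm leaves 200 & 0x3F = 8; Lleftover then finds 8>>4 = 0 sixteen-byte
// chunks, so we return with r4 = 8 bytes for the caller to store.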

        COMMPAGE_DESCRIPTOR(memset_g3,_COMM_PAGE_MEMSET_PATTERN,kCache32,kHasAltivec, \
                kCommPage32)
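
// Descriptor note: this registers the routine in the 32-bit commpage at
// _COMM_PAGE_MEMSET_PATTERN.  Consistent with the header comment, kCache32 appears
// to name a capability the processor must have (32-byte cache lines) and
// kHasAltivec one it must not have, so this version is selected only for
// non-Altivec (G3-class) processors.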