/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#define ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text
        .align  2

/* *********************
 * * M E M S E T _ G 3 *
 * *********************
 *
 * This is a subroutine called by Libc memset and _memset_pattern for large nonzero
 * operands (zero operands are funneled into bzero.)  This version is for
 * 32-bit processors with a 32-byte cache line and no Altivec.
 *
 * Registers at entry:
 *      r4 = count of bytes to store (must be >= 32)
 *      r8 = ptr to the 1st byte to store (16-byte aligned)
 *      r9 = ptr to 16-byte pattern to store (16-byte aligned)
 * When we return:
 *      r3 = not changed, since memset returns it
 *      r4 = bytes remaining to store (will be <32)
 *      r7 = not changed
 *      r8 = ptr to next byte to store (still 16-byte aligned)
 *     r12 = not changed (holds return value for memset)
 */
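
// Worked example of the contract above (illustrative only): entering with
// r4 = 200 and r8 16-byte aligned, the routine stores 192 bytes of the pattern
// (whether or not r8 also starts on a 32-byte cache line boundary) and returns
// with r4 = 8 and r8 advanced by 192; the caller stores the trailing bytes.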

        .align  4
memset_g3:
        andi.   r0,r8,16            // cache line aligned?
        lfd     f0,0(r9)            // pick up the pattern in two FPRs
        lfd     f1,8(r9)
        beq     1f                  // skip if already aligned

// cache line align

        stfd    f0,0(r8)            // no, store another 16 bytes to align
        stfd    f1,8(r8)
        subi    r4,r4,16            // skip past the 16 bytes we just stored
        addi    r8,r8,16

// Loop over cache lines.  This code uses a private protocol with the kernel:
// when the kernel emulates an alignment exception on a DCBZ that occurs in the
// commpage, it zeroes CR7.  We use this to detect the case where we are operating on
// uncached memory, and do not use DCBZ again in this code.  We assume that either
// all the operand is cacheable or none of it is, so we only check the first DCBZ.
1:
        srwi.   r0,r4,6             // get count of 64-byte chunks
        cmpw    cr7,r0,r0           // set cr7_eq (kernel turns off on alignment exception)
        rlwinm  r4,r4,0,0x3F        // mask down to residual count (0..63)
        beq     Lleftover           // no chunks
        dcbz    0,r8                // zero first cache line (clearing cr7 if alignment exception)
        mtctr   r0
        li      r6,32               // get an offset for DCBZ
        beq+    cr7,LDcbzEnter      // enter DCBZ loop (we didn't get an alignment exception)
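
// Example of the chunk arithmetic above (illustrative only): arriving at 1: with
// r4 = 140, srwi. yields r0 = 2 (two 64-byte chunks) and rlwinm leaves r4 = 12;
// two passes through one of the loops below store 128 bytes, and the 12-byte
// residue is handed to Lleftover, which returns it to the caller since it is <16.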

// Loop over 64-byte chunks without DCBZ.
LNoDcbz:
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        stfd    f0,16(r8)
        stfd    f1,24(r8)
        stfd    f0,32(r8)
        stfd    f1,40(r8)
        stfd    f0,48(r8)
        stfd    f1,56(r8)
        addi    r8,r8,64
        bdnz    LNoDcbz

        b       Lleftover

// Loop over 64-byte chunks using DCBZ.
LDcbz:
        dcbz    0,r8
LDcbzEnter:
        dcbz    r6,r8
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        stfd    f0,16(r8)
        stfd    f1,24(r8)
        stfd    f0,32(r8)
        stfd    f1,40(r8)
        stfd    f0,48(r8)
        stfd    f1,56(r8)
        addi    r8,r8,64
        bdnz    LDcbz

// Handle leftovers (0..63 bytes)
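// For example (illustrative only): with r4 = 56 remaining here, srwi. gives three
// 16-byte stores in the loop below and rlwinm leaves r4 = 8 for the caller to finish.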
Lleftover:
        srwi.   r0,r4,4             // get count of 16-byte chunks
        rlwinm  r4,r4,0,0xF         // mask down to residuals
        beqlr                       // no 16-byte chunks so done
        mtctr   r0
2:
        stfd    f0,0(r8)
        stfd    f1,8(r8)
        addi    r8,r8,16
        bdnz    2b

        blr

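// The descriptor below publishes this routine in the 32-bit commpage at
// _COMM_PAGE_MEMSET_PATTERN; per the header comment it should be chosen only on
// processors with a 32-byte cache line (kCache32) and without Altivec (kHasAltivec,
// presumably the "can't have" capability argument).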
        COMMPAGE_DESCRIPTOR(memset_g3,_COMM_PAGE_MEMSET_PATTERN,kCache32,kHasAltivec, \
                kCommPage32)