/*
 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#define ASSEMBLER
#include <sys/appleapiopts.h>
#include <ppc/asm.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>

        .text
        .align  2

// *******************
// * B Z E R O _ 3 2 *
// *******************
//
// For 32-bit processors with a 32-byte cache line.
//
// Register use:
//      r0 = zero
//      r3 = original ptr, not changed since memset returns it
//      r4 = count of bytes to set
//      r9 = working operand ptr
// We do not touch r2 and r10-r12, which some callers depend on.

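// As an orientation aid, the control flow below corresponds roughly to the
// following C-style sketch (illustrative only; the local names are ad hoc):
//
//      void bzero(void *b, size_t len) {
//          char *p = b;
//          if (len >= 32) {                            // long enough for DCBZ
//              size_t align  = (-(uintptr_t)p) & 0x1F; // bytes to next 32-byte boundary
//              size_t chunks = (len - align) >> 5;     // whole cache blocks
//              if (align) { /* store 32 zero bytes at p */ p += align; }
//              while (chunks--) { /* dcbz p */ p += 32; }
//              len = (len - align) & 0x1F;             // bytes left after the blocks
//          }
//          /* store the remaining len (< 32) bytes with 16/8/4/2/1-byte stores */
//      }
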
        .align  5
bzero_32:                       // void bzero(void *b, size_t len);
        cmplwi  cr7,r4,32       // too short for DCBZ?
        li      r0,0            // get a 0
        neg     r5,r3           // start to compute #bytes to align
        mr      r9,r3           // make copy of operand ptr (can't change r3)
        blt     cr7,Ltail       // length < 32, too short for DCBZ

// At least 32 bytes long, so compute alignment and #cache blocks.

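// (Note: r5 was loaded with 0-b by the "neg" above, so masking it with 0x1F
// here yields the number of bytes from b up to the next 32-byte boundary.)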
        andi.   r5,r5,0x1F      // r5 <- #bytes to 32-byte align
        sub     r4,r4,r5        // adjust length
        srwi    r8,r4,5         // r8 <- #32-byte chunks
        cmpwi   cr1,r8,0        // any chunks?
        mtctr   r8              // set up loop count
        beq     1f              // skip if already 32-byte aligned (r8 is nonzero here, so the loop runs)

// 32-byte align.  We just store 32 0s rather than test and use conditional
// branches.  Any of these bytes that land past the alignment point are simply
// zeroed again by the DCBZ loop or the trailing-byte stores.

        stw     r0,0(r9)
        stw     r0,4(r9)
        stw     r0,8(r9)
        stw     r0,12(r9)
        stw     r0,16(r9)
        stw     r0,20(r9)
        stw     r0,24(r9)
        stw     r0,28(r9)
        add     r9,r9,r5        // now r9 is 32-byte aligned
        beq     cr1,Ltail       // skip if no 32-byte chunks

// Loop doing 32-byte version of DCBZ instruction.
// NB: we take alignment exceptions on cache-inhibited memory.
// The kernel could be changed to zero cr7 when emulating a
// dcbz (as it does on 64-bit processors), so we could avoid all
// but the first such exception.
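// (DCBZ establishes the addressed 32-byte cache block in the data cache and
// zeroes all of it in a single operation, without first fetching the line
// from memory, which is what makes it faster than a sequence of stores.)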

1:
        andi.   r5,r4,0x1F      // will there be trailing bytes?
        b       2f              // branch over the alignment padding into the loop
        .align  4               // align the DCBZ loop on a 16-byte boundary
2:
        dcbz    0,r9            // zero another 32 bytes
        addi    r9,r9,32
        bdnz    2b

        beqlr                   // no trailing bytes

// Store trailing bytes.
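//
// mtcrf 0x01,r4 copies bits 28-31 of the count (the 8-, 4-, 2-, and 1-byte
// flags) into cr7, so "bf 28/29/30/31" can test them directly.  Bit 27 (the
// 0x10 or 16-byte flag) lies in a different CR field, so it is tested with
// "andi." instead.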

Ltail:
        andi.   r5,r4,0x10      // test bit 27 separately
        mtcrf   0x01,r4         // remaining byte count to cr7

        beq     2f              // no 16-byte chunks
        stw     r0,0(r9)
        stw     r0,4(r9)
        stw     r0,8(r9)
        stw     r0,12(r9)
        addi    r9,r9,16
2:
        bf      28,4f           // 8-byte chunk?
        stw     r0,0(r9)
        stw     r0,4(r9)
        addi    r9,r9,8
4:
        bf      29,5f           // word?
        stw     r0,0(r9)
        addi    r9,r9,4
5:
        bf      30,6f           // halfword?
        sth     r0,0(r9)
        addi    r9,r9,2
6:
        bflr    31              // byte?
        stb     r0,0(r9)
        blr

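// The descriptor below registers this routine with the commpage mechanism:
// it is installed at the _COMM_PAGE_BZERO slot on processors that report the
// kCache32 capability (32-byte cache lines), with no prohibited capability
// bits, and only in the 32-bit commpage.  (This reading of the arguments is
// inferred from how COMMPAGE_DESCRIPTOR is used elsewhere in the commpage
// sources, rather than stated here.)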
        COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,kCommPage32)