]> git.saurik.com Git - apple/xnu.git/blob - osfmk/ppc/commpage/bzero_32.s
xnu-517.tar.gz
[apple/xnu.git] / osfmk / ppc / commpage / bzero_32.s
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25
26 #define ASSEMBLER
27 #include <sys/appleapiopts.h>
28 #include <ppc/asm.h>
29 #include <machine/cpu_capabilities.h>
30 #include <machine/commpage.h>
31
32 .text // code goes in the text section
33 .align 2
34 .globl EXT(bzero_32) // export the routine's label (EXT() comes from an included header)
35
36
37 // *******************
38 // * B Z E R O _ 3 2 *
39 // *******************
40 //
41 // For 32-bit processors with a 32-byte cache line.
42 //
// Zeroes r4 bytes starting at r3. Strategy:
//   - length < 32: go straight to Ltail and store 16/8/4/2/1-byte pieces
//     selected by the bits of the length.
//   - otherwise: store 32 zero bytes at the start to cover the unaligned
//     head, advance to the next 32-byte boundary, zero whole 32-byte
//     blocks with dcbz, then fall into Ltail for the 0..31 leftover bytes.
//
43 // Register use:
44 // r0 = zero
45 // r3 = original ptr, not changed since memset returns it
46 // r4 = count of bytes to set
// r5 = scratch: #bytes needed to 32-byte align, later a discarded andi. result
// r8 = number of whole 32-byte chunks (copied to CTR as the loop count)
47 // r9 = working operand ptr
// cr0 = results of the andi. tests, cr1 = "no 32-byte chunks" test,
// cr7 = length < 32 test on entry, later the low 4 bits of the byte count
48 // We do not touch r2 and r10-r12, which some callers depend on.
49
50 .align 5 // NOTE(review): presumably a power-of-two exponent (32-byte alignment) - confirm for this assembler
51 bzero_32: // void bzero(void *b, size_t len);
52 cmplwi cr7,r4,32 // unsigned compare: is the length too short for DCBZ?
53 li r0,0 // r0 = 0, the value stored everywhere below
54 neg r5,r3 // r5 = -ptr; its low 5 bits are the #bytes up to the next 32-byte boundary
55 mr r9,r3 // make copy of operand ptr (can't change r3)
56 blt cr7,Ltail // length < 32: r4 is the whole count and r9 the start, so just do the tail
57
58 // At least 32 bytes long, so compute alignment and #cache blocks.
59
60 andi. r5,r5,0x1F // r5 <- #bytes to 32-byte align; cr0 EQ if ptr is already aligned
61 sub r4,r4,r5 // r4 <- bytes remaining once the aligning head is excluded
62 srwi r8,r4,5 // r8 <- #32-byte chunks (r4 / 32)
63 cmpwi cr1,r8,0 // any chunks? result kept in cr1 for the test after the head stores
64 mtctr r8 // set up loop count
65 beq 1f // cr0 from andi.: already aligned, so r4 is still >= 32 and r8 != 0
66
67 // 32-byte align. We just store 32 0s, rather than test and use conditional
68 // branches. This cannot overrun because the length is at least 32 here; any bytes past the alignment point are simply zeroed again below.
69
70 stw r0,0(r9) // r9 is still the caller's pointer here, so these
71 stw r0,4(r9) // word stores may be unaligned
72 stw r0,8(r9)
73 stw r0,12(r9)
74 stw r0,16(r9)
75 stw r0,20(r9)
76 stw r0,24(r9)
77 stw r0,28(r9)
78 add r9,r9,r5 // now r9 is 32-byte aligned
79 beq cr1,Ltail // no whole chunks left: fewer than 32 bytes remain in r4
80
81 // Loop doing 32-byte version of DCBZ instruction.
82 // NB: we take alignment exceptions on cache-inhibited memory.
83 // The kernel could be changed to zero cr7 when emulating a
84 // dcbz (as it does on 64-bit processors), so we could avoid all
85 // but the first.
86
87 1:
88 andi. r5,r4,0x1F // will there be trailing bytes? cr0 is not modified inside the loop, so beqlr below sees this result
89 b 2f // jump to the loop head, skipping any padding the .align may insert
90 .align 4
91 2:
92 dcbz 0,r9 // zero another 32 bytes
93 addi r9,r9,32 // step to the next block
94 bdnz 2b // decrement CTR, loop while chunks remain
95
96 beqlr // cr0 EQ from the andi. at 1: means no trailing bytes, so return
97
98 // Store trailing bytes. Reached with r9 = next byte to zero and the low
// 5 bits of r4 = number of bytes left (0..31); higher bits of r4 are ignored.
99
100 Ltail:
101 andi. r5,r4,0x10 // test bit 27 (the 16s bit) separately, since cr7 only holds 4 bits
102 mtcrf 0x01,r4 // remaining byte count to cr7: CR bits 28-31 = 8s, 4s, 2s, 1s bits
103
104 beq 2f // no 16-byte chunks (this 2f is the label just below, not the loop head)
105 stw r0,0(r9) // zero 16 bytes
106 stw r0,4(r9)
107 stw r0,8(r9)
108 stw r0,12(r9)
109 addi r9,r9,16
110 2:
111 bf 28,4f // 8-byte chunk?
112 stw r0,0(r9) // zero 8 bytes
113 stw r0,4(r9)
114 addi r9,r9,8
115 4:
116 bf 29,5f // word?
117 stw r0,0(r9) // zero 4 bytes
118 addi r9,r9,4
119 5:
120 bf 30,6f // halfword?
121 sth r0,0(r9) // zero 2 bytes
122 addi r9,r9,2
123 6:
124 bflr 31 // byte? return now if the 1s bit is clear
125 stb r0,0(r9) // zero the final byte
126 blr
127
// NOTE(review): COMMPAGE_DESCRIPTOR is defined outside this file; the arguments
// appear to be the routine label, its commpage slot, and capability bits
// (kCache32 matching the 32-byte cache line assumption above) - confirm.
128 COMMPAGE_DESCRIPTOR(bzero_32,_COMM_PAGE_BZERO,kCache32,0,0)