]> git.saurik.com Git - apple/libc.git/blob - ppc/gen/bzero.s
2bd14ee1cd8884ff60b16c8d1c46c9ac3c3288cd
[apple/libc.git] / ppc / gen / bzero.s
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 //
26 // =============================
27 // BZERO and MEMSET FOR Mac OS X
28 // =============================
29 //
30 // We use DCBZ, and therefore are dependent on the cache block size (32 bytes).
31 // Bzero and memset need to be in the same file since they are tightly
32 // coupled, so we can use bzero for memset of 0 without incurring extra
33 // overhead. (The issue is that bzero must preserve r3 for memset.)
34 //
35 // Registers we use:
36 // r3 = original ptr, not changed since memset returns it
37 // r4 = count of bytes to set ("rc")
38 // r11 = working operand ptr ("rp")
39 // r10 = value to set ("rv")
40
41 #define rc r4
42 #define rp r11
43 #define rv r10
44
45 #define __APPLE_API_PRIVATE
46 #include <machine/cpu_capabilities.h>
47
48 #include <architecture/ppc/asm_help.h>
49
50 .text // everything below is emitted into the code section
51 .align 5 // operand is a power of two: 2^5 = 32-byte boundary for the _bzero entry
52 .globl _bzero // export both entry points defined in this file
53 .globl _memset
54
55 // *************
56 // * B Z E R O *
57 // *************
58 // Zero len bytes at b by transferring control to the routine at _COMM_PAGE_BZERO; no work is done locally.
59 _bzero: // void bzero(void *b, size_t len);
60 ba _COMM_PAGE_BZERO // absolute branch, no link: LR is untouched, so the target returns directly to our caller
61
62 // store up to 31 trailing bytes, then return to the caller via LR
63 // rv = value to store (in all 4 bytes)
64 // rc = #bytes to store (0..31); rp = address of first byte to store (advanced as we go); r5, cr0, cr7 are overwritten
65 Ltail:
66 andi. r5,rc,16 // bit 27 set in length? (result lands in cr0)
67 mtcrf 0x01,rc // low 4 bits of length to cr7 (mask 0x01 writes cr7 only, so cr0 is still valid)
68 beq 1f // test bits of length: cr0 says the 16 bit is clear, skip the 16-byte store
69 stw rv,0(rp) // 16 bytes as four word stores
70 stw rv,4(rp)
71 stw rv,8(rp)
72 stw rv,12(rp)
73 addi rp,rp,16
74 1:
75 bf 28,2f // cr bit 28 = 8 bit of length
76 stw rv,0(rp)
77 stw rv,4(rp)
78 addi rp,rp,8
79 2:
80 bf 29,3f // cr bit 29 = 4 bit of length
81 stw rv,0(rp)
82 addi rp,rp,4
83 3:
84 bf 30,4f // cr bit 30 = 2 bit of length
85 sth rv,0(rp)
86 addi rp,rp,2
87 4:
88 bflr 31 // cr bit 31 = 1 bit of length; return now if it is clear
89 stb rv,0(rp) // final odd byte
90 blr
91
92
93 // ***************
94 // * M E M S E T *
95 // ***************
96 // Fill len bytes at b with the low byte of c; r3 is never written, so b is still in r3 on return. Scratch: r5, r6, f0, ctr, cr0, cr1, cr7.
97 .align 5
98 _memset: // void * memset(void *b, int c, size_t len);
99 andi. rv,r4,0xFF // copy value to working register, test for 0 (result in cr0)
100 mr rc,r5 // move length to working register (rc is r4, so r3/r4 now hold ptr/len as bzero expects)
101 cmplwi cr1,r5,32 // length < 32 ? (kept in cr1 so the zero test in cr0 survives)
102 beqa++ _COMM_PAGE_BZERO // fill byte is 0: hand off to bzero, which per the file header preserves r3
103 rlwimi rv,rv,8,16,23 // replicate value to low 2 bytes
104 mr rp,r3 // make working copy of operand ptr
105 rlwimi rv,rv,16,0,15 // value now in all 4 bytes
106 blt cr1,Ltail // length<32, so use common tail routine
107 neg r5,rp // start to compute #bytes to align
108 andi. r6,r5,0x7 // r6 <- #bytes to align on dw (0..7)
109 beq- Lmemset1 // already aligned
110
111 ; align on 8-byte boundary
112
113 mtcrf 0x01,r6 // move count to cr7 (faster if only 1 cr)
114 sub rc,rc,r6 // adjust length (was >= 32, so at least 25 bytes remain)
115 bf 31,1f // cr bit 31 = 1 bit of alignment count
116 stb rv,0(rp)
117 addi rp,rp,1
118 1:
119 bf 30,2f // cr bit 30 = 2 bit of alignment count
120 sth rv,0(rp)
121 addi rp,rp,2
122 2:
123 bf 29,Lmemset1 // cr bit 29 = 4 bit of alignment count
124 stw rv,0(rp)
125 addi rp,rp,4 // falls through into Lmemset1 with rp 8-byte aligned
126
127 // loop on 16-byte blocks
128 Lmemset1:
129 stw rv,0(rp) // store first 8 bytes from rv
130 stw rv,4(rp)
131 srwi r5,rc,4 // r5 <- #blocks (>=1)
132 mtcrf 0x01,rc // leftover length to cr7 (nothing in the loop below writes cr7)
133 mtctr r5 // set up loop count
134 lfd f0,0(rp) // pick up in a fp register: read back the 8 bytes just stored so each stfd writes 8 bytes
135 b 2f // enter loop in middle: bytes 0..7 of the first block are already written
136 .align 4
137 1: // loop on 16-byte blocks
138 stfd f0,0(rp)
139 2:
140 stfd f0,8(rp)
141 addi rp,rp,16
142 bdnz 1b // decrement ctr, loop while blocks remain
143
144 // store up to 15 trailing bytes (count in cr7)
145
146 bf 28,3f // cr bit 28 = 8 bit of leftover length
147 stfd f0,0(rp)
148 addi rp,rp,8
149 3:
150 bf 29,4f // cr bit 29 = 4 bit of leftover length
151 stw rv,0(rp)
152 addi rp,rp,4
153 4:
154 bf 30,5f // cr bit 30 = 2 bit of leftover length
155 sth rv,0(rp)
156 addi rp,rp,2
157 5:
158 bflr 31 // cr bit 31 = 1 bit of leftover length; return now if it is clear
159 stb rv,0(rp) // final odd byte
160 blr