]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/ppc/xsumas.s
xnu-344.49.tar.gz
[apple/xnu.git] / bsd / dev / ppc / xsumas.s
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 #define STANDALONE 0
26
27 #if STANDALONE
28 #include "asm.h"
29 #include "assym.h"
30 #include "proc_reg.h" /* For CACHE_LINE_SIZE */
31
32 #else
33
34 #include <mach/ppc/asm.h>
35 #if 0
36 /* #include <assym.h> */
37 #include <ppc/proc_reg.h> /* For CACHE_LINE_SIZE */
38 #endif 0
39 #endif
40
/*
 * xsum_assym - add the 16-bit ones-complement sum of a buffer to a
 * running checksum.
 *
 * On entry:
 *   Reg 3 - Pointer to data
 *   Reg 4 - Length of data, in bytes
 *   Reg 5 - Accumulated sum value
 *   Reg 6 - Starting on odd boundary flag (relative to byte 0 of the
 *           checksummed data); any nonzero value means "odd"
 *
 * On exit:
 *   Reg 3 - Accumulated sum with this buffer added in, folded to 16 bits
 *
 * Register usage inside the routine:
 *   r8      - running 32-bit sum; carries are chained through XER[CA]
 *   r11     - "swap the bytes of this buffer's sum" flag
 *   r12     - saved copy of the passed-in sum
 *   r5-r7, r9, r10 - scratch
 *   cr0 and XER[CA] are modified.
 *
 * XER[CA] is live from the addic below until Ldone, so every instruction
 * in between must be one that leaves the carry bit alone (add, subf,
 * cmpi/cmpli, andi., slwi/srwi, cntlzw, loads) or an adde that extends
 * the chain.
 */

ENTRY(xsum_assym, TAG_NO_FRAME_USED)

	mr	r11, r6			; Swapped flag
	addi	r8, 0, 0		; Running sum starts at zero
	addi	r10, 0, 0x1f		; Mask: offset within a 32 byte line
	addi	r7, 0, 1		; Step size for the single-byte case
	addic	r7, r7, 0		; This clears the carry bit!
	mr	r12, r5			; Save the passed-in checksum value

/*
 * Sum bytes before cache line boundary
 */

	cmpi	cr0,0,r4,0		; Check for length of 0
	beq	Lleftovers

	and.	r9, r3, r10
	beq	Laligned32		; 32 byte aligned

	andi.	r9, r3, 0x3
	beq	Laligned4

	andi.	r9, r3, 0x1
	beq	Laligned2		; 2 byte aligned

/*
 * The pointer is odd.  The first byte goes into the low byte of the sum
 * and everything after it is summed from an even address, so the sum we
 * build is the byte-swapped form of this buffer's sum.  That swap must be
 * combined with the caller's odd-boundary flag: two swaps cancel, so the
 * flag is inverted here rather than forced to 1.
 */
	cntlzw	r11, r11		; 32 if the flag was zero, else less than 32
	srwi	r11, r11, 5		; r11 = 1 if the flag was zero, else 0
	lbz	r8, 0(r3)
	add	r3, r3, r7
	subf.	r4, r7, r4
	beq	Ldone

Laligned2:
	cmpi	cr0,0,r4,2		; If remaining length is less than two - go to wrap-up
	blt	Lleftovers
	andi.	r9, r3, 0x3		; If aligned on a 4-byte boundary, go to that code
	beq	Laligned4
	lhz	r5, 0(r3)		; Load and add a halfword to the checksum
	adde	r8, r8, r5
	slwi	r7, r7, 1		; r7 = 2, the halfword step
	add	r3, r3, r7
	subf.	r4, r7, r4
	beq	Ldone


/*
 Add longwords up to the 32 byte boundary
*/

Laligned4:
	addi	r7, 0, 4		; r7 = longword step
Lloop4:
	cmpi	cr0,0,r4,4
	blt	Lleftovers
	and.	r9, r3, r10		; r10 still holds the 0x1f line mask here
	beq	Laligned32
	lwz	r5, 0(r3)
	adde	r8, r8, r5
	add	r3, r3, r7
	subf.	r4, r7, r4
	bne	Lloop4
	b	Ldone


/*
 We're aligned on a 32 byte boundary now - add 8 longwords to checksum
 until the remaining length is less than 32
*/
Laligned32:
	cmpli	cr0,0,r4,32		; Unsigned, so every bit of the length counts
	blt	Lleftovers

Lmainloop:
	addi	r9, 0, 64		; Touch distance: two lines ahead
	addi	r10, 0, 32		; Line size; also the touch distance for one line
	cmpi	cr0,0,r4,64
	blt	Lnopretouch
	dcbt	r3, r10			; Touch one cache-line ahead
Lnopretouch:
	lwz	r5, 0(r3)		; Preload the first longword of the line

/*
 * This is the main meat of the checksum. I attempted to arrange this code
 * such that the processor would execute as many instructions as possible
 * in parallel.
 */

Lloop:
	cmpi	cr0,0,r4,96
	blt	Lnotouch
	dcbt	r3, r9			; Touch two cache lines ahead
Lnotouch:
	adde	r8, r8, r5
	lwz	r5, 4(r3)
	lwz	r6, 8(r3)
	lwz	r7, 12(r3)
	adde	r8, r8, r5
	lwz	r5, 16(r3)
	adde	r8, r8, r6
	lwz	r6, 20(r3)
	adde	r8, r8, r7
	lwz	r7, 24(r3)
	adde	r8, r8, r5
	lwz	r5, 28(r3)
	add	r3, r3, r10
	adde	r8, r8, r6
	adde	r8, r8, r7
	adde	r8, r8, r5
	subf	r4, r10, r4
	cmpli	cr0,0,r4,32		; Fewer than 32 bytes left?  Compare the whole
	blt	Lleftovers		; count; a 16-bit mask misses lengths of 64K and up
	lwz	r5, 0(r3)		; Preload for the next pass
	b	Lloop

/*
 * Handle whatever bytes are left
 */

Lleftovers:
/*
 * Handle leftover bytes.  Every branch to this label arrives with fewer
 * than 32 bytes remaining, so the narrow masks below are sufficient.
 */
	cmpi	cr0,0,r4,0
	beq	Ldone

	addi	r7, 0, 1
	addi	r10, 0, 0x7ffc

	and.	r9, r4, r10		; Any whole longwords left?
	bne	Lfourormore
	srw	r10, r10, r7		; Mask becomes 0x3ffe
	and.	r9, r4, r10		; A halfword left?
	bne	Ltwoormore
	b	Loneleft

Lfourormore:
	addi	r10, 0, 4

Lfourloop:
	lwz	r5, 0(r3)
	adde	r8, r8, r5
	add	r3, r3, r10
	subf	r4, r10, r4
	andi.	r6, r4, 0xfffc
	bne	Lfourloop

Ltwoormore:
	andi.	r6, r4, 0xfffe
	beq	Loneleft
	lhz	r5, 0(r3)
	adde	r8, r8, r5
	addi	r3, r3, 2
	subi	r4, r4, 2

Loneleft:
	cmpi	cr0,0,r4,0
	beq	Ldone
	lbz	r5, 0(r3)
	slwi	r5, r5, 8		; A trailing byte is the high byte of its halfword
	adde	r8, r8, r5

/*
 * Wrap the longword around, adding the two 16-bit portions
 * to each other along with any previous and subsequent carries.
 */
Ldone:
	addze	r8, r8			; Add the carry
	addze	r8, r8			; Add the carry again (the last add may have carried)
	andis.	r6, r8, 0xffff		; Stuff r6 with the high order 16 bits of sum word
	srwi	r6, r6, 16		; Shift it to the low order word
	andi.	r8, r8, 0xffff		; Zero out the high order word
	add	r8, r8, r6		; Add the two halves

	andis.	r6, r8, 0xffff		; Do the above again in case we carried into the
	srwi	r6, r6, 16		; high order word with the last add.
	andi.	r8, r8, 0xffff
	add	r3, r8, r6

	cmpi	cr0,0,r11,0		; Check to see if we need to swap the bytes
	beq	Ldontswap

/*
 * Exactly one of "buffer starts at an odd offset in the checksummed data"
 * and "buffer pointer was odd" holds, so we need to swap the checksum
 * bytes.
 */
	slwi	r8, r3, 8		; shift byte 0 to byte 1
	clrlwi	r8, r8, 16		; Clear top 16 bits
	srwi	r3, r3, 8		; shift byte 1 to byte 0
	or	r3, r8, r3		; or them

Ldontswap:
	addc	r3, r3, r12		; Add in the passed-in checksum
	addze	r3, r3			; End-around carry if that add overflowed 32 bits
	andis.	r6, r3, 0xffff		; Wrap and add any carries into the top 16 bits
	srwi	r6, r6, 16
	andi.	r3, r3, 0xffff
	add	r3, r3, r6

	andis.	r6, r3, 0xffff		; Do the above again in case we carried into the
	srwi	r6, r6, 16		; high order word with the last add.
	andi.	r3, r3, 0xffff
	add	r3, r3, r6
	blr
251
252