2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
27 #include "proc_reg.h" /* For CACHE_LINE_SIZE */
31 #include <mach/ppc/asm.h>
33 /* #include <assym.h> */
34 #include <ppc/proc_reg.h> /* For CACHE_LINE_SIZE */
39 * Reg 3 - Pointer to data
40 * Reg 4 - Length of data
41 * Reg 5 - Accumulated sum value
42 * Reg 6 - Starting on odd boundary flag (relative to byte 0 of the checksummed data)
45 ENTRY(xsum_assym, TAG_NO_FRAME_USED)
47 mr r11, r6 ; Swapped flag
51 addic r7, r7, 0 ; This clears the carry bit!
52 mr r12, r5 ; Save the passed-in checksum value
55 * Sum bytes before cache line boundary
58 cmpi cr0,0,r4,0 ; Check for length of 0
62 beq Laligned32 ; 32 byte aligned
68 beq Laligned2 ; 2 byte aligned
70 addi r11, 0, 1 ; swap bytes at end
77 cmpi cr0,0,r4,2 ; If remaining length is less than two - go to wrap-up
79 andi. r9, r3, 0x3 ; If aligned on a 4-byte boundary, go to that code
81 lhz r5, 0(r3) ; Load and add a halfword to the checksum
90 Add longwords up to the 32 byte boundary
109 We're aligned on a 32 byte boundary now - add 8 longwords to checksum
110 until the remaining length is less than 32
113 andis. r6, r4, 0xffff
123 dcbt r3, r10 ; Touch one cache-line ahead
128 * This is the main meat of the checksum. I attempted to arrange this code
129 * such that the processor would execute as many instructions as possible
136 dcbt r3, r9 ; Touch two cache lines ahead
161 * Handle whatever bytes are left
166 * Handle leftover bytes
208 * Wrap the longword around, adding the two 16-bit portions
209 * to each other along with any previous and subsequent carries.
212 addze r8, r8 ; Add the carry
213 addze r8, r8 ; Add the carry again (the last add may have carried)
214 andis. r6, r8, 0xffff ; Stuff r6 with the high order 16 bits of sum word
215 srwi r6, r6, 16 ; Shift it to the low order word
216 andi. r8, r8, 0xffff ; Zero out the high order word
217 add r8, r8, r6 ; Add the two halves
219 andis. r6, r8, 0xffff ; Do the above again in case we carried into the
220 srwi r6, r6, 16 ; high order word with the last add.
224 cmpi cr0,0,r11,0 ; Check to see if we need to swap the bytes
228 * Our buffer began on an odd boundary, so we need to swap
229 * the checksum bytes.
231 slwi r8, r3, 8 ; shift byte 0 to byte 1
232 clrlwi r8, r8, 16 ; Clear top 16 bits
233 srwi r3, r3, 8 ; shift byte 1 to byte 0
234 or r3, r8, r3 ; or them
237 add r3, r3, r12 ; Add in the passed-in checksum
238 andis. r6, r3, 0xffff ; Wrap and add any carries into the top 16 bits
243 andis. r6, r3, 0xffff ; Do the above again in case we carried into the
244 srwi r6, r6, 16 ; high order word with the last add.