2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
30 #include "proc_reg.h" /* For CACHE_LINE_SIZE */
34 #include <mach/ppc/asm.h>
36 /* #include <assym.h> */
37 #include <ppc/proc_reg.h> /* For CACHE_LINE_SIZE */
42 * Reg 3 - Pointer to data
43 * Reg 4 - Length of data
44 * Reg 5 - Accumulated sum value
45 * Reg 6 - Starting on odd boundary flag (relative to byte 0 of the checksummed data)
48 ENTRY(xsum_assym, TAG_NO_FRAME_USED)
50 mr r11, r6 ; Swapped flag
54 addic r7, r7, 0 ; This clears the carry bit!
55 mr r12, r5 ; Save the passed-in checksum value
58 * Sum bytes before cache line boundary
61 cmpi cr0,0,r4,0 ; Check for length of 0
65 beq Laligned32 ; 32 byte aligned
71 beq Laligned2 ; 2 byte aligned
73 addi r11, 0, 1 ; swap bytes at end
80 cmpi cr0,0,r4,2 ; If remaining length is less than two - go to wrap-up
82 andi. r9, r3, 0x3 ; If aligned on a 4-byte boundary, go to that code
84 lhz r5, 0(r3) ; Load and add a halfword to the checksum
93 Add longwords up to the 32 byte boundary
112 We're aligned on a 32 byte boundary now - add 8 longwords to checksum
113 until the remaining length is less than 32
116 andis. r6, r4, 0xffff
126 dcbt r3, r10 ; Touch one cache-line ahead
131 * This is the main meat of the checksum. I attempted to arrange this code
132 * such that the processor would execute as many instructions as possible
139 dcbt r3, r9 ; Touch two cache lines ahead
164 * Handle whatever bytes are left
169 * Handle leftover bytes
211 * Wrap the longword around, adding the two 16-bit portions
212 * to each other along with any previous and subsequent carries.
215 addze r8, r8 ; Add the carry
216 addze r8, r8 ; Add the carry again (the last add may have carried)
217 andis. r6, r8, 0xffff ; Stuff r6 with the high order 16 bits of sum word
218 srwi r6, r6, 16 ; Shift it to the low order word
219 andi. r8, r8, 0xffff ; Zero out the high order word
220 add r8, r8, r6 ; Add the two halves
222 andis. r6, r8, 0xffff ; Do the above again in case we carried into the
223 srwi r6, r6, 16 ; high order word with the last add.
227 cmpi cr0,0,r11,0 ; Check to see if we need to swap the bytes
231 * Our buffer began on an odd boundary, so we need to swap
232 * the checksum bytes.
234 slwi r8, r3, 8 ; shift byte 0 to byte 1
235 clrlwi r8, r8, 16 ; Clear top 16 bits
236 srwi r3, r3, 8 ; shift byte 1 to byte 0
237 or r3, r8, r3 ; or them
240 add r3, r3, r12 ; Add in the passed-in checksum
241 andis. r6, r3, 0xffff ; Wrap and add any carries into the top 16 bits
246 andis. r6, r3, 0xffff ; Do the above again in case we carried into the
247 srwi r6, r6, 16 ; high order word with the last add.