X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e5568f75972dfc723778653c11cb6b4dc825716a..eb6b6ca394357805f2bdba989abae309f718b4d8:/bsd/netinet/in_cksum.c diff --git a/bsd/netinet/in_cksum.c b/bsd/netinet/in_cksum.c index 9d349d7b5..b4cd509ff 100644 --- a/bsd/netinet/in_cksum.c +++ b/bsd/netinet/in_cksum.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1988, 1992, 1993 @@ -55,10 +61,14 @@ */ #include +#include #include -#include - -#define DBG_FNC_IN_CKSUM NETDBG_CODE(DBG_NETIP, (3 << 8)) +#include +#include +#include +#define _IP_VHL +#include +#include /* * Checksum routine for Internet Protocol family headers (Portable Version). @@ -66,378 +76,564 @@ * This routine is very heavily used in the network * code and should be modified for each CPU to be as fast as possible. */ - -union s_util { - char c[2]; - u_short s; -}; +#define REDUCE16 { \ + q_util.q = sum; \ + l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ + sum = l_util.s[0] + l_util.s[1]; \ + ADDCARRY(sum); \ +} union l_util { - u_int16_t s[2]; - u_int32_t l; + uint16_t s[2]; + uint32_t l; }; union q_util { - u_int16_t s[4]; - u_int32_t l[2]; - u_int64_t q; -}; - -#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) - -#define REDUCE32 \ - { \ - q_util.q = sum; \ - sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - } -#define REDUCE16 \ - { \ - q_util.q = sum; \ - l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \ - sum = l_util.s[0] + l_util.s[1]; \ - ADDCARRY(sum); \ - } - -#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} - - -#if defined(ppc) - -__inline unsigned short -in_addword(u_short a, u_short b) + uint16_t s[4]; + uint32_t l[2]; + uint64_t q; +}; + +extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t); + +/* + * Perform 16-bit 1's complement sum on a contiguous span. + */ +uint16_t +b_sum16(const void *buf, int len) { - union l_util l_util; - u_int32_t sum = a + b; - REDUCE; - return (sum); + return os_cpu_in_cksum(buf, len, 0); } -__inline unsigned short -in_pseudo(u_int a, u_int b, u_int c) +uint16_t inet_cksum_simple(struct mbuf *, int); +/* + * For the exported _in_cksum symbol in BSDKernel symbol set. + */ +uint16_t +inet_cksum_simple(struct mbuf *m, int len) { - u_int64_t sum; - union q_util q_util; - union l_util l_util; + return inet_cksum(m, 0, 0, len); +} - sum = (u_int64_t) a + b + c; - REDUCE16; - return (sum); +uint16_t +in_addword(uint16_t a, uint16_t b) +{ + uint64_t sum = a + b; + ADDCARRY(sum); + return sum; } -int -in_cksum(m, len) - register struct mbuf *m; - register int len; +uint16_t +in_pseudo(uint32_t a, uint32_t b, uint32_t c) { - register u_short *w; - register int sum = 0; - register int mlen = 0; - int starting_on_odd = 0; + uint64_t sum; + union q_util q_util; + union l_util l_util; + sum = (uint64_t)a + b + c; + REDUCE16; + return sum; +} - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0); +uint16_t +in_pseudo64(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t sum; + union q_util q_util; + union l_util l_util; - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - mlen = m->m_len; - w = mtod(m, u_short *); + sum = a + b + c; + REDUCE16; + return sum; +} - if (len < mlen) - mlen = len; +/* + * May be used on IP header with options. + */ +uint16_t +in_cksum_hdr_opt(const struct ip *ip) +{ + return ~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff; +} - sum = xsum_assym(w, mlen, sum, starting_on_odd); - len -= mlen; - if (mlen & 0x1) - { - if (starting_on_odd) - starting_on_odd = 0; - else - starting_on_odd = 1; - } +/* + * A wrapper around the simple in_cksum_hdr() and the more complicated + * inet_cksum(); the former is chosen if the IP header is simple, + * contiguous and 32-bit aligned. Also does some stats accounting. + */ +uint16_t +ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out) +{ + struct ip *ip = mtod(m, struct ip *); + + if (out) { + ipstat.ips_snd_swcsum++; + ipstat.ips_snd_swcsum_bytes += hlen; + } else { + ipstat.ips_rcv_swcsum++; + ipstat.ips_rcv_swcsum_bytes += hlen; + } + + if (hlen == sizeof(*ip) && + m->m_len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) { + return in_cksum_hdr(ip); } - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0); - return (~sum & 0xffff); + return inet_cksum(m, 0, 0, hlen); } -u_short -in_cksum_skip(m, len, skip) - register struct mbuf *m; - register int len; - register int skip; +uint16_t +ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len, + int out) { - register u_short *w; - register int sum = 0; - register int mlen = 0; - int starting_on_odd = 0; - - len -= skip; - for (; skip && m; m = m->m_next) { - if (m->m_len > skip) { - mlen = m->m_len - skip; - w = (u_short *)(m->m_data+skip); - goto skip_start; - } else { - skip -= m->m_len; - } - } - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - mlen = m->m_len; - w = mtod(m, u_short *); + const struct ip *ip = buffer; + + if (out) { + ipstat.ips_snd_swcsum++; + ipstat.ips_snd_swcsum_bytes += hlen; + } else { + ipstat.ips_rcv_swcsum++; + ipstat.ips_rcv_swcsum_bytes += hlen; + } -skip_start: - if (len < mlen) - mlen = len; - sum = xsum_assym(w, mlen, sum, starting_on_odd); - len -= mlen; - if (mlen & 0x1) - { - if (starting_on_odd) - starting_on_odd = 0; - else - starting_on_odd = 1; + if (hlen == sizeof(*ip) && + len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) { + return in_cksum_hdr(ip); + } + + return inet_cksum_buffer(buffer, 0, 0, hlen); +} + +/* + * m MUST contain at least an IP header, if nxt is specified; + * nxt is the upper layer protocol number; + * off is an offset where TCP/UDP/ICMP header starts; + * len is a total length of a transport segment (e.g. TCP header + TCP payload) + */ +uint16_t +inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len) +{ + uint32_t sum; + + sum = m_sum16(m, off, len); + + /* include pseudo header checksum? */ + if (nxt != 0) { + struct ip *ip; + unsigned char buf[sizeof((*ip))] __attribute__((aligned(8))); + uint32_t mlen; + + /* + * Sanity check + * + * Use m_length2() instead of m_length(), as we cannot rely on + * the caller setting m_pkthdr.len correctly, if the mbuf is + * a M_PKTHDR one. + */ + if ((mlen = m_length2(m, NULL)) < sizeof(*ip)) { + panic("%s: mbuf %p too short (%d) for IPv4 header", + __func__, m, mlen); + /* NOTREACHED */ } + + /* + * In case the IP header is not contiguous, or not 32-bit + * aligned, copy it to a local buffer. Note here that we + * expect the data pointer to point to the IP header. + */ + if ((sizeof(*ip) > m->m_len) || + !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) { + m_copydata(m, 0, sizeof(*ip), (caddr_t)buf); + ip = (struct ip *)(void *)buf; + } else { + ip = (struct ip *)(void *)(m->m_data); + } + + /* add pseudo header checksum */ + sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl(len + nxt)); + + /* fold in carry bits */ + ADDCARRY(sum); } - return (~sum & 0xffff); + return ~sum & 0xffff; } -#else - -u_short -in_addword(u_short a, u_short b) -{ - union l_util l_util; - u_int32_t sum = a + b; - REDUCE(sum); - return (sum); -} - -u_short -in_pseudo(u_int a, u_int b, u_int c) + +/* + * buffer MUST contain at least an IP header, if nxt is specified; + * nxt is the upper layer protocol number; + * off is an offset where TCP/UDP/ICMP header starts; + * len is a total length of a transport segment (e.g. TCP header + TCP payload) + */ +uint16_t +inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off, + uint32_t len) { - u_int64_t sum; - union q_util q_util; - union l_util l_util; + uint32_t sum; - sum = (u_int64_t) a + b + c; - REDUCE16; - return (sum); + if (off >= len) { + panic("%s: off (%d) >= len (%d)", __func__, off, len); + } + + sum = b_sum16(&((const uint8_t *)buffer)[off], len); + + /* include pseudo header checksum? */ + if (nxt != 0) { + const struct ip *ip; + unsigned char buf[sizeof((*ip))] __attribute__((aligned(8))); + + /* + * In case the IP header is not contiguous, or not 32-bit + * aligned, copy it to a local buffer. Note here that we + * expect the data pointer to point to the IP header. + */ + if (!IP_HDR_ALIGNED_P(buffer)) { + memcpy(buf, buffer, sizeof(*ip)); + ip = (const struct ip *)(const void *)buf; + } else { + ip = (const struct ip *)buffer; + } + + /* add pseudo header checksum */ + sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl(len + nxt)); + + /* fold in carry bits */ + ADDCARRY(sum); + } + + return ~sum & 0xffff; } +#if DEBUG || DEVELOPMENT +#include -int -in_cksum(m, len) - register struct mbuf *m; - register int len; -{ - register u_short *w; - register int sum = 0; - register int mlen = 0; - int byte_swapped = 0; - union s_util s_util; - union l_util l_util; +#define CKSUM_ERR kprintf - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0); +/* + * The following routines implement the portable, reference implementation + * of os_cpu_in_cksum_mbuf(). This is currently used only for validating + * the correctness of the platform-specific implementation, at boot time + * in dlil_verify_sum16(). It returns the 32-bit accumulator without doing + * a 1's complement on it. + */ +#if !defined(__LP64__) +/* 32-bit version */ +uint32_t +in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum) +{ + int mlen; + uint32_t sum, partial; + unsigned int final_acc; + uint8_t *data; + boolean_t needs_swap, started_on_odd; + + VERIFY(len >= 0); + VERIFY(off >= 0); + + needs_swap = FALSE; + started_on_odd = FALSE; + sum = (initial_sum >> 16) + (initial_sum & 0xffff); + + for (;;) { + if (__improbable(m == NULL)) { + CKSUM_ERR("%s: out of data\n", __func__); + return (uint32_t)-1; + } + mlen = m->m_len; + if (mlen > off) { + mlen -= off; + data = mtod(m, uint8_t *) + off; + goto post_initial_offset; + } + off -= mlen; + if (len == 0) { + break; + } + m = m->m_next; + } - for (;m && len; m = m->m_next) { - if (m->m_len == 0) + for (; len > 0; m = m->m_next) { + if (__improbable(m == NULL)) { + CKSUM_ERR("%s: out of data\n", __func__); + return (uint32_t)-1; + } + mlen = m->m_len; + data = mtod(m, uint8_t *); +post_initial_offset: + if (mlen == 0) { continue; - w = mtod(m, u_short *); - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else - mlen = m->m_len; - if (len < mlen) + } + if (mlen > len) { mlen = len; + } len -= mlen; + + partial = 0; + if ((uintptr_t)data & 1) { + /* Align on word boundary */ + started_on_odd = !started_on_odd; +#if BYTE_ORDER == LITTLE_ENDIAN + partial = *data << 8; +#else /* BYTE_ORDER != LITTLE_ENDIAN */ + partial = *data; +#endif /* BYTE_ORDER != LITTLE_ENDIAN */ + ++data; + --mlen; + } + needs_swap = started_on_odd; + while (mlen >= 32) { + __builtin_prefetch(data + 32); + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + partial += *(uint16_t *)(void *)(data + 8); + partial += *(uint16_t *)(void *)(data + 10); + partial += *(uint16_t *)(void *)(data + 12); + partial += *(uint16_t *)(void *)(data + 14); + partial += *(uint16_t *)(void *)(data + 16); + partial += *(uint16_t *)(void *)(data + 18); + partial += *(uint16_t *)(void *)(data + 20); + partial += *(uint16_t *)(void *)(data + 22); + partial += *(uint16_t *)(void *)(data + 24); + partial += *(uint16_t *)(void *)(data + 26); + partial += *(uint16_t *)(void *)(data + 28); + partial += *(uint16_t *)(void *)(data + 30); + data += 32; + mlen -= 32; + if (__improbable(partial & 0xc0000000)) { + if (needs_swap) { + partial = (partial << 8) + + (partial >> 24); + } + sum += (partial >> 16); + sum += (partial & 0xffff); + partial = 0; + } + } + if (mlen & 16) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + partial += *(uint16_t *)(void *)(data + 8); + partial += *(uint16_t *)(void *)(data + 10); + partial += *(uint16_t *)(void *)(data + 12); + partial += *(uint16_t *)(void *)(data + 14); + data += 16; + mlen -= 16; + } /* - * Force to even boundary. + * mlen is not updated below as the remaining tests + * are using bit masks, which are not affected. */ - if ((1 & (int) w) && (mlen > 0)) { - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)((char *)w + 1); - mlen--; - byte_swapped = 1; + if (mlen & 8) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + partial += *(uint16_t *)(void *)(data + 4); + partial += *(uint16_t *)(void *)(data + 6); + data += 8; + } + if (mlen & 4) { + partial += *(uint16_t *)(void *)data; + partial += *(uint16_t *)(void *)(data + 2); + data += 4; + } + if (mlen & 2) { + partial += *(uint16_t *)(void *)data; + data += 2; + } + if (mlen & 1) { +#if BYTE_ORDER == LITTLE_ENDIAN + partial += *data; +#else /* BYTE_ORDER != LITTLE_ENDIAN */ + partial += *data << 8; +#endif /* BYTE_ORDER != LITTLE_ENDIAN */ + started_on_odd = !started_on_odd; } + + if (needs_swap) { + partial = (partial << 8) + (partial >> 24); + } + sum += (partial >> 16) + (partial & 0xffff); /* - * Unroll the loop to make overhead from - * branches &c small. + * Reduce sum to allow potential byte swap + * in the next iteration without carry. */ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; - } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("cksum: out of data\n"); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; + sum = (sum >> 16) + (sum & 0xffff); } - REDUCE; - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0); - return (~sum & 0xffff); + final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + return final_acc & 0xffff; } -int -in_cksum_skip(m, len, skip) - register struct mbuf *m; - register u_short len; - register u_short skip; +#else /* __LP64__ */ +/* 64-bit version */ +uint32_t +in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum) { - register u_short *w; - register int sum = 0; - register int mlen = 0; - int byte_swapped = 0; - union s_util s_util; - union l_util l_util; - - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0); - - len -= skip; - for (; skip && m; m = m->m_next) { - if (m->m_len > skip) { - mlen = m->m_len - skip; - w = (u_short *)(m->m_data+skip); - goto skip_start; - } else { - skip -= m->m_len; - } - } - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - w = mtod(m, u_short *); - - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else { - mlen = m->m_len; + int mlen; + uint64_t sum, partial; + unsigned int final_acc; + uint8_t *data; + boolean_t needs_swap, started_on_odd; + + VERIFY(len >= 0); + VERIFY(off >= 0); + + needs_swap = FALSE; + started_on_odd = FALSE; + sum = initial_sum; + + for (;;) { + if (__improbable(m == NULL)) { + CKSUM_ERR("%s: out of data\n", __func__); + return (uint32_t)-1; + } + mlen = m->m_len; + if (mlen > off) { + mlen -= off; + data = mtod(m, uint8_t *) + off; + goto post_initial_offset; } -skip_start: - if (len < mlen) - mlen = len; + off -= mlen; + if (len == 0) { + break; + } + m = m->m_next; + } + for (; len > 0; m = m->m_next) { + if (__improbable(m == NULL)) { + CKSUM_ERR("%s: out of data\n", __func__); + return (uint32_t)-1; + } + mlen = m->m_len; + data = mtod(m, uint8_t *); +post_initial_offset: + if (mlen == 0) { + continue; + } + if (mlen > len) { + mlen = len; + } len -= mlen; + + partial = 0; + if ((uintptr_t)data & 1) { + /* Align on word boundary */ + started_on_odd = !started_on_odd; +#if BYTE_ORDER == LITTLE_ENDIAN + partial = *data << 8; +#else /* BYTE_ORDER != LITTLE_ENDIAN */ + partial = *data; +#endif /* BYTE_ORDER != LITTLE_ENDIAN */ + ++data; + --mlen; + } + needs_swap = started_on_odd; + if ((uintptr_t)data & 2) { + if (mlen < 2) { + goto trailing_bytes; + } + partial += *(uint16_t *)(void *)data; + data += 2; + mlen -= 2; + } + while (mlen >= 64) { + __builtin_prefetch(data + 32); + __builtin_prefetch(data + 64); + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + partial += *(uint32_t *)(void *)(data + 16); + partial += *(uint32_t *)(void *)(data + 20); + partial += *(uint32_t *)(void *)(data + 24); + partial += *(uint32_t *)(void *)(data + 28); + partial += *(uint32_t *)(void *)(data + 32); + partial += *(uint32_t *)(void *)(data + 36); + partial += *(uint32_t *)(void *)(data + 40); + partial += *(uint32_t *)(void *)(data + 44); + partial += *(uint32_t *)(void *)(data + 48); + partial += *(uint32_t *)(void *)(data + 52); + partial += *(uint32_t *)(void *)(data + 56); + partial += *(uint32_t *)(void *)(data + 60); + data += 64; + mlen -= 64; + if (__improbable(partial & (3ULL << 62))) { + if (needs_swap) { + partial = (partial << 8) + + (partial >> 56); + } + sum += (partial >> 32); + sum += (partial & 0xffffffff); + partial = 0; + } + } /* - * Force to even boundary. + * mlen is not updated below as the remaining tests + * are using bit masks, which are not affected. */ - if ((1 & (int) w) && (mlen > 0)) { - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)((char *)w + 1); - mlen--; - byte_swapped = 1; + if (mlen & 32) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + partial += *(uint32_t *)(void *)(data + 16); + partial += *(uint32_t *)(void *)(data + 20); + partial += *(uint32_t *)(void *)(data + 24); + partial += *(uint32_t *)(void *)(data + 28); + data += 32; + } + if (mlen & 16) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + partial += *(uint32_t *)(void *)(data + 8); + partial += *(uint32_t *)(void *)(data + 12); + data += 16; + } + if (mlen & 8) { + partial += *(uint32_t *)(void *)data; + partial += *(uint32_t *)(void *)(data + 4); + data += 8; + } + if (mlen & 4) { + partial += *(uint32_t *)(void *)data; + data += 4; } + if (mlen & 2) { + partial += *(uint16_t *)(void *)data; + data += 2; + } +trailing_bytes: + if (mlen & 1) { +#if BYTE_ORDER == LITTLE_ENDIAN + partial += *data; +#else /* BYTE_ORDER != LITTLE_ENDIAN */ + partial += *data << 8; +#endif /* BYTE_ORDER != LITTLE_ENDIAN */ + started_on_odd = !started_on_odd; + } + + if (needs_swap) { + partial = (partial << 8) + (partial >> 56); + } + sum += (partial >> 32) + (partial & 0xffffffff); /* - * Unroll the loop to make overhead from - * branches &c small. + * Reduce sum to allow potential byte swap + * in the next iteration without carry. */ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; - } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("cksum: out of data\n"); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; + sum = (sum >> 32) + (sum & 0xffffffff); } - REDUCE; - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0); - return (~sum & 0xffff); + final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) + + ((sum >> 16) & 0xffff) + (sum & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + final_acc = (final_acc >> 16) + (final_acc & 0xffff); + return final_acc & 0xffff; } - -#endif +#endif /* __LP64 */ +#endif /* DEBUG || DEVELOPMENT */