X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/ff6e181ae92fc6f1e89841290f461d1f2f9badd9..d190cdc3f5544636abb56dc1874be391d3e1b148:/bsd/netinet/in_cksum.c

diff --git a/bsd/netinet/in_cksum.c b/bsd/netinet/in_cksum.c
index ac8b2648c..bc302ae30 100644
--- a/bsd/netinet/in_cksum.c
+++ b/bsd/netinet/in_cksum.c
@@ -1,14 +1,19 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
@@ -18,7 +23,7 @@
  * Please see the License for the specific language governing rights and
  * limitations under the License.
  *
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * Copyright (c) 1988, 1992, 1993
@@ -56,10 +61,14 @@
  */
 
 #include <sys/param.h>
+#include <machine/endian.h>
 #include <sys/mbuf.h>
-#include <sys/kdebug.h>
-
-#define DBG_FNC_IN_CKSUM	NETDBG_CODE(DBG_NETIP, (3 << 8))
+#include <kern/debug.h>
+#include <net/dlil.h>
+#include <netinet/in.h>
+#define _IP_VHL
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
 
 /*
  * Checksum routine for Internet Protocol family headers (Portable Version).
@@ -67,378 +76,415 @@
  * This routine is very heavily used in the network
  * code and should be modified for each CPU to be as fast as possible.
  */
-
-union s_util {
-	char c[2];
-	u_short s;
-};
+#define REDUCE16 {							  \
+	q_util.q = sum;							  \
+	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+	sum = l_util.s[0] + l_util.s[1];				  \
+	ADDCARRY(sum);							  \
+}
 
 union l_util {
-	u_int16_t s[2];
-	u_int32_t l;
+	uint16_t s[2];
+	uint32_t l;
 };
 
 union q_util {
-	u_int16_t s[4];
-	u_int32_t l[2];
-	u_int64_t q;
-};
-
-#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
-
-#define REDUCE32 \
-	{ \
-	q_util.q = sum; \
-	sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
-	}
-#define REDUCE16 \
-	{ \
-	q_util.q = sum; \
-	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
-	sum = l_util.s[0] + l_util.s[1]; \
-	ADDCARRY(sum); \
-	}
-
-#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
-
-
-#if defined(ppc)
-
-__inline unsigned short
-in_addword(u_short a, u_short b)
-{
-	union l_util l_util;
-	u_int32_t sum = a + b;
-	REDUCE;
-	return (sum);
-}
+	uint16_t s[4];
+	uint32_t l[2];
+	uint64_t q;
+};
 
-__inline unsigned short
-in_pseudo(u_int a, u_int b, u_int c)
-{
-	u_int64_t sum;
-	union q_util q_util;
-	union l_util l_util;
+#define PREDICT_FALSE(_exp)	__builtin_expect((_exp), 0)
 
-	sum = (u_int64_t) a + b + c;
-	REDUCE16;
-	return (sum);
+static uint16_t in_cksumdata(const void *buf, int len);
 
-}
+/*
+ * Portable version of 16-bit 1's complement sum function that works
+ * on a contiguous buffer.  This is used mainly for instances where
+ * the caller is certain about the buffer requirements, e.g. for IP
+ * header checksum calculation, though it is capable of being used
+ * on any arbitrary data span.  The platform-specific cpu_in_cksum()
+ * routine might be better optimized, so use that instead for large
+ * data span.
+ *
+ * The logic is borrowed from <bsd/netinet/cpu_in_cksum.c>
+ */
+
-int
-in_cksum(m, len)
-	register struct mbuf *m;
-	register int len;
+#if ULONG_MAX == 0xffffffffUL
+/* 32-bit version */
+static uint16_t
+in_cksumdata(const void *buf, int mlen)
 {
-	register u_short *w;
-	register int sum = 0;
-	register int mlen = 0;
-	int starting_on_odd = 0;
-
-
-	KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);
-
-	for (;m && len; m = m->m_next) {
-		if (m->m_len == 0)
-			continue;
-		mlen = m->m_len;
-		w = mtod(m, u_short *);
-
-		if (len < mlen)
-			mlen = len;
-
-		sum = xsum_assym(w, mlen, sum, starting_on_odd);
-		len -= mlen;
-		if (mlen & 0x1)
-		{
-			if (starting_on_odd)
-				starting_on_odd = 0;
-			else
-				starting_on_odd = 1;
+	uint32_t sum, partial;
+	unsigned int final_acc;
+	const uint8_t *data = (const uint8_t *)buf;
+	boolean_t needs_swap, started_on_odd;
+
+	VERIFY(mlen >= 0);
+
+	needs_swap = FALSE;
+	started_on_odd = FALSE;
+
+	sum = 0;
+	partial = 0;
+
+	if ((uintptr_t)data & 1) {
+		/* Align on word boundary */
+		started_on_odd = !started_on_odd;
+#if BYTE_ORDER == LITTLE_ENDIAN
+		partial = *data << 8;
+#else
+		partial = *data;
+#endif
+		++data;
+		--mlen;
+	}
+	needs_swap = started_on_odd;
+	while (mlen >= 32) {
+		__builtin_prefetch(data + 32);
+		partial += *(const uint16_t *)(const void *)data;
+		partial += *(const uint16_t *)(const void *)(data + 2);
+		partial += *(const uint16_t *)(const void *)(data + 4);
+		partial += *(const uint16_t *)(const void *)(data + 6);
+		partial += *(const uint16_t *)(const void *)(data + 8);
+		partial += *(const uint16_t *)(const void *)(data + 10);
+		partial += *(const uint16_t *)(const void *)(data + 12);
+		partial += *(const uint16_t *)(const void *)(data + 14);
+		partial += *(const uint16_t *)(const void *)(data + 16);
+		partial += *(const uint16_t *)(const void *)(data + 18);
+		partial += *(const uint16_t *)(const void *)(data + 20);
+		partial += *(const uint16_t *)(const void *)(data + 22);
+		partial += *(const uint16_t *)(const void *)(data + 24);
+		partial += *(const uint16_t *)(const void *)(data + 26);
+		partial += *(const uint16_t *)(const void *)(data + 28);
+		partial += *(const uint16_t *)(const void *)(data + 30);
+		data += 32;
+		mlen -= 32;
+		if (PREDICT_FALSE(partial & 0xc0000000)) {
+			if (needs_swap)
+				partial = (partial << 8) +
+				    (partial >> 24);
+			sum += (partial >> 16);
+			sum += (partial & 0xffff);
+			partial = 0;
 		}
 	}
+	if (mlen & 16) {
+		partial += *(const uint16_t *)(const void *)data;
+		partial += *(const uint16_t *)(const void *)(data + 2);
+		partial += *(const uint16_t *)(const void *)(data + 4);
+		partial += *(const uint16_t *)(const void *)(data + 6);
+		partial += *(const uint16_t *)(const void *)(data + 8);
+		partial += *(const uint16_t *)(const void *)(data + 10);
+		partial += *(const uint16_t *)(const void *)(data + 12);
+		partial += *(const uint16_t *)(const void *)(data + 14);
+		data += 16;
+		mlen -= 16;
+	}
+	/*
+	 * mlen is not updated below as the remaining tests
+	 * are using bit masks, which are not affected.
+	 */
+	if (mlen & 8) {
+		partial += *(const uint16_t *)(const void *)data;
+		partial += *(const uint16_t *)(const void *)(data + 2);
+		partial += *(const uint16_t *)(const void *)(data + 4);
+		partial += *(const uint16_t *)(const void *)(data + 6);
+		data += 8;
+	}
+	if (mlen & 4) {
+		partial += *(const uint16_t *)(const void *)data;
+		partial += *(const uint16_t *)(const void *)(data + 2);
+		data += 4;
+	}
+	if (mlen & 2) {
+		partial += *(const uint16_t *)(const void *)data;
+		data += 2;
+	}
+	if (mlen & 1) {
+#if BYTE_ORDER == LITTLE_ENDIAN
+		partial += *data;
+#else
+		partial += *data << 8;
+#endif
+		started_on_odd = !started_on_odd;
+	}
 
-	KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0);
-	return (~sum & 0xffff);
+	if (needs_swap)
+		partial = (partial << 8) + (partial >> 24);
+	sum += (partial >> 16) + (partial & 0xffff);
+	sum = (sum >> 16) + (sum & 0xffff);
+
+	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
+	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
+
+	return (final_acc);
 }
 
-u_short
-in_cksum_skip(m, len, skip)
-	register struct mbuf *m;
-	register int len;
-	register int skip;
+#else
+/* 64-bit version */
+static uint16_t
+in_cksumdata(const void *buf, int mlen)
 {
-	register u_short *w;
-	register int sum = 0;
-	register int mlen = 0;
-	int starting_on_odd = 0;
-
-	len -= skip;
-	for (; skip && m; m = m->m_next) {
-		if (m->m_len > skip) {
-			mlen = m->m_len - skip;
-			w = (u_short *)(m->m_data+skip);
-			goto skip_start;
-		} else {
-			skip -= m->m_len;
-		}
-	}
-	for (;m && len; m = m->m_next) {
-		if (m->m_len == 0)
-			continue;
-		mlen = m->m_len;
-		w = mtod(m, u_short *);
-
-skip_start:
-		if (len < mlen)
-			mlen = len;
-		sum = xsum_assym(w, mlen, sum, starting_on_odd);
-		len -= mlen;
-		if (mlen & 0x1)
-		{
-			if (starting_on_odd)
-				starting_on_odd = 0;
-			else
-				starting_on_odd = 1;
+	uint64_t sum, partial;
+	unsigned int final_acc;
+	const uint8_t *data = (const uint8_t *)buf;
+	boolean_t needs_swap, started_on_odd;
+
+	VERIFY(mlen >= 0);
+
+	needs_swap = FALSE;
+	started_on_odd = FALSE;
+
+	sum = 0;
+	partial = 0;
+
+	if ((uintptr_t)data & 1) {
+		/* Align on word boundary */
+		started_on_odd = !started_on_odd;
+#if BYTE_ORDER == LITTLE_ENDIAN
+		partial = *data << 8;
+#else
+		partial = *data;
+#endif
+		++data;
+		--mlen;
+	}
+	needs_swap = started_on_odd;
+	if ((uintptr_t)data & 2) {
+		if (mlen < 2)
+			goto trailing_bytes;
+		partial += *(const uint16_t *)(const void *)data;
+		data += 2;
+		mlen -= 2;
+	}
+	while (mlen >= 64) {
+		__builtin_prefetch(data + 32);
+		__builtin_prefetch(data + 64);
+		partial += *(const uint32_t *)(const void *)data;
+		partial += *(const uint32_t *)(const void *)(data + 4);
+		partial += *(const uint32_t *)(const void *)(data + 8);
+		partial += *(const uint32_t *)(const void *)(data + 12);
+		partial += *(const uint32_t *)(const void *)(data + 16);
+		partial += *(const uint32_t *)(const void *)(data + 20);
+		partial += *(const uint32_t *)(const void *)(data + 24);
+		partial += *(const uint32_t *)(const void *)(data + 28);
+		partial += *(const uint32_t *)(const void *)(data + 32);
+		partial += *(const uint32_t *)(const void *)(data + 36);
+		partial += *(const uint32_t *)(const void *)(data + 40);
+		partial += *(const uint32_t *)(const void *)(data + 44);
+		partial += *(const uint32_t *)(const void *)(data + 48);
+		partial += *(const uint32_t *)(const void *)(data + 52);
+		partial += *(const uint32_t *)(const void *)(data + 56);
+		partial += *(const uint32_t *)(const void *)(data + 60);
+		data += 64;
+		mlen -= 64;
+		if (PREDICT_FALSE(partial & (3ULL << 62))) {
+			if (needs_swap)
+				partial = (partial << 8) +
+				    (partial >> 56);
+			sum += (partial >> 32);
+			sum += (partial & 0xffffffff);
+			partial = 0;
 		}
 	}
+	/*
+	 * mlen is not updated below as the remaining tests
+	 * are using bit masks, which are not affected.
+	 */
+	if (mlen & 32) {
+		partial += *(const uint32_t *)(const void *)data;
+		partial += *(const uint32_t *)(const void *)(data + 4);
+		partial += *(const uint32_t *)(const void *)(data + 8);
+		partial += *(const uint32_t *)(const void *)(data + 12);
+		partial += *(const uint32_t *)(const void *)(data + 16);
+		partial += *(const uint32_t *)(const void *)(data + 20);
+		partial += *(const uint32_t *)(const void *)(data + 24);
+		partial += *(const uint32_t *)(const void *)(data + 28);
+		data += 32;
+	}
+	if (mlen & 16) {
+		partial += *(const uint32_t *)(const void *)data;
+		partial += *(const uint32_t *)(const void *)(data + 4);
+		partial += *(const uint32_t *)(const void *)(data + 8);
+		partial += *(const uint32_t *)(const void *)(data + 12);
+		data += 16;
+	}
+	if (mlen & 8) {
+		partial += *(const uint32_t *)(const void *)data;
+		partial += *(const uint32_t *)(const void *)(data + 4);
+		data += 8;
+	}
+	if (mlen & 4) {
+		partial += *(const uint32_t *)(const void *)data;
+		data += 4;
+	}
+	if (mlen & 2) {
+		partial += *(const uint16_t *)(const void *)data;
+		data += 2;
+	}
+trailing_bytes:
+	if (mlen & 1) {
+#if BYTE_ORDER == LITTLE_ENDIAN
+		partial += *data;
+#else
+		partial += *data << 8;
+#endif
+		started_on_odd = !started_on_odd;
+	}
 
-	return (~sum & 0xffff);
+	if (needs_swap)
+		partial = (partial << 8) + (partial >> 56);
+	sum += (partial >> 32) + (partial & 0xffffffff);
+	sum = (sum >> 32) + (sum & 0xffffffff);
+
+	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
+	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
+	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
+	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
+
+	return (final_acc);
 }
 
-#else
+#endif /* ULONG_MAX != 0xffffffffUL */
 
-u_short
-in_addword(u_short a, u_short b)
-{
-	union l_util l_util;
-	u_int32_t sum = a + b;
-	REDUCE(sum);
-	return (sum);
-}
+/*
+ * Perform 16-bit 1's complement sum on a contiguous span.
+ */
+uint16_t
+b_sum16(const void *buf, int len)
+{
+	return (in_cksumdata(buf, len));
+}
+
+uint16_t inet_cksum_simple(struct mbuf *, int);
+/*
+ * For the exported _in_cksum symbol in BSDKernel symbol set.
+ */ +uint16_t +inet_cksum_simple(struct mbuf *m, int len) +{ + return (inet_cksum(m, 0, 0, len)); +} -u_short -in_pseudo(u_int a, u_int b, u_int c) +uint16_t +in_addword(uint16_t a, uint16_t b) { - u_int64_t sum; + uint64_t sum = a + b; + + ADDCARRY(sum); + return (sum); +} + +uint16_t +in_pseudo(uint32_t a, uint32_t b, uint32_t c) +{ + uint64_t sum; union q_util q_util; - union l_util l_util; + union l_util l_util; - sum = (u_int64_t) a + b + c; + sum = (uint64_t)a + b + c; REDUCE16; return (sum); } +uint16_t +in_pseudo64(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t sum; + union q_util q_util; + union l_util l_util; -int -in_cksum(m, len) - register struct mbuf *m; - register int len; + sum = a + b + c; + REDUCE16; + return (sum); +} + +/* + * May be used on IP header with options. + */ +uint16_t +in_cksum_hdr_opt(const struct ip *ip) { - register u_short *w; - register int sum = 0; - register int mlen = 0; - int byte_swapped = 0; - union s_util s_util; - union l_util l_util; - - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0); - - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - w = mtod(m, u_short *); - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else - mlen = m->m_len; - if (len < mlen) - mlen = len; - len -= mlen; - /* - * Force to even boundary. - */ - if ((1 & (int) w) && (mlen > 0)) { - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)((char *)w + 1); - mlen--; - byte_swapped = 1; - } - /* - * Unroll the loop to make overhead from - * branches &c small. - */ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; - } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("cksum: out of data\n"); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; + return (~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff); +} + +/* + * A wrapper around the simple in_cksum_hdr() and the more complicated + * inet_cksum(); the former is chosen if the IP header is simple, + * contiguous and 32-bit aligned. Also does some stats accounting. 
+ */ +uint16_t +ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out) +{ + struct ip *ip = mtod(m, struct ip *); + + if (out) { + ipstat.ips_snd_swcsum++; + ipstat.ips_snd_swcsum_bytes += hlen; + } else { + ipstat.ips_rcv_swcsum++; + ipstat.ips_rcv_swcsum_bytes += hlen; } - REDUCE; - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0); - return (~sum & 0xffff); + + if (hlen == sizeof (*ip) && + m->m_len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip)) + return (in_cksum_hdr(ip)); + + return (inet_cksum(m, 0, 0, hlen)); } -int -in_cksum_skip(m, len, skip) - register struct mbuf *m; - register u_short len; - register u_short skip; +/* + * m MUST contain at least an IP header, if nxt is specified; + * nxt is the upper layer protocol number; + * off is an offset where TCP/UDP/ICMP header starts; + * len is a total length of a transport segment (e.g. TCP header + TCP payload) + */ +uint16_t +inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len) { - register u_short *w; - register int sum = 0; - register int mlen = 0; - int byte_swapped = 0; - union s_util s_util; - union l_util l_util; - - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0); - - len -= skip; - for (; skip && m; m = m->m_next) { - if (m->m_len > skip) { - mlen = m->m_len - skip; - w = (u_short *)(m->m_data+skip); - goto skip_start; - } else { - skip -= m->m_len; - } - } - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - w = mtod(m, u_short *); - - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else { - mlen = m->m_len; - } -skip_start: - if (len < mlen) - mlen = len; + uint32_t sum; + + sum = m_sum16(m, off, len); + + /* include pseudo header checksum? */ + if (nxt != 0) { + struct ip *ip; + unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8))); + uint32_t mlen; - len -= mlen; /* - * Force to even boundary. + * Sanity check + * + * Use m_length2() instead of m_length(), as we cannot rely on + * the caller setting m_pkthdr.len correctly, if the mbuf is + * a M_PKTHDR one. */ - if ((1 & (int) w) && (mlen > 0)) { - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)((char *)w + 1); - mlen--; - byte_swapped = 1; + if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) { + panic("%s: mbuf %p too short (%d) for IPv4 header", + __func__, m, mlen); + /* NOTREACHED */ } + /* - * Unroll the loop to make overhead from - * branches &c small. + * In case the IP header is not contiguous, or not 32-bit + * aligned, copy it to a local buffer. Note here that we + * expect the data pointer to point to the IP header. 
*/ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; + if ((sizeof (*ip) > m->m_len) || + !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) { + m_copydata(m, 0, sizeof (*ip), (caddr_t)buf); + ip = (struct ip *)(void *)buf; + } else { + ip = (struct ip *)(void *)(m->m_data); } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("cksum: out of data\n"); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; + + /* add pseudo header checksum */ + sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htonl(len + nxt)); + + /* fold in carry bits */ + ADDCARRY(sum); } - REDUCE; - KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0); + return (~sum & 0xffff); } - -#endif
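
The REDUCE16 macro that this change hoists to the top of the file, and the final_acc folding at the end of each in_cksumdata() variant, rely on the same identity: a 1's complement sum may be computed in a wide accumulator and folded afterwards, adding each carry-out back into the low-order bits. A minimal standalone sketch of the fold, assuming a 64-bit accumulator (fold64 is a hypothetical name, not a symbol in this file):

#include <stdint.h>

/* Fold a 64-bit 1's complement accumulator to 16 bits, mirroring REDUCE16
 * and the final_acc computation in in_cksumdata(): add the four 16-bit
 * lanes, then keep folding the carry back in until the result fits. */
static uint16_t
fold64(uint64_t sum)
{
	sum = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return ((uint16_t)sum);
}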
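
The in_cksumdata() loops exploit the order-independence of 1's complement addition: 16-bit (or, in the 64-bit build, 32-bit) words are accumulated into a wider partial sum, and the fold is deferred until the top bits of partial show that another unrolled pass could overflow, which is the PREDICT_FALSE branch. An odd starting address is handled by summing as though the stream began one byte earlier and byte-swapping the result at the end (started_on_odd/needs_swap). A simplified userland sketch of the deferred-fold idea, with memcpy standing in for the kernel's cast loads and without the byte-swap handling (sum16_sketch is a hypothetical name):

#include <stdint.h>
#include <string.h>
#include <stddef.h>

/* Simplified in_cksumdata(): accumulate 16-bit words into a 32-bit
 * partial sum; fold into sum only when the top bits say it might wrap. */
static uint16_t
sum16_sketch(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t sum = 0, partial = 0;

	while (len >= 2) {
		uint16_t w;

		memcpy(&w, p, sizeof (w));	/* unaligned-safe 16-bit load */
		partial += w;
		p += 2;
		len -= 2;
		if (partial & 0xc0000000) {	/* fold before it can overflow */
			sum += (partial >> 16) + (partial & 0xffff);
			partial = 0;
		}
	}
	if (len != 0) {
		/* Trailing odd byte; a big-endian host would shift it
		 * left by 8 instead, as the kernel code above does. */
		partial += *p;
	}
	sum += (partial >> 16) + (partial & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return ((uint16_t)sum);
}

The wire checksum is the complement of such a sum, which is what inet_cksum() returns; conversely, summing a received IPv4 header over its full IP_VHL_HL(ip_vhl) << 2 bytes, checksum field included, yields 0xffff, which is why a zero result from the in_cksum_hdr_opt() computation means the header verifies.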
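
in_addword(), rewritten above to widen into a uint64_t and ADDCARRY the result, is the primitive for combining two 16-bit partial checksums: plain addition with the carry wrapped back around (end-around carry). ADDCARRY subtracts 65535 rather than masking; the two are equivalent, since for x > 65535 the value x - 65535 equals (x & 0xffff) + 1. A sketch of the same operation (add16 is a hypothetical name):

#include <stdint.h>

/* 1's complement addition of two 16-bit partial sums, as in_addword():
 * widen, add, and fold the carry-out back into the low 16 bits. */
static uint16_t
add16(uint16_t a, uint16_t b)
{
	uint32_t sum = (uint32_t)a + b;

	return ((uint16_t)((sum >> 16) + (sum & 0xffff)));
}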
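
inet_cksum() seeds a transport checksum with the IPv4 pseudo header by calling in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(len + nxt)): the two 32-bit addresses plus one 32-bit word carrying the segment length and protocol number, folded by REDUCE16. A self-contained sketch of that arithmetic, assuming the addresses are already in network byte order (pseudo_hdr_sum is a hypothetical name):

#include <arpa/inet.h>
#include <stdint.h>

/* 1's complement sum of the IPv4 pseudo header, matching what
 * in_pseudo(src, dst, htonl(len + proto)) computes. */
static uint16_t
pseudo_hdr_sum(uint32_t src, uint32_t dst, uint8_t proto, uint16_t len)
{
	uint64_t sum = (uint64_t)src + dst + htonl((uint32_t)len + proto);

	/* REDUCE16: fold 64 -> 32 -> 16 bits with end-around carry */
	sum = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);
	return ((uint16_t)sum);
}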
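
Putting the pieces together the way inet_cksum() does for a contiguous UDP segment: sum the transport header plus payload, add the pseudo-header sum with end-around carry, and complement. A hypothetical driver built on the sketches above (udp_cksum_sketch is not a kernel symbol; note that per RFC 768 a computed value of zero is transmitted as 0xffff on the wire):

/* Hypothetical userland walk-through combining the sketches above. */
uint16_t
udp_cksum_sketch(uint32_t src, uint32_t dst, const void *seg, uint16_t len)
{
	uint32_t sum = sum16_sketch(seg, len);		/* UDP header + data */

	sum += pseudo_hdr_sum(src, dst, 17, len);	/* IPPROTO_UDP == 17 */
	sum = (sum >> 16) + (sum & 0xffff);		/* ADDCARRY */
	return ((uint16_t)(~sum & 0xffff));
}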