]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_cksum.c
xnu-2782.1.97.tar.gz
[apple/xnu.git] / bsd / netinet / in_cksum.c
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1988, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
61 */
62
63#include <sys/param.h>
39236c6e 64#include <machine/endian.h>
1c79356b 65#include <sys/mbuf.h>
2d21ac55 66#include <kern/debug.h>
39236c6e 67#include <net/dlil.h>
2d21ac55 68#include <netinet/in.h>
39236c6e 69#define _IP_VHL
2d21ac55 70#include <netinet/ip.h>
39236c6e 71#include <netinet/ip_var.h>
1c79356b
A
72
73/*
74 * Checksum routine for Internet Protocol family headers (Portable Version).
75 *
76 * This routine is very heavily used in the network
77 * code and should be modified for each CPU to be as fast as possible.
78 */
39236c6e
A
/*
 * Fold the 64-bit accumulator "sum" down to a 16-bit 1's complement
 * value: add the four 16-bit lanes of the quad (via q_util), then the
 * two 16-bit halves of the 32-bit result (via l_util), and sweep any
 * remaining end-around carry back in with ADDCARRY (defined in a
 * netinet header — presumably <netinet/ip_var.h>; confirm).
 * Expects locals "sum", "q_util" and "l_util" in scope at expansion.
 */
#define REDUCE16 { \
	q_util.q = sum; \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1]; \
	ADDCARRY(sum); \
}
1c79356b 85
0b4e3aa0 86union l_util {
39236c6e
A
87 uint16_t s[2];
88 uint32_t l;
0b4e3aa0 89};
1c79356b 90
0b4e3aa0 91union q_util {
39236c6e
A
92 uint16_t s[4];
93 uint32_t l[2];
94 uint64_t q;
2d21ac55 95};
0b4e3aa0 96
39236c6e
A
/* Branch-prediction hint: the wrapped expression is expected to be 0 */
#define PREDICT_FALSE(_exp) __builtin_expect((_exp), 0)

/* Portable 16-bit 1's complement sum over a contiguous buffer */
static uint16_t in_cksumdata(const void *buf, int len);
100
101/*
102 * Portable version of 16-bit 1's complement sum function that works
103 * on a contiguous buffer. This is used mainly for instances where
104 * the caller is certain about the buffer requirements, e.g. for IP
105 * header checksum calculation, though it is capable of being used
106 * on any arbitrary data span. The platform-specific cpu_in_cksum()
107 * routine might be better-optmized, so use that instead for large
108 * data span.
109 *
110 * The logic is borrowed from <bsd/netinet/cpu_in_cksum.c>
111 */
112
113#if ULONG_MAX == 0xffffffffUL
114/* 32-bit version */
/*
 * 32-bit flavor: 1's complement sum of the "mlen" bytes at "buf".
 * Handles an odd starting address by byte-swap bookkeeping
 * (started_on_odd/needs_swap) so the lane positions come out right.
 * Returns the folded 16-bit sum (not complemented).
 */
static uint16_t
in_cksumdata(const void *buf, int mlen)
{
	uint32_t sum, partial;		/* running total / in-flight partial */
	unsigned int final_acc;
	uint8_t *data = (void *)buf;
	boolean_t needs_swap, started_on_odd;

	VERIFY(mlen >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;

	sum = 0;
	partial = 0;

	if ((uintptr_t)data & 1) {
		/* Align on word boundary */
		started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
		partial = *data << 8;
#else
		partial = *data;
#endif
		++data;
		--mlen;
	}
	needs_swap = started_on_odd;
	/* Main loop: sixteen aligned 16-bit loads (32 bytes) per pass */
	while (mlen >= 32) {
		__builtin_prefetch(data + 32);
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		partial += *(uint16_t *)(void *)(data + 4);
		partial += *(uint16_t *)(void *)(data + 6);
		partial += *(uint16_t *)(void *)(data + 8);
		partial += *(uint16_t *)(void *)(data + 10);
		partial += *(uint16_t *)(void *)(data + 12);
		partial += *(uint16_t *)(void *)(data + 14);
		partial += *(uint16_t *)(void *)(data + 16);
		partial += *(uint16_t *)(void *)(data + 18);
		partial += *(uint16_t *)(void *)(data + 20);
		partial += *(uint16_t *)(void *)(data + 22);
		partial += *(uint16_t *)(void *)(data + 24);
		partial += *(uint16_t *)(void *)(data + 26);
		partial += *(uint16_t *)(void *)(data + 28);
		partial += *(uint16_t *)(void *)(data + 30);
		data += 32;
		mlen -= 32;
		/*
		 * Spill "partial" into "sum" before it can overflow;
		 * the top-two-bits test leaves headroom for another
		 * 16 halfword additions.
		 */
		if (PREDICT_FALSE(partial & 0xc0000000)) {
			if (needs_swap)
				partial = (partial << 8) +
				    (partial >> 24);
			sum += (partial >> 16);
			sum += (partial & 0xffff);
			partial = 0;
		}
	}
	if (mlen & 16) {
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		partial += *(uint16_t *)(void *)(data + 4);
		partial += *(uint16_t *)(void *)(data + 6);
		partial += *(uint16_t *)(void *)(data + 8);
		partial += *(uint16_t *)(void *)(data + 10);
		partial += *(uint16_t *)(void *)(data + 12);
		partial += *(uint16_t *)(void *)(data + 14);
		data += 16;
		mlen -= 16;
	}
	/*
	 * mlen is not updated below as the remaining tests
	 * are using bit masks, which are not affected.
	 */
	if (mlen & 8) {
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		partial += *(uint16_t *)(void *)(data + 4);
		partial += *(uint16_t *)(void *)(data + 6);
		data += 8;
	}
	if (mlen & 4) {
		partial += *(uint16_t *)(void *)data;
		partial += *(uint16_t *)(void *)(data + 2);
		data += 4;
	}
	if (mlen & 2) {
		partial += *(uint16_t *)(void *)data;
		data += 2;
	}
	if (mlen & 1) {
		/* trailing odd byte goes in the high/low lane per endianness */
#if BYTE_ORDER == LITTLE_ENDIAN
		partial += *data;
#else
		partial += *data << 8;
#endif
		started_on_odd = !started_on_odd;
	}

	/* undo the byte rotation introduced by an odd starting address */
	if (needs_swap)
		partial = (partial << 8) + (partial >> 24);
	sum += (partial >> 16) + (partial & 0xffff);
	sum = (sum >> 16) + (sum & 0xffff);

	/* final end-around-carry folds down to 16 bits */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);

	return (final_acc);
}
223
224#else
225/* 64-bit version */
/*
 * 64-bit flavor: 1's complement sum of the "mlen" bytes at "buf",
 * accumulating 32-bit loads into a 64-bit partial.  Odd starting
 * addresses are tracked (started_on_odd/needs_swap) and compensated
 * with a byte rotation at the end.  Returns the folded 16-bit sum
 * (not complemented).
 */
static uint16_t
in_cksumdata(const void *buf, int mlen)
{
	uint64_t sum, partial;		/* running total / in-flight partial */
	unsigned int final_acc;
	uint8_t *data = (void *)buf;
	boolean_t needs_swap, started_on_odd;

	VERIFY(mlen >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;

	sum = 0;
	partial = 0;

	if ((uintptr_t)data & 1) {
		/* Align on word boundary */
		started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
		partial = *data << 8;
#else
		partial = *data;
#endif
		++data;
		--mlen;
	}
	needs_swap = started_on_odd;
	/* Align on a 32-bit boundary for the main loop's word loads */
	if ((uintptr_t)data & 2) {
		if (mlen < 2)
			goto trailing_bytes;
		partial += *(uint16_t *)(void *)data;
		data += 2;
		mlen -= 2;
	}
	/* Main loop: sixteen aligned 32-bit loads (64 bytes) per pass */
	while (mlen >= 64) {
		__builtin_prefetch(data + 32);
		__builtin_prefetch(data + 64);
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		partial += *(uint32_t *)(void *)(data + 8);
		partial += *(uint32_t *)(void *)(data + 12);
		partial += *(uint32_t *)(void *)(data + 16);
		partial += *(uint32_t *)(void *)(data + 20);
		partial += *(uint32_t *)(void *)(data + 24);
		partial += *(uint32_t *)(void *)(data + 28);
		partial += *(uint32_t *)(void *)(data + 32);
		partial += *(uint32_t *)(void *)(data + 36);
		partial += *(uint32_t *)(void *)(data + 40);
		partial += *(uint32_t *)(void *)(data + 44);
		partial += *(uint32_t *)(void *)(data + 48);
		partial += *(uint32_t *)(void *)(data + 52);
		partial += *(uint32_t *)(void *)(data + 56);
		partial += *(uint32_t *)(void *)(data + 60);
		data += 64;
		mlen -= 64;
		/*
		 * Spill "partial" into "sum" before it can overflow;
		 * the top-two-bits test leaves headroom for another
		 * pass of 16 word additions.
		 */
		if (PREDICT_FALSE(partial & (3ULL << 62))) {
			if (needs_swap)
				partial = (partial << 8) +
				    (partial >> 56);
			sum += (partial >> 32);
			sum += (partial & 0xffffffff);
			partial = 0;
		}
	}
	/*
	 * mlen is not updated below as the remaining tests
	 * are using bit masks, which are not affected.
	 */
	if (mlen & 32) {
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		partial += *(uint32_t *)(void *)(data + 8);
		partial += *(uint32_t *)(void *)(data + 12);
		partial += *(uint32_t *)(void *)(data + 16);
		partial += *(uint32_t *)(void *)(data + 20);
		partial += *(uint32_t *)(void *)(data + 24);
		partial += *(uint32_t *)(void *)(data + 28);
		data += 32;
	}
	if (mlen & 16) {
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		partial += *(uint32_t *)(void *)(data + 8);
		partial += *(uint32_t *)(void *)(data + 12);
		data += 16;
	}
	if (mlen & 8) {
		partial += *(uint32_t *)(void *)data;
		partial += *(uint32_t *)(void *)(data + 4);
		data += 8;
	}
	if (mlen & 4) {
		partial += *(uint32_t *)(void *)data;
		data += 4;
	}
	if (mlen & 2) {
		partial += *(uint16_t *)(void *)data;
		data += 2;
	}
trailing_bytes:
	if (mlen & 1) {
		/* trailing odd byte goes in the high/low lane per endianness */
#if BYTE_ORDER == LITTLE_ENDIAN
		partial += *data;
#else
		partial += *data << 8;
#endif
		started_on_odd = !started_on_odd;
	}

	/* undo the byte rotation introduced by an odd starting address */
	if (needs_swap)
		partial = (partial << 8) + (partial >> 56);
	sum += (partial >> 32) + (partial & 0xffffffff);
	sum = (sum >> 32) + (sum & 0xffffffff);

	/* fold the four 16-bit lanes, then sweep residual carries */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);

	return (final_acc);
}
348#endif /* ULONG_MAX != 0xffffffffUL */
0b4e3aa0 349
39236c6e
A
/*
 * Perform 16-bit 1's complement sum on a contiguous span.
 * Thin public wrapper over the portable in_cksumdata() above.
 */
uint16_t
b_sum16(const void *buf, int len)
{
	uint16_t folded;

	folded = in_cksumdata(buf, len);
	return (folded);
}
2d21ac55 358
39236c6e
A
359uint16_t inet_cksum_simple(struct mbuf *, int);
360/*
361 * For the exported _in_cksum symbol in BSDKernel symbol set.
362 */
363uint16_t
2d21ac55
A
364inet_cksum_simple(struct mbuf *m, int len)
365{
366 return (inet_cksum(m, 0, 0, len));
367}
1c79356b 368
39236c6e
A
/*
 * 1's complement addition of two 16-bit half-words.  The raw sum is at
 * most 17 bits wide; folding the high part back into the low part twice
 * is guaranteed to leave a value in [0, 0xffff].
 */
uint16_t
in_addword(uint16_t a, uint16_t b)
{
	uint32_t acc = (uint32_t)a + (uint32_t)b;

	/* end-around carry: fold, then fold the carry the fold may create */
	acc = (acc >> 16) + (acc & 0xffff);
	acc = (acc >> 16) + (acc & 0xffff);

	return ((uint16_t)acc);
}
377
39236c6e
A
378uint16_t
379in_pseudo(uint32_t a, uint32_t b, uint32_t c)
0b4e3aa0 380{
39236c6e 381 uint64_t sum;
0b4e3aa0 382 union q_util q_util;
2d21ac55 383 union l_util l_util;
0b4e3aa0 384
39236c6e 385 sum = (uint64_t)a + b + c;
0b4e3aa0
A
386 REDUCE16;
387 return (sum);
0b4e3aa0
A
388}
389
39236c6e
A
/*
 * 1's complement sum of three full 64-bit quantities.  The raw sum
 * wraps modulo 2^64 exactly as the original did (unsigned overflow is
 * well-defined); repeated end-around-carry folds then reduce it to a
 * 16-bit value equivalent to the REDUCE16 union-based fold.
 */
uint16_t
in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t acc = a + b + c;

	/* fold 64 -> 32 -> 16 bits; a fourth fold catches the rare
	 * carry the third one can still produce for full-width input */
	acc = (acc >> 32) + (acc & 0xffffffffULL);
	acc = (acc >> 16) + (acc & 0xffff);
	acc = (acc >> 16) + (acc & 0xffff);
	acc = (acc >> 16) + (acc & 0xffff);

	return ((uint16_t)acc);
}
1c79356b 401
39236c6e
A
/*
 * May be used on IP header with options.
 */
uint16_t
in_cksum_hdr_opt(const struct ip *ip)
{
	/*
	 * IP_VHL_HL extracts the header length in 32-bit words from the
	 * combined version/length field (<< 2 converts to bytes); the
	 * 1's complement sum is then complemented and masked to 16 bits.
	 */
	return (~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff);
}
410
/*
 * A wrapper around the simple in_cksum_hdr() and the more complicated
 * inet_cksum(); the former is chosen if the IP header is simple,
 * contiguous and 32-bit aligned.  Also does some stats accounting.
 *
 * "out" selects which direction's software-checksum counters to bump.
 */
uint16_t
ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
{
	struct ip *ip = mtod(m, struct ip *);

	/* account the software checksum, split by direction */
	if (out) {
		ipstat.ips_snd_swcsum++;
		ipstat.ips_snd_swcsum_bytes += hlen;
	} else {
		ipstat.ips_rcv_swcsum++;
		ipstat.ips_rcv_swcsum_bytes += hlen;
	}

	/* fast path: option-less header, contiguous in the first mbuf,
	 * and suitably aligned for direct word access */
	if (hlen == sizeof (*ip) &&
	    m->m_len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip))
		return (in_cksum_hdr(ip));

	/* slow path handles options and/or split/misaligned headers */
	return (inet_cksum(m, 0, 0, hlen));
}
2d21ac55 435
39236c6e
A
/*
 * m MUST contain at least an IP header, if nxt is specified;
 * nxt is the upper layer protocol number;
 * off is an offset where TCP/UDP/ICMP header starts;
 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
 *
 * Returns the finished (complemented) Internet checksum over the span,
 * optionally including the IPv4 pseudo-header when nxt != 0.
 */
uint16_t
inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
{
	uint32_t sum;

	/* 1's complement sum over [off, off+len) across the mbuf chain */
	sum = m_sum16(m, off, len);

	/* include pseudo header checksum? */
	if (nxt != 0) {
		struct ip *ip;
		/* local copy target, aligned for safe struct access */
		unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8)));
		uint32_t mlen;

		/*
		 * Sanity check
		 *
		 * Use m_length2() instead of m_length(), as we cannot rely on
		 * the caller setting m_pkthdr.len correctly, if the mbuf is
		 * a M_PKTHDR one.
		 */
		if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) {
			panic("%s: mbuf %p too short (%d) for IPv4 header",
			    __func__, m, mlen);
			/* NOTREACHED */
		}

		/*
		 * In case the IP header is not contiguous, or not 32-bit
		 * aligned, copy it to a local buffer.  Note here that we
		 * expect the data pointer to point to the IP header.
		 */
		if ((sizeof (*ip) > m->m_len) ||
		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
			m_copydata(m, 0, sizeof (*ip), (caddr_t)buf);
			ip = (struct ip *)(void *)buf;
		} else {
			ip = (struct ip *)(void *)(m->m_data);
		}

		/* add pseudo header checksum: src, dst, length+protocol */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));

		/* fold in carry bits */
		ADDCARRY(sum);
	}

	/* complement and mask to produce the wire checksum */
	return (~sum & 0xffff);
}