X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6d2010ae8f7a6078e10b361c6962983bab233e0f..a39ff7e25e19b3a8c3020042a3872ca9ec9659f1:/bsd/netinet6/frag6.c diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index b6b68b920..5bdb1adf3 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -78,51 +79,166 @@ #include #include #include +#include #include #include #include #include +#include /* * Define it to get a correct behavior on per-interface statistics. - * You will need to perform an extra routing table lookup, per fragment, - * to do it. This may, or may not be, a performance hit. */ #define IN6_IFSTAT_STRICT +MBUFQ_HEAD(fq6_head); + +static void frag6_save_context(struct mbuf *, int); +static void frag6_scrub_context(struct mbuf *); +static int frag6_restore_context(struct mbuf *); + +static void frag6_icmp6_paramprob_error(struct fq6_head *); +static void frag6_icmp6_timeex_error(struct fq6_head *); + static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); static void frag6_deq(struct ip6asfrag *); static void frag6_insque(struct ip6q *, struct ip6q *); static void frag6_remque(struct ip6q *); -static void frag6_freef(struct ip6q *); +static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *); + +static int frag6_timeout_run; /* frag6 timer is scheduled to run */ +static void frag6_timeout(void *); +static void frag6_sched_timeout(void); + +static struct ip6q *ip6q_alloc(int); +static void ip6q_free(struct ip6q *); +static void ip6q_updateparams(void); +static struct ip6asfrag *ip6af_alloc(int); +static void ip6af_free(struct ip6asfrag *); + +decl_lck_mtx_data(static, ip6qlock); +static lck_attr_t *ip6qlock_attr; +static lck_grp_t *ip6qlock_grp; +static lck_grp_attr_t *ip6qlock_grp_attr; + +/* IPv6 fragment reassembly queues (protected by ip6qlock) */ +static struct ip6q ip6q; /* ip6 reassembly queues */ +static int ip6_maxfragpackets; /* max packets in reass queues */ +static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */ +static int ip6_maxfrags; /* max fragments in reass queues */ +static u_int32_t frag6_nfrags; /* # of fragments in reass queues */ +static u_int32_t ip6q_limit; /* ip6q allocation limit */ +static u_int32_t ip6q_count; /* current # of allocated ip6q's */ +static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */ +static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */ + +static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS; +static int sysctl_maxfrags SYSCTL_HANDLER_ARGS; + +SYSCTL_DECL(_net_inet6_ip6); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, + sysctl_maxfragpackets, "I", + "Maximum number of IPv6 fragment reassembly queue entries"); + +SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, + CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, + "Current number of IPv6 fragment reassembly queue entries"); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, + sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed"); -/* XXX we eventually need splreass6, or some real semaphore */ -int frag6_doing_reass; -u_int frag6_nfragpackets; -static u_int 
frag6_nfrags; -struct ip6q ip6q; /* ip6 reassemble queue */ - - -extern lck_mtx_t *inet6_domain_mutex; /* * Initialise reassembly queue and fragment identifier. */ void -frag6_init() +frag6_init(void) { - struct timeval tv; + /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6q) <= _MLEN); + /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6asfrag) <= _MLEN); + + /* IPv6 fragment reassembly queue lock */ + ip6qlock_grp_attr = lck_grp_attr_alloc_init(); + ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr); + ip6qlock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr); + + lck_mtx_lock(&ip6qlock); + /* Initialize IPv6 reassembly queue. */ + ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; + /* same limits as IPv4 */ ip6_maxfragpackets = nmbclusters / 32; - ip6_maxfrags = nmbclusters / 4; + ip6_maxfrags = ip6_maxfragpackets * 2; + ip6q_updateparams(); + lck_mtx_unlock(&ip6qlock); +} - /* - * in many cases, random() here does NOT return random number - * as initialization during bootstrap time occur in fixed order. - */ - microtime(&tv); - ip6_id = random() ^ tv.tv_usec; - ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; +static void +frag6_save_context(struct mbuf *m, int val) +{ + m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val; +} + +static void +frag6_scrub_context(struct mbuf *m) +{ + m->m_pkthdr.pkt_hdr = NULL; +} + +static int +frag6_restore_context(struct mbuf *m) +{ + return ((int)m->m_pkthdr.pkt_hdr); +} + +/* + * Send any deferred ICMP param problem error messages; caller must not be + * holding ip6qlock and is expected to have saved the per-packet parameter + * value via frag6_save_context(). + */ +static void +frag6_icmp6_paramprob_error(struct fq6_head *diq6) +{ + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *merr, *merr_tmp; + int param; + MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { + MBUFQ_REMOVE(diq6, merr); + MBUFQ_NEXT(merr) = NULL; + param = frag6_restore_context(merr); + frag6_scrub_context(merr); + icmp6_error(merr, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, param); + } + } +} + +/* + * Send any deferred ICMP time exceeded error messages; + * caller must not be holding ip6qlock. + */ +static void +frag6_icmp6_timeex_error(struct fq6_head *diq6) +{ + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *m, *m_tmp; + MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { + MBUFQ_REMOVE(diq6, m); + MBUFQ_NEXT(m) = NULL; + icmp6_error_flag(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0); + } + } } /* @@ -156,8 +272,6 @@ frag6_init() */ /* * Fragment input - * NOTE: this function is called with the inet6_domain_mutex held from ip6_input. - * inet6_domain_mutex is protecting he frag6 queue manipulation. 
*/ int frag6_input(struct mbuf **mp, int *offp, int proto) @@ -171,58 +285,48 @@ frag6_input(struct mbuf **mp, int *offp, int proto) int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ - struct ifnet *dstifp; - struct ifaddr *ifa = NULL; + struct ifnet *dstifp = NULL; u_int8_t ecn, ecn0; + uint32_t csum, csum_flags; + struct fq6_head diq6; + int locked = 0; -#ifdef IN6_IFSTAT_STRICT - struct route_in6 ro; - struct sockaddr_in6 *dst; -#endif + VERIFY(m->m_flags & M_PKTHDR); + + MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */ + + /* Expect 32-bit aligned data pointer on strict-align platforms */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip6 = mtod(m, struct ip6_hdr *); -#ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), return IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); -#else - IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); - if (ip6f == NULL) - return IPPROTO_DONE; -#endif - dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ - bzero(&ro, sizeof (ro)); - dst = (struct sockaddr_in6 *)&ro.ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof (struct sockaddr_in6); - dst->sin6_addr = ip6->ip6_dst; - - rtalloc((struct route *)&ro); - if (ro.ro_rt != NULL) { - RT_LOCK(ro.ro_rt); - if ((ifa = ro.ro_rt->rt_ifa) != NULL) { - IFA_ADDREF(ifa); - dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; + if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { + uint32_t idx; + + if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) { + if (idx > 0 && idx <= if_index) { + ifnet_head_lock_shared(); + dstifp = ifindex2ifnet[idx]; + ifnet_head_done(); + } } - RT_UNLOCK(ro.ro_rt); - rtfree(ro.ro_rt); - ro.ro_rt = NULL; } -#else - /* we are violating the spec, this is not the destination interface */ - if ((m->m_flags & M_PKTHDR) != 0) +#endif /* IN6_IFSTAT_STRICT */ + + /* we are violating the spec, this may not be the dst interface */ + if (dstifp == NULL) dstifp = m->m_pkthdr.rcvif; -#endif /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + m = NULL; + goto done; } /* @@ -233,32 +337,118 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { - icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - offsetof(struct ip6_hdr, ip6_plen)); + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + m = NULL; + goto done; + } + + /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */ + if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) { + ip6stat.ip6s_fragments++; + ip6stat.ip6s_fragdropped++; + in6_ifstat_inc(dstifp, ifs6_reass_fail); + m_freem(m); + m = NULL; + goto done; } - ip6stat.ip6s_fragments++; - in6_ifstat_inc(dstifp, ifs6_reass_reqd); - /* offset now points to data portion */ offset += sizeof(struct ip6_frag); - frag6_doing_reass = 1; + /* + * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0) + * upfront, unrelated to any reassembly. Just skip the fragment header. 
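+	 * The payload itself is passed up unchanged; the inner protocol
+	 * number is handed back to the caller via ip6f_nxt.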
+	 */
+	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
+		/*
+		 * In ICMPv6 processing, we drop certain NDP messages
+		 * that are not expected to have a fragment header,
+		 * based on the recommendations against the security
+		 * vulnerability described in RFC 6980.
+		 * We set the PKTF_REASSEMBLED flag to let ICMPv6 NDP
+		 * drop such packets.
+		 * However, devices are already deployed that create
+		 * interfaces with MTU < IPv6 minimum MTU and send
+		 * atomic NDP fragments.  We should not have allowed
+		 * that, but such devices are out there.  For that
+		 * reason, we do not set the same flag here and relax
+		 * the check.
+		 */
+		ip6stat.ip6s_atmfrag_rcvd++;
+		in6_ifstat_inc(dstifp, ifs6_atmfrag_rcvd);
+		*offp = offset;
+		return (ip6f->ip6f_nxt);
+	}
 
 	/*
-	 * Enforce upper bound on number of fragments.
-	 * If maxfrag is 0, never accept fragments.
-	 * If maxfrag is -1, accept all fragments without limitation.
+	 * Leverage partial checksum offload for simple UDP/IP fragments,
+	 * as that is the most common case.
+	 *
+	 * Perform 1's complement adjustment of octets that got included/
+	 * excluded in the hardware-calculated checksum value.  Also take
+	 * care of any trailing bytes and subtract out their partial sum.
 	 */
-	if (ip6_maxfrags < 0)
-		;
-	else if (frag6_nfrags >= (u_int)ip6_maxfrags)
-		goto dropfrag;
+	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
+	    offset == (sizeof (*ip6) + sizeof (*ip6f)) &&
+	    (m->m_pkthdr.csum_flags &
+	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
+	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
+		uint32_t start = m->m_pkthdr.csum_rx_start;
+		uint32_t ip_len = (sizeof (*ip6) + ntohs(ip6->ip6_plen));
+		int32_t trailer = (m_pktlen(m) - ip_len);
+		uint32_t swbytes = (uint32_t)trailer;
+
+		csum = m->m_pkthdr.csum_rx_val;
+
+		ASSERT(trailer >= 0);
+		if (start != offset || trailer != 0) {
+			uint16_t s = 0, d = 0;
+
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
+				s = ip6->ip6_src.s6_addr16[1];
+				ip6->ip6_src.s6_addr16[1] = 0;
+			}
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
+				d = ip6->ip6_dst.s6_addr16[1];
+				ip6->ip6_dst.s6_addr16[1] = 0;
+			}
+
+			/* callee folds in sum */
+			csum = m_adj_sum16(m, start, offset,
+			    (ip_len - offset), csum);
+			if (offset > start)
+				swbytes += (offset - start);
+			else
+				swbytes += (start - offset);
+
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
+				ip6->ip6_src.s6_addr16[1] = s;
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
+				ip6->ip6_dst.s6_addr16[1] = d;
+		}
+		csum_flags = m->m_pkthdr.csum_flags;
+
+		if (swbytes != 0)
+			udp_in6_cksum_stats(swbytes);
+		if (trailer != 0)
+			m_adj(m, -trailer);
+	} else {
+		csum = 0;
+		csum_flags = 0;
+	}
+
+	/* Invalidate checksum */
+	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
+
+	ip6stat.ip6s_fragments++;
+	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
+
+	lck_mtx_lock(&ip6qlock);
+	locked = 1;
 
 	for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
 		if (ip6f->ip6f_ident == q6->ip6q_ident &&
@@ -272,24 +462,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		 */
 		first_frag = 1;
 
-		/*
-		 * Enforce upper bound on number of fragmented packets
-		 * for which we attempt reassembly;
-		 * If maxfrag is 0, never accept fragments.
-		 * If maxfrag is -1, accept all fragments without limitation.
- */ - if (ip6_maxfragpackets < 0) - ; - else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) - goto dropfrag; - frag6_nfragpackets++; - q6 = (struct ip6q *)_MALLOC(sizeof(struct ip6q), M_FTABLE, - M_DONTWAIT); + q6 = ip6q_alloc(M_DONTWAIT); if (q6 == NULL) goto dropfrag; - bzero(q6, sizeof(*q6)); frag6_insque(q6, &ip6q); + frag6_nfragpackets++; /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; @@ -297,14 +475,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto) q6->ip6q_nxtp = (u_char *)nxtp; #endif q6->ip6q_ident = ip6f->ip6f_ident; - q6->ip6q_ttl = IPV6_FRAGTTL; + q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ - q6->ip6q_nfrag = 0; + q6->ip6q_nfrag = 0; + + /* + * If the first fragment has valid checksum offload + * info, the rest of fragments are eligible as well. + */ + if (csum_flags != 0) { + q6->ip6q_csum = csum; + q6->ip6q_csum_flags = csum_flags; + } } /* @@ -313,8 +500,8 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); if (fragoff == 0) { - q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - - sizeof(struct ip6_frag); + q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - + sizeof(struct ip6_frag); q6->ip6q_nxt = ip6f->ip6f_nxt; } @@ -327,23 +514,22 @@ frag6_input(struct mbuf **mp, int *offp, int proto) if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. */ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } - } - else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } /* * If it's the first fragment, do the above check for each @@ -362,7 +548,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) /* dequeue the fragment. */ frag6_deq(af6); - FREE(af6, M_FTABLE); + ip6af_free(af6); /* adjust pointer. 
 			 */
 			ip6err = mtod(merr, struct ip6_hdr *);
@@ -374,19 +560,19 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 			ip6err->ip6_src = q6->ip6q_src;
 			ip6err->ip6_dst = q6->ip6q_dst;
 
-			icmp6_error(merr, ICMP6_PARAM_PROB,
-			    ICMP6_PARAMPROB_HEADER,
-			    erroff - sizeof(struct ip6_frag) +
-			    offsetof(struct ip6_frag, ip6f_offlg));
+			frag6_save_context(merr,
+			    erroff - sizeof (struct ip6_frag) +
+			    offsetof(struct ip6_frag, ip6f_offlg));
+
+			MBUFQ_ENQUEUE(&diq6, merr);
 			}
 		}
 	}
 
-	ip6af = (struct ip6asfrag *)_MALLOC(sizeof(struct ip6asfrag), M_FTABLE,
-	    M_DONTWAIT);
+	ip6af = ip6af_alloc(M_DONTWAIT);
 	if (ip6af == NULL)
 		goto dropfrag;
-	bzero(ip6af, sizeof(*ip6af));
+
 	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
 	ip6af->ip6af_off = fragoff;
 	ip6af->ip6af_frglen = frgpartlen;
@@ -407,14 +593,14 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 	ecn0 = q6->ip6q_ecn;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT) {
-			FREE(ip6af, M_FTABLE);
+			ip6af_free(ip6af);
 			goto dropfrag;
 		}
 		if (ecn0 != IPTOS_ECN_CE)
 			q6->ip6q_ecn = IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
-		FREE(ip6af, M_FTABLE);
+		ip6af_free(ip6af);
 		goto dropfrag;
 	}
@@ -431,6 +617,9 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us.
+	 *
+	 * If some of the data is dropped from the preceding
+	 * segment, then its checksum is invalidated.
 	 */
 	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
 		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
@@ -439,6 +628,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		if (i >= ip6af->ip6af_frglen)
 			goto dropfrag;
 		m_adj(IP6_REASS_MBUF(ip6af), i);
+		q6->ip6q_csum_flags = 0;
 		ip6af->ip6af_off += i;
 		ip6af->ip6af_frglen -= i;
 	}
@@ -455,6 +645,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 			af6->ip6af_frglen -= i;
 			af6->ip6af_off += i;
 			m_adj(IP6_REASS_MBUF(af6), i);
+			q6->ip6q_csum_flags = 0;
 			break;
 		}
 		af6 = af6->ip6af_down;
@@ -481,7 +672,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		    "overlaps the previous fragment\n",
 		    i, ip6_sprintf(&q6->ip6q_src));
 #endif
-		FREE(ip6af, M_FTABLE);
+		ip6af_free(ip6af);
 		goto dropfrag;
 	}
 }
@@ -493,12 +684,22 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		    "overlaps the succeeding fragment",
 		    i, ip6_sprintf(&q6->ip6q_src));
 #endif
-		FREE(ip6af, M_FTABLE);
+		ip6af_free(ip6af);
 		goto dropfrag;
 	}
 }
 #endif
 
+	/*
+	 * If this fragment contains similar checksum offload info
+	 * as that of the existing ones, accumulate checksum.  Otherwise,
+	 * invalidate checksum offload info for the entire datagram.
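+	 * The accumulated value is folded with ADDCARRY() and attached
+	 * to the reassembled packet once all fragments have arrived.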
+	 */
+	if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags)
+		q6->ip6q_csum += csum;
+	else if (q6->ip6q_csum_flags != 0)
+		q6->ip6q_csum_flags = 0;
+
 insert:
 	/*
@@ -520,18 +721,18 @@ insert:
 	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 	     af6 = af6->ip6af_down) {
 		if (af6->ip6af_off != next) {
-			frag6_doing_reass = 0;
-			if (ifa != NULL)
-				IFA_REMREF(ifa);
-			return IPPROTO_DONE;
+			lck_mtx_unlock(&ip6qlock);
+			locked = 0;
+			m = NULL;
+			goto done;
 		}
 		next += af6->ip6af_frglen;
 	}
 	if (af6->ip6af_up->ip6af_mff) {
-		frag6_doing_reass = 0;
-		if (ifa != NULL)
-			IFA_REMREF(ifa);
-		return IPPROTO_DONE;
+		lck_mtx_unlock(&ip6qlock);
+		locked = 0;
+		m = NULL;
+		goto done;
 	}
 
 	/*
@@ -548,13 +749,34 @@ insert:
 		t = t->m_next;
 		t->m_next = IP6_REASS_MBUF(af6);
 		m_adj(t->m_next, af6->ip6af_offset);
-		FREE(af6, M_FTABLE);
+		ip6af_free(af6);
 		af6 = af6dwn;
 	}
 
+	/*
+	 * Store partial hardware checksum info from the fragment queue;
+	 * the receive start offset is set to 40 bytes (see code at the
+	 * top of this routine).
+	 */
+	if (q6->ip6q_csum_flags != 0) {
+		csum = q6->ip6q_csum;
+
+		ADDCARRY(csum);
+
+		m->m_pkthdr.csum_rx_val = csum;
+		m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr);
+		m->m_pkthdr.csum_flags = q6->ip6q_csum_flags;
+	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
+	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
+		/* loopback checksums are always OK */
+		m->m_pkthdr.csum_data = 0xffff;
+		m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
+		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+	}
+
 	/* adjust offset to point where the original next header starts */
 	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
-	FREE(ip6af, M_FTABLE);
+	ip6af_free(ip6af);
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
 	ip6->ip6_src = q6->ip6q_src;
@@ -563,7 +785,7 @@ insert:
 		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
 
 	nxt = q6->ip6q_nxt;
-#if notyet
+#ifdef notyet
 	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
 #endif
@@ -571,16 +793,16 @@ insert:
 	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
 		/* This is the only possible case with !PULLDOWN_TEST */
 		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
-			offset);
+		    offset);
 		m->m_data += sizeof(struct ip6_frag);
 		m->m_len -= sizeof(struct ip6_frag);
 	} else {
 		/* this comes with no copy if the boundary is on cluster */
 		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
 			frag6_remque(q6);
-			frag6_nfrags -= q6->ip6q_nfrag;
-			FREE(q6, M_FTABLE);
 			frag6_nfragpackets--;
+			frag6_nfragpackets--;
+			frag6_nfrags -= q6->ip6q_nfrag;
+			ip6q_free(q6);
 			goto dropfrag;
 		}
 		m_adj(t, sizeof(struct ip6_frag));
@@ -596,40 +818,65 @@ insert:
 	}
 
 	frag6_remque(q6);
-	frag6_nfrags -= q6->ip6q_nfrag;
-	FREE(q6, M_FTABLE);
 	frag6_nfragpackets--;
+	frag6_nfrags -= q6->ip6q_nfrag;
+	ip6q_free(q6);
 
-	if (m->m_flags & M_PKTHDR) {	/* Isn't it always true? */
-		int plen = 0;
-		for (t = m; t; t = t->m_next)
-			plen += t->m_len;
-		m->m_pkthdr.len = plen;
+	if (m->m_flags & M_PKTHDR) {	/* Isn't it always true? */
+		m_fixhdr(m);
+		/*
+		 * Mark packet as reassembled.
+		 * In ICMPv6 processing, we drop certain NDP messages
+		 * that are not expected to have a fragment header,
+		 * based on the recommendations against the security
+		 * vulnerability described in RFC 6980.
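+		 * Note that the atomic fragments accepted near the top
+		 * of this routine deliberately leave PKTF_REASSEMBLED
+		 * unset.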
+ */ + m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED; } - ip6stat.ip6s_reassembled++; - in6_ifstat_inc(dstifp, ifs6_reass_ok); /* * Tell launch routine the next header */ - *mp = m; *offp = offset; - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return nxt; - - dropfrag: - in6_ifstat_inc(dstifp, ifs6_reass_fail); + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_ok); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (nxt); + +done: + VERIFY(m == NULL); + if (!locked) { + if (frag6_nfragpackets == 0) { + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + } + lck_mtx_lock(&ip6qlock); + } + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + +dropfrag: ip6stat.ip6s_fragdropped++; + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_fail); m_freem(m); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); } /* @@ -637,11 +884,12 @@ insert: * associated datagrams. */ void -frag6_freef(q6) - struct ip6q *q6; +frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) { struct ip6asfrag *af6, *down6; + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); @@ -662,17 +910,18 @@ frag6_freef(q6) /* restore source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_REASSEMBLY, 0); - } else - m_freem(m); - FREE(af6, M_FTABLE); + + MBUFQ_ENQUEUE(diq6, m); + } else { + MBUFQ_ENQUEUE(dfq6, m); + } + ip6af_free(af6); } frag6_remque(q6); - frag6_nfrags -= q6->ip6q_nfrag; - FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); } /* @@ -680,9 +929,10 @@ frag6_freef(q6) * Like insque, but pointers in middle of structure. */ void -frag6_enq(af6, up6) - struct ip6asfrag *af6, *up6; +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; up6->ip6af_down->ip6af_up = af6; @@ -693,17 +943,19 @@ frag6_enq(af6, up6) * To frag6_enq as remque is to insque. */ void -frag6_deq(af6) - struct ip6asfrag *af6; +frag6_deq(struct ip6asfrag *af6) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } void -frag6_insque(new, old) - struct ip6q *new, *old; +frag6_insque(struct ip6q *new, struct ip6q *old) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; @@ -711,9 +963,10 @@ frag6_insque(new, old) } void -frag6_remque(p6) - struct ip6q *p6; +frag6_remque(struct ip6q *p6) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; } @@ -723,13 +976,24 @@ frag6_remque(p6) * if a timer expires on a reassembly * queue, discard it. 
*/ -void -frag6_slowtimo() +static void +frag6_timeout(void *arg) { +#pragma unused(arg) + struct fq6_head dfq6, diq6; struct ip6q *q6; - lck_mtx_lock(inet6_domain_mutex); - frag6_doing_reass = 1; + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock(&ip6qlock); q6 = ip6q.ip6q_next; if (q6) while (q6 != &ip6q) { @@ -738,7 +1002,7 @@ frag6_slowtimo() if (q6->ip6q_prev->ip6q_ttl == 0) { ip6stat.ip6s_fragtimeout++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(q6->ip6q_prev); + frag6_freef(q6->ip6q_prev, &dfq6, &diq6); } } /* @@ -746,29 +1010,208 @@ frag6_slowtimo() * (due to the limit being lowered), drain off * enough to get down to the new limit. */ - while (frag6_nfragpackets > (u_int)ip6_maxfragpackets && - ip6q.ip6q_prev) { - ip6stat.ip6s_fragoverflow++; - /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_prev); + if (ip6_maxfragpackets >= 0) { + while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets && + ip6q.ip6q_prev) { + ip6stat.ip6s_fragoverflow++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6); + } + } + /* re-arm the purge timer if there's work to do */ + frag6_timeout_run = 0; + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static void +frag6_sched_timeout(void) +{ + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + + if (!frag6_timeout_run && frag6_nfragpackets > 0) { + frag6_timeout_run = 1; + timeout(frag6_timeout, NULL, hz); } - frag6_doing_reass = 0; - lck_mtx_unlock(inet6_domain_mutex); } /* * Drain off all datagram fragments. */ void -frag6_drain() +frag6_drain(void) { - if (frag6_doing_reass) - return; - lck_mtx_lock(inet6_domain_mutex); + struct fq6_head dfq6, diq6; + + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + lck_mtx_lock(&ip6qlock); while (ip6q.ip6q_next != &ip6q) { ip6stat.ip6s_fragdropped++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_next); + frag6_freef(ip6q.ip6q_next, &dfq6, &diq6); + } + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static struct ip6q * +ip6q_alloc(int how) +{ + struct mbuf *t; + struct ip6q *q6; + + /* + * See comments in ip6q_updateparams(). Keep the count separate + * from frag6_nfragpackets since the latter represents the elements + * already in the reassembly queues. + */ + if (ip6q_limit > 0 && ip6q_count > ip6q_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ip6q_count, 1); + q6 = mtod(t, struct ip6q *); + bzero(q6, sizeof (*q6)); + } else { + q6 = NULL; + } + return (q6); +} + +static void +ip6q_free(struct ip6q *q6) +{ + (void) m_free(dtom(q6)); + atomic_add_32(&ip6q_count, -1); +} + +static struct ip6asfrag * +ip6af_alloc(int how) +{ + struct mbuf *t; + struct ip6asfrag *af6; + + /* + * See comments in ip6q_updateparams(). 
Keep the count separate
+	 * from frag6_nfrags since the latter represents the elements
+	 * already in the reassembly queues.
+	 */
+	if (ip6af_limit > 0 && ip6af_count > ip6af_limit)
+		return (NULL);
+
+	t = m_get(how, MT_FTABLE);
+	if (t != NULL) {
+		atomic_add_32(&ip6af_count, 1);
+		af6 = mtod(t, struct ip6asfrag *);
+		bzero(af6, sizeof (*af6));
+	} else {
+		af6 = NULL;
+	}
+	return (af6);
+}
+
+static void
+ip6af_free(struct ip6asfrag *af6)
+{
+	(void) m_free(dtom(af6));
+	atomic_add_32(&ip6af_count, -1);
+}
+
+static void
+ip6q_updateparams(void)
+{
+	LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
+	/*
+	 * -1 for unlimited allocation.
+	 */
+	if (ip6_maxfragpackets < 0)
+		ip6q_limit = 0;
+	if (ip6_maxfrags < 0)
+		ip6af_limit = 0;
+	/*
+	 * Positive number for specific bound.
+	 */
+	if (ip6_maxfragpackets > 0)
+		ip6q_limit = ip6_maxfragpackets;
+	if (ip6_maxfrags > 0)
+		ip6af_limit = ip6_maxfrags;
+	/*
+	 * Zero specifies no further fragment queue allocation -- set the
+	 * bound very low, but rely on implementation elsewhere to actually
+	 * prevent allocation and reclaim current queues.
+	 */
+	if (ip6_maxfragpackets == 0)
+		ip6q_limit = 1;
+	if (ip6_maxfrags == 0)
+		ip6af_limit = 1;
+	/*
+	 * Arm the purge timer if not already and if there's work to do.
+	 */
+	frag6_sched_timeout();
+}
+
+static int
+sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error, i;
+
+	lck_mtx_lock(&ip6qlock);
+	i = ip6_maxfragpackets;
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || req->newptr == USER_ADDR_NULL)
+		goto done;
+	/* impose bounds */
+	if (i < -1 || i > (nmbclusters / 4)) {
+		error = EINVAL;
+		goto done;
+	}
+	ip6_maxfragpackets = i;
+	ip6q_updateparams();
+done:
+	lck_mtx_unlock(&ip6qlock);
+	return (error);
+}
+
+static int
+sysctl_maxfrags SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error, i;
+
+	lck_mtx_lock(&ip6qlock);
+	i = ip6_maxfrags;
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || req->newptr == USER_ADDR_NULL)
+		goto done;
+	/* impose bounds */
+	if (i < -1 || i > (nmbclusters / 4)) {
+		error = EINVAL;
+		goto done;
 	}
-	lck_mtx_unlock(inet6_domain_mutex);
+	ip6_maxfrags = i;
+	ip6q_updateparams();	/* see if we need to arm timer */
+done:
+	lck_mtx_unlock(&ip6qlock);
+	return (error);
 }
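
A note on the checksum arithmetic in frag6_input() above: the trailer handling (subtracting the partial sum of bytes beyond the IP payload, then folding carries with ADDCARRY()) is ordinary RFC 1071 one's-complement arithmetic. The standalone sketch below illustrates only that arithmetic; it is not xnu's m_adj_sum16(), whose exact behavior is internal to the kernel, and the helper names here are hypothetical. It also assumes the trimmed trailer begins at an even byte offset, so its bytes pair up the same way they did in the original hardware-computed sum.

/*
 * Illustrative sketch only (not an xnu API): RFC 1071-style
 * one's-complement arithmetic resembling what the reassembly path
 * relies on.  All helper names are hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

/* Fold a 32-bit accumulator into 16 bits (cf. ADDCARRY() above). */
static uint16_t
csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

/* One's-complement partial sum over a byte range. */
static uint32_t
csum_partial(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)((buf[0] << 8) | buf[1]);
		buf += 2;
		len -= 2;
	}
	if (len == 1)			/* odd final byte is zero-padded */
		sum += (uint32_t)(buf[0] << 8);
	return (sum);
}

/*
 * Given a sum computed over pkt[0..len), return the sum over
 * pkt[0..len - trailer): in one's-complement arithmetic, subtraction
 * is addition of the complement.
 */
static uint16_t
csum_trim_trailer(const uint8_t *pkt, size_t len, size_t trailer,
    uint16_t hwsum)
{
	uint32_t tsum = csum_partial(pkt + (len - trailer), trailer);

	return (csum_fold(hwsum + (uint16_t)~csum_fold(tsum)));
}

Because one's-complement subtraction is addition of the complement, the trailer's contribution can be removed without re-walking the rest of the packet; that is why the kernel only needs the partial sum of the trailer bytes before calling m_adj() to trim them off.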