X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..39236c6e673c41db228275375ab7fdb0f837b292:/bsd/netinet6/frag6.c diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index f9434d42d..6a92d7380 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -1,4 +1,33 @@ -/* $KAME: frag6.c,v 1.23 2000/02/28 16:18:11 itojun Exp $ */ +/* + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ + * + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ + */ + +/* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */ +/* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -32,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -41,62 +71,173 @@ #include #include #include +#include #include #include #include #include +#include #include #include -#if !(defined(__FreeBSD__) && __FreeBSD__ >= 3) && !defined(__OpenBSD__) && !(defined(__bsdi__) && _BSDI_VERSION >= 199802) && !defined(__APPLE__) -#include -#endif #include #include +#include /* * Define it to get a correct behavior on per-interface statistics. - * You will need to perform an extra routing table lookup, per fragment, - * to do it. This may, or may not be, a performance hit. */ #define IN6_IFSTAT_STRICT -static void frag6_enq __P((struct ip6asfrag *, struct ip6asfrag *)); -static void frag6_deq __P((struct ip6asfrag *)); -static void frag6_insque __P((struct ip6q *, struct ip6q *)); -static void frag6_remque __P((struct ip6q *)); -static void frag6_freef __P((struct ip6q *)); +MBUFQ_HEAD(fq6_head); + +static void frag6_save_context(struct mbuf *, int); +static void frag6_scrub_context(struct mbuf *); +static int frag6_restore_context(struct mbuf *); + +static void frag6_icmp6_paramprob_error(struct fq6_head *); +static void frag6_icmp6_timeex_error(struct fq6_head *); + +static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); +static void frag6_deq(struct ip6asfrag *); +static void frag6_insque(struct ip6q *, struct ip6q *); +static void frag6_remque(struct ip6q *); +static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *); + +static int frag6_timeout_run; /* frag6 timer is scheduled to run */ +static void frag6_timeout(void *); +static void frag6_sched_timeout(void); + +static struct ip6q *ip6q_alloc(int); +static void ip6q_free(struct ip6q *); +static void ip6q_updateparams(void); +static struct ip6asfrag *ip6af_alloc(int); +static void ip6af_free(struct ip6asfrag *); + +decl_lck_mtx_data(static, ip6qlock); +static lck_attr_t *ip6qlock_attr; +static lck_grp_t *ip6qlock_grp; +static lck_grp_attr_t *ip6qlock_grp_attr; + +/* IPv6 fragment reassembly queues (protected by ip6qlock) */ +static struct ip6q ip6q; /* ip6 reassembly queues */ +static int ip6_maxfragpackets; /* max packets in reass queues */ +static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */ +static int ip6_maxfrags; /* max fragments in reass queues */ +static u_int32_t frag6_nfrags; /* # of fragments in reass queues */ +static u_int32_t ip6q_limit; /* ip6q allocation limit */ +static u_int32_t ip6q_count; /* current # of allocated ip6q's */ +static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */ +static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */ + +static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS; +static int sysctl_maxfrags SYSCTL_HANDLER_ARGS; + +SYSCTL_DECL(_net_inet6_ip6); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, + sysctl_maxfragpackets, "I", + "Maximum number of IPv6 fragment reassembly queue entries"); + +SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, + CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, + "Current number of IPv6 fragment reassembly queue entries"); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, + sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed"); -int frag6_doing_reass; -u_int frag6_nfragpackets; -struct ip6q ip6q; /* ip6 reassemble queue */ +/* + * Initialise reassembly queue and fragment identifier. + */ +void +frag6_init(void) +{ + /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6q) <= _MLEN); + /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6asfrag) <= _MLEN); + + /* IPv6 fragment reassembly queue lock */ + ip6qlock_grp_attr = lck_grp_attr_alloc_init(); + ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr); + ip6qlock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr); + + lck_mtx_lock(&ip6qlock); + /* Initialize IPv6 reassembly queue. */ + ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; -/* FreeBSD tweak */ -#if !defined(M_FTABLE) && (defined(__FreeBSD__) && __FreeBSD__ >= 3) -MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header"); -#endif + /* same limits as IPv4 */ + ip6_maxfragpackets = nmbclusters / 32; + ip6_maxfrags = ip6_maxfragpackets * 2; + ip6q_updateparams(); + lck_mtx_unlock(&ip6qlock); +} -#ifndef offsetof /* XXX */ -#define offsetof(type, member) ((size_t)(&((type *)0)->member)) -#endif +static void +frag6_save_context(struct mbuf *m, int val) +{ + m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val; +} + +static void +frag6_scrub_context(struct mbuf *m) +{ + m->m_pkthdr.pkt_hdr = NULL; +} + +static int +frag6_restore_context(struct mbuf *m) +{ + return ((int)m->m_pkthdr.pkt_hdr); +} /* - * Initialise reassembly queue and fragment identifier. + * Send any deferred ICMP param problem error messages; caller must not be + * holding ip6qlock and is expected to have saved the per-packet parameter + * value via frag6_save_context(). */ -void -frag6_init() +static void +frag6_icmp6_paramprob_error(struct fq6_head *diq6) { - struct timeval tv; + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *merr, *merr_tmp; + int param; + MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { + MBUFQ_REMOVE(diq6, merr); + MBUFQ_NEXT(merr) = NULL; + param = frag6_restore_context(merr); + frag6_scrub_context(merr); + icmp6_error(merr, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, param); + } + } +} - /* - * in many cases, random() here does NOT return random number - * as initialization during bootstrap time occur in fixed order. - */ - microtime(&tv); - ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; - ip6_id = random() ^ tv.tv_usec; +/* + * Send any deferred ICMP time exceeded error messages; + * caller must not be holding ip6qlock. + */ +static void +frag6_icmp6_timeex_error(struct fq6_head *diq6) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *m, *m_tmp; + MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { + MBUFQ_REMOVE(diq6, m); + MBUFQ_NEXT(m) = NULL; + icmp6_error(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0); + } + } } /* @@ -132,10 +273,9 @@ frag6_init() * Fragment input */ int -frag6_input(mp, offp, proto) - struct mbuf **mp; - int *offp, proto; +frag6_input(struct mbuf **mp, int *offp, int proto) { +#pragma unused(proto) struct mbuf *m = *mp, *t; struct ip6_hdr *ip6; struct ip6_frag *ip6f; @@ -144,56 +284,54 @@ frag6_input(mp, offp, proto) int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ - struct ifnet *dstifp; -#ifdef IN6_IFSTAT_STRICT - static struct route_in6 ro; - struct sockaddr_in6 *dst; -#endif + struct ifnet *dstifp = NULL; + u_int8_t ecn, ecn0; + uint32_t csum, csum_flags; + struct fq6_head diq6; + int locked = 0; + + VERIFY(m->m_flags & M_PKTHDR); + + MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */ + + /* Expect 32-bit aligned data pointer on strict-align platforms */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); #else IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); if (ip6f == NULL) - return IPPROTO_DONE; + goto done; #endif - dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ - dst = (struct sockaddr_in6 *)&ro.ro_dst; - if (ro.ro_rt - && ((ro.ro_rt->rt_flags & RTF_UP) == 0 - || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { - RTFREE(ro.ro_rt); - ro.ro_rt = (struct rtentry *)0; - } - if (ro.ro_rt == NULL) { - bzero(dst, sizeof(*dst)); - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(struct sockaddr_in6); - dst->sin6_addr = ip6->ip6_dst; - } -#ifndef __bsdi__ - rtalloc((struct route *)&ro); -#else - rtcalloc((struct route *)&ro); -#endif - if (ro.ro_rt != NULL && ro.ro_rt->rt_ifa != NULL) - dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; -#else - /* we are violating the spec, this is not the destination interface */ - if ((m->m_flags & M_PKTHDR) != 0) + if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { + uint32_t idx; + + if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) { + if (idx > 0 && idx <= if_index) { + ifnet_head_lock_shared(); + dstifp = ifindex2ifnet[idx]; + ifnet_head_done(); + } + } + } +#endif /* IN6_IFSTAT_STRICT */ + + /* we are violating the spec, this may not be the dst interface */ + if (dstifp == NULL) dstifp = m->m_pkthdr.rcvif; -#endif /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); - return IPPROTO_DONE; + m = NULL; + goto done; } /* @@ -204,19 +342,79 @@ frag6_input(mp, offp, proto) */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { - icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - offsetof(struct ip6_hdr, ip6_plen)); + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); - return IPPROTO_DONE; + m = NULL; + goto done; + } + + /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */ + if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) { + ip6stat.ip6s_fragments++; + ip6stat.ip6s_fragdropped++; + in6_ifstat_inc(dstifp, ifs6_reass_fail); + m_freem(m); + m = NULL; + goto done; } - ip6stat.ip6s_fragments++; - in6_ifstat_inc(dstifp, ifs6_reass_reqd); - /* offset now points to data portion */ offset += sizeof(struct ip6_frag); + /* + * Leverage partial checksum offload for simple UDP/IP fragments, + * as that is the most common case. + * + * Perform 1's complement adjustment of octets that got included/ + * excluded in the hardware-calculated checksum value. + */ + if (ip6f->ip6f_nxt == IPPROTO_UDP && + offset == (sizeof (*ip6) + sizeof (*ip6f)) && + (m->m_pkthdr.csum_flags & + (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PARTIAL)) { + uint32_t start; + + start = m->m_pkthdr.csum_rx_start; + csum = m->m_pkthdr.csum_rx_val; + + if (start != offset) { + uint16_t s, d; + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) { + s = ip6->ip6_src.s6_addr16[1]; + ip6->ip6_src.s6_addr16[1] = 0 ; + } + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) { + d = ip6->ip6_dst.s6_addr16[1]; + ip6->ip6_dst.s6_addr16[1] = 0; + } + + /* callee folds in sum */ + csum = m_adj_sum16(m, start, offset, csum); + + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) + ip6->ip6_src.s6_addr16[1] = s; + if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) + ip6->ip6_dst.s6_addr16[1] = d; + + } + csum_flags = m->m_pkthdr.csum_flags; + } else { + csum = 0; + csum_flags = 0; + } + + /* Invalidate checksum */ + m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID; + + ip6stat.ip6s_fragments++; + in6_ifstat_inc(dstifp, ifs6_reass_reqd); + + lck_mtx_lock(&ip6qlock); + locked = 1; + for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) if (ip6f->ip6f_ident == q6->ip6q_ident && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && @@ -228,38 +426,37 @@ frag6_input(mp, offp, proto) * the first fragment to arrive, create a reassembly queue. */ first_frag = 1; - frag6_nfragpackets++; - /* - * Enforce upper bound on number of fragmented packets - * for which we attempt reassembly; - * If maxfrag is 0, never accept fragments. - * If maxfrag is -1, accept all fragments without limitation. - */ - if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) { - ip6stat.ip6s_fragoverflow++; - in6_ifstat_inc(dstifp, ifs6_reass_fail); - frag6_freef(ip6q.ip6q_prev); - } - q6 = (struct ip6q *)_MALLOC(sizeof(struct ip6q), M_FTABLE, - M_DONTWAIT); + q6 = ip6q_alloc(M_DONTWAIT); if (q6 == NULL) goto dropfrag; - bzero(q6, sizeof(*q6)); frag6_insque(q6, &ip6q); + frag6_nfragpackets++; /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; -#if notyet +#ifdef notyet q6->ip6q_nxtp = (u_char *)nxtp; #endif q6->ip6q_ident = ip6f->ip6f_ident; - q6->ip6q_arrive = 0; /* Is it used anywhere? */ - q6->ip6q_ttl = IPV6_FRAGTTL; + q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; + q6->ip6q_ecn = + (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ + + q6->ip6q_nfrag = 0; + + /* + * If the first fragment has valid checksum offload + * info, the rest of fragments are eligible as well. + */ + if (csum_flags != 0) { + q6->ip6q_csum = csum; + q6->ip6q_csum_flags = csum_flags; + } } /* @@ -268,8 +465,8 @@ frag6_input(mp, offp, proto) */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); if (fragoff == 0) { - q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - - sizeof(struct ip6_frag); + q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - + sizeof(struct ip6_frag); q6->ip6q_nxt = ip6f->ip6f_nxt; } @@ -282,17 +479,22 @@ frag6_input(mp, offp, proto) if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. */ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } - } - else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } /* * If it's the first fragment, do the above check for each @@ -311,7 +513,7 @@ frag6_input(mp, offp, proto) /* dequeue the fragment. */ frag6_deq(af6); - _FREE(af6, M_FTABLE); + ip6af_free(af6); /* adjust pointer. */ ip6err = mtod(merr, struct ip6_hdr *); @@ -323,23 +525,19 @@ frag6_input(mp, offp, proto) ip6err->ip6_src = q6->ip6q_src; ip6err->ip6_dst = q6->ip6q_dst; - icmp6_error(merr, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - erroff - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); + frag6_save_context(merr, + erroff - sizeof (struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + + MBUFQ_ENQUEUE(&diq6, merr); } } } - ip6af = (struct ip6asfrag *)_MALLOC(sizeof(struct ip6asfrag), M_FTABLE, - M_DONTWAIT); + ip6af = ip6af_alloc(M_DONTWAIT); if (ip6af == NULL) goto dropfrag; - bzero(ip6af, sizeof(*ip6af)); - ip6af->ip6af_head = ip6->ip6_flow; - ip6af->ip6af_len = ip6->ip6_plen; - ip6af->ip6af_nxt = ip6->ip6_nxt; - ip6af->ip6af_hlim = ip6->ip6_hlim; + ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; ip6af->ip6af_off = fragoff; ip6af->ip6af_frglen = frgpartlen; @@ -351,6 +549,26 @@ frag6_input(mp, offp, proto) goto insert; } + /* + * Handle ECN by comparing this segment with the first one; + * if CE is set, do not lose CE. + * drop if CE and not-ECT are mixed for the same packet. + */ + ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; + ecn0 = q6->ip6q_ecn; + if (ecn == IPTOS_ECN_CE) { + if (ecn0 == IPTOS_ECN_NOTECT) { + ip6af_free(ip6af); + goto dropfrag; + } + if (ecn0 != IPTOS_ECN_CE) + q6->ip6q_ecn = IPTOS_ECN_CE; + } + if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { + ip6af_free(ip6af); + goto dropfrag; + } + /* * Find a segment which begins after this one does. */ @@ -364,6 +582,9 @@ frag6_input(mp, offp, proto) * If there is a preceding segment, it may provide some of * our data already. If so, drop the data from the incoming * segment. If it provides all of our data, drop us. + * + * If some of the data is dropped from the preceding + * segment, then it's checksum is invalidated. */ if (af6->ip6af_up != (struct ip6asfrag *)q6) { i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen @@ -372,6 +593,7 @@ frag6_input(mp, offp, proto) if (i >= ip6af->ip6af_frglen) goto dropfrag; m_adj(IP6_REASS_MBUF(ip6af), i); + q6->ip6q_csum_flags = 0; ip6af->ip6af_off += i; ip6af->ip6af_frglen -= i; } @@ -388,6 +610,7 @@ frag6_input(mp, offp, proto) af6->ip6af_frglen -= i; af6->ip6af_off += i; m_adj(IP6_REASS_MBUF(af6), i); + q6->ip6q_csum_flags = 0; break; } af6 = af6->ip6af_down; @@ -399,28 +622,49 @@ frag6_input(mp, offp, proto) * If the incoming framgent overlaps some existing fragments in * the reassembly queue, drop it, since it is dangerous to override * existing fragments from a security point of view. + * We don't know which fragment is the bad guy - here we trust + * fragment that came in earlier, with no real reason. + * + * Note: due to changes after disabling this part, mbuf passed to + * m_adj() below now does not meet the requirement. */ if (af6->ip6af_up != (struct ip6asfrag *)q6) { i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen - ip6af->ip6af_off; if (i > 0) { +#if 0 /* suppress the noisy log */ log(LOG_ERR, "%d bytes of a fragment from %s " "overlaps the previous fragment\n", i, ip6_sprintf(&q6->ip6q_src)); +#endif + ip6af_free(ip6af); goto dropfrag; } } if (af6 != (struct ip6asfrag *)q6) { i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; if (i > 0) { +#if 0 /* suppress the noisy log */ log(LOG_ERR, "%d bytes of a fragment from %s " "overlaps the succeeding fragment", i, ip6_sprintf(&q6->ip6q_src)); +#endif + ip6af_free(ip6af); goto dropfrag; } } #endif + /* + * If this fragment contains similar checksum offload info + * as that of the existing ones, accumulate checksum. Otherwise, + * invalidate checksum offload info for the entire datagram. + */ + if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags) + q6->ip6q_csum += csum; + else if (q6->ip6q_csum_flags != 0) + q6->ip6q_csum_flags = 0; + insert: /* @@ -430,6 +674,8 @@ insert: * the most recently active fragmented packet. */ frag6_enq(ip6af, af6->ip6af_up); + frag6_nfrags++; + q6->ip6q_nfrag++; #if 0 /* xxx */ if (q6 != ip6q.ip6q_next) { frag6_remque(q6); @@ -440,14 +686,18 @@ insert: for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = af6->ip6af_down) { if (af6->ip6af_off != next) { - frag6_doing_reass = 0; - return IPPROTO_DONE; + lck_mtx_unlock(&ip6qlock); + locked = 0; + m = NULL; + goto done; } next += af6->ip6af_frglen; } if (af6->ip6af_up->ip6af_mff) { - frag6_doing_reass = 0; - return IPPROTO_DONE; + lck_mtx_unlock(&ip6qlock); + locked = 0; + m = NULL; + goto done; } /* @@ -464,36 +714,60 @@ insert: t = t->m_next; t->m_next = IP6_REASS_MBUF(af6); m_adj(t->m_next, af6->ip6af_offset); - _FREE(af6, M_FTABLE); + ip6af_free(af6); af6 = af6dwn; } + /* + * Store partial hardware checksum info from the fragment queue; + * the receive start offset is set to 40 bytes (see code at the + * top of this routine.) + */ + if (q6->ip6q_csum_flags != 0) { + csum = q6->ip6q_csum; + + ADDCARRY(csum); + + m->m_pkthdr.csum_rx_val = csum; + m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr); + m->m_pkthdr.csum_flags = q6->ip6q_csum_flags; + } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || + (m->m_pkthdr.pkt_flags & PKTF_LOOP)) { + /* loopback checksums are always OK */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL; + m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + } + /* adjust offset to point where the original next header starts */ offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); - _FREE(ip6af, M_FTABLE); + ip6af_free(ip6af); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; + if (q6->ip6q_ecn == IPTOS_ECN_CE) + ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); + nxt = q6->ip6q_nxt; -#if notyet +#ifdef notyet *q6->ip6q_nxtp = (u_char)(nxt & 0xff); #endif - /* - * Delete frag6 header with as a few cost as possible. - */ - if (offset < m->m_len) { + /* Delete frag6 header */ + if (m->m_len >= offset + sizeof(struct ip6_frag)) { + /* This is the only possible case with !PULLDOWN_TEST */ ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag), - offset); + offset); m->m_data += sizeof(struct ip6_frag); m->m_len -= sizeof(struct ip6_frag); } else { /* this comes with no copy if the boundary is on cluster */ if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) { frag6_remque(q6); - _FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); goto dropfrag; } m_adj(t, sizeof(struct ip6_frag)); @@ -509,34 +783,56 @@ insert: } frag6_remque(q6); - _FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); + + if (m->m_flags & M_PKTHDR) /* Isn't it always true? */ + m_fixhdr(m); - if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ - int plen = 0; - for (t = m; t; t = t->m_next) - plen += t->m_len; - m->m_pkthdr.len = plen; - } - ip6stat.ip6s_reassembled++; - in6_ifstat_inc(dstifp, ifs6_reass_ok); /* * Tell launch routine the next header */ - *mp = m; *offp = offset; - frag6_doing_reass = 0; - return nxt; - - dropfrag: - in6_ifstat_inc(dstifp, ifs6_reass_fail); + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_ok); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (nxt); + +done: + VERIFY(m == NULL); + if (!locked) { + if (frag6_nfragpackets == 0) { + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + } + lck_mtx_lock(&ip6qlock); + } + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + +dropfrag: ip6stat.ip6s_fragdropped++; + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_fail); m_freem(m); - return IPPROTO_DONE; + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); } /* @@ -544,11 +840,12 @@ insert: * associated datagrams. */ void -frag6_freef(q6) - struct ip6q *q6; +frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) { struct ip6asfrag *af6, *down6; + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); @@ -566,20 +863,21 @@ frag6_freef(q6) /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); - /* restoure source and destination addresses */ + /* restore source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_REASSEMBLY, 0); - } else - m_freem(m); - _FREE(af6, M_FTABLE); + MBUFQ_ENQUEUE(diq6, m); + } else { + MBUFQ_ENQUEUE(dfq6, m); + } + ip6af_free(af6); } frag6_remque(q6); - _FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); } /* @@ -587,9 +885,10 @@ frag6_freef(q6) * Like insque, but pointers in middle of structure. */ void -frag6_enq(af6, up6) - struct ip6asfrag *af6, *up6; +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; up6->ip6af_down->ip6af_up = af6; @@ -600,17 +899,19 @@ frag6_enq(af6, up6) * To frag6_enq as remque is to insque. */ void -frag6_deq(af6) - struct ip6asfrag *af6; +frag6_deq(struct ip6asfrag *af6) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } void -frag6_insque(new, old) - struct ip6q *new, *old; +frag6_insque(struct ip6q *new, struct ip6q *old) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; @@ -618,37 +919,37 @@ frag6_insque(new, old) } void -frag6_remque(p6) - struct ip6q *p6; +frag6_remque(struct ip6q *p6) { + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; } /* - * IP timer processing; + * IPv6 reassembling timer processing; * if a timer expires on a reassembly * queue, discard it. */ -void -frag6_slowtimo() +static void +frag6_timeout(void *arg) { +#pragma unused(arg) + struct fq6_head dfq6, diq6; struct ip6q *q6; - int s; -#ifdef __APPLE__ - boolean_t funnel_state; - funnel_state = thread_set_funneled(TRUE); -#endif -#ifdef __NetBSD__ - s = splsoftnet(); -#else - s = splnet(); -#endif -#if 0 - extern struct route_in6 ip6_forward_rt; -#endif - frag6_doing_reass = 1; + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock(&ip6qlock); q6 = ip6q.ip6q_next; if (q6) while (q6 != &ip6q) { @@ -657,7 +958,7 @@ frag6_slowtimo() if (q6->ip6q_prev->ip6q_ttl == 0) { ip6stat.ip6s_fragtimeout++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(q6->ip6q_prev); + frag6_freef(q6->ip6q_prev, &dfq6, &diq6); } } /* @@ -665,46 +966,208 @@ frag6_slowtimo() * (due to the limit being lowered), drain off * enough to get down to the new limit. */ - while (frag6_nfragpackets > (u_int)ip6_maxfragpackets) { - ip6stat.ip6s_fragoverflow++; - /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_prev); + if (ip6_maxfragpackets >= 0) { + while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets && + ip6q.ip6q_prev) { + ip6stat.ip6s_fragoverflow++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6); + } } - frag6_doing_reass = 0; + /* re-arm the purge timer if there's work to do */ + frag6_timeout_run = 0; + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); -#if 0 - /* - * Routing changes might produce a better route than we last used; - * make sure we notice eventually, even if forwarding only for one - * destination and the cache is never replaced. - */ - if (ip6_forward_rt.ro_rt) { - RTFREE(ip6_forward_rt.ro_rt); - ip6_forward_rt.ro_rt = 0; - } - if (ipsrcchk_rt.ro_rt) { - RTFREE(ipsrcchk_rt.ro_rt); - ipsrcchk_rt.ro_rt = 0; - } -#endif + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); - splx(s); -#ifdef __APPLE__ - (void) thread_set_funneled(funnel_state); -#endif + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static void +frag6_sched_timeout(void) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + + if (!frag6_timeout_run && frag6_nfragpackets > 0) { + frag6_timeout_run = 1; + timeout(frag6_timeout, NULL, hz); + } } /* * Drain off all datagram fragments. */ void -frag6_drain() +frag6_drain(void) { - if (frag6_doing_reass) - return; + struct fq6_head dfq6, diq6; + + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + lck_mtx_lock(&ip6qlock); while (ip6q.ip6q_next != &ip6q) { ip6stat.ip6s_fragdropped++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_next); + frag6_freef(ip6q.ip6q_next, &dfq6, &diq6); + } + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static struct ip6q * +ip6q_alloc(int how) +{ + struct mbuf *t; + struct ip6q *q6; + + /* + * See comments in ip6q_updateparams(). Keep the count separate + * from frag6_nfragpackets since the latter represents the elements + * already in the reassembly queues. + */ + if (ip6q_limit > 0 && ip6q_count > ip6q_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ip6q_count, 1); + q6 = mtod(t, struct ip6q *); + bzero(q6, sizeof (*q6)); + } else { + q6 = NULL; + } + return (q6); +} + +static void +ip6q_free(struct ip6q *q6) +{ + (void) m_free(dtom(q6)); + atomic_add_32(&ip6q_count, -1); +} + +static struct ip6asfrag * +ip6af_alloc(int how) +{ + struct mbuf *t; + struct ip6asfrag *af6; + + /* + * See comments in ip6q_updateparams(). Keep the count separate + * from frag6_nfrags since the latter represents the elements + * already in the reassembly queues. + */ + if (ip6af_limit > 0 && ip6af_count > ip6af_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ip6af_count, 1); + af6 = mtod(t, struct ip6asfrag *); + bzero(af6, sizeof (*af6)); + } else { + af6 = NULL; + } + return (af6); +} + +static void +ip6af_free(struct ip6asfrag *af6) +{ + (void) m_free(dtom(af6)); + atomic_add_32(&ip6af_count, -1); +} + +static void +ip6q_updateparams(void) +{ + lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED); + /* + * -1 for unlimited allocation. + */ + if (ip6_maxfragpackets < 0) + ip6q_limit = 0; + if (ip6_maxfrags < 0) + ip6af_limit = 0; + /* + * Positive number for specific bound. + */ + if (ip6_maxfragpackets > 0) + ip6q_limit = ip6_maxfragpackets; + if (ip6_maxfrags > 0) + ip6af_limit = ip6_maxfrags; + /* + * Zero specifies no further fragment queue allocation -- set the + * bound very low, but rely on implementation elsewhere to actually + * prevent allocation and reclaim current queues. + */ + if (ip6_maxfragpackets == 0) + ip6q_limit = 1; + if (ip6_maxfrags == 0) + ip6af_limit = 1; + /* + * Arm the purge timer if not already and if there's work to do + */ + frag6_sched_timeout(); +} + +static int +sysctl_maxfragpackets SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + lck_mtx_lock(&ip6qlock); + i = ip6_maxfragpackets; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < -1 || i > (nmbclusters / 4)) { + error = EINVAL; + goto done; + } + ip6_maxfragpackets = i; + ip6q_updateparams(); +done: + lck_mtx_unlock(&ip6qlock); + return (error); +} + +static int +sysctl_maxfrags SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error, i; + + lck_mtx_lock(&ip6qlock); + i = ip6_maxfrags; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == USER_ADDR_NULL) + goto done; + /* impose bounds */ + if (i < -1 || i > (nmbclusters / 4)) { + error = EINVAL; + goto done; } + ip6_maxfrags= i; + ip6q_updateparams(); /* see if we need to arm timer */ +done: + lck_mtx_unlock(&ip6qlock); + return (error); }