X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6d2010ae8f7a6078e10b361c6962983bab233e0f..a39ff7e25e19b3a8c3020042a3872ca9ec9659f1:/bsd/netinet6/frag6.c diff --git a/bsd/netinet6/frag6.c b/bsd/netinet6/frag6.c index b6b68b920..5bdb1adf3 100644 --- a/bsd/netinet6/frag6.c +++ b/bsd/netinet6/frag6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2010 Apple Inc. All rights reserved. + * Copyright (c) 2000-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -78,51 +79,166 @@ #include #include #include +#include #include #include #include #include +#include /* * Define it to get a correct behavior on per-interface statistics. - * You will need to perform an extra routing table lookup, per fragment, - * to do it. This may, or may not be, a performance hit. */ #define IN6_IFSTAT_STRICT +MBUFQ_HEAD(fq6_head); + +static void frag6_save_context(struct mbuf *, int); +static void frag6_scrub_context(struct mbuf *); +static int frag6_restore_context(struct mbuf *); + +static void frag6_icmp6_paramprob_error(struct fq6_head *); +static void frag6_icmp6_timeex_error(struct fq6_head *); + static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *); static void frag6_deq(struct ip6asfrag *); static void frag6_insque(struct ip6q *, struct ip6q *); static void frag6_remque(struct ip6q *); -static void frag6_freef(struct ip6q *); +static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *); + +static int frag6_timeout_run; /* frag6 timer is scheduled to run */ +static void frag6_timeout(void *); +static void frag6_sched_timeout(void); + +static struct ip6q *ip6q_alloc(int); +static void ip6q_free(struct ip6q *); +static void ip6q_updateparams(void); +static struct ip6asfrag *ip6af_alloc(int); +static void ip6af_free(struct ip6asfrag *); + +decl_lck_mtx_data(static, ip6qlock); +static lck_attr_t *ip6qlock_attr; +static lck_grp_t *ip6qlock_grp; +static lck_grp_attr_t *ip6qlock_grp_attr; + +/* IPv6 fragment reassembly queues (protected by ip6qlock) */ +static struct ip6q ip6q; /* ip6 reassembly queues */ +static int ip6_maxfragpackets; /* max packets in reass queues */ +static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */ +static int ip6_maxfrags; /* max fragments in reass queues */ +static u_int32_t frag6_nfrags; /* # of fragments in reass queues */ +static u_int32_t ip6q_limit; /* ip6q allocation limit */ +static u_int32_t ip6q_count; /* current # of allocated ip6q's */ +static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */ +static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */ + +static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS; +static int sysctl_maxfrags SYSCTL_HANDLER_ARGS; + +SYSCTL_DECL(_net_inet6_ip6); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0, + sysctl_maxfragpackets, "I", + "Maximum number of IPv6 fragment reassembly queue entries"); + +SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets, + CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0, + "Current number of IPv6 fragment reassembly queue entries"); + +SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0, + sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed"); -/* XXX we eventually need splreass6, or some real semaphore */ -int frag6_doing_reass; -u_int frag6_nfragpackets; -static u_int 
frag6_nfrags; -struct ip6q ip6q; /* ip6 reassemble queue */ - - -extern lck_mtx_t *inet6_domain_mutex; /* * Initialise reassembly queue and fragment identifier. */ void -frag6_init() +frag6_init(void) { - struct timeval tv; + /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6q) <= _MLEN); + /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */ + _CASSERT(sizeof (struct ip6asfrag) <= _MLEN); + + /* IPv6 fragment reassembly queue lock */ + ip6qlock_grp_attr = lck_grp_attr_alloc_init(); + ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr); + ip6qlock_attr = lck_attr_alloc_init(); + lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr); + + lck_mtx_lock(&ip6qlock); + /* Initialize IPv6 reassembly queue. */ + ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; + /* same limits as IPv4 */ ip6_maxfragpackets = nmbclusters / 32; - ip6_maxfrags = nmbclusters / 4; + ip6_maxfrags = ip6_maxfragpackets * 2; + ip6q_updateparams(); + lck_mtx_unlock(&ip6qlock); +} - /* - * in many cases, random() here does NOT return random number - * as initialization during bootstrap time occur in fixed order. - */ - microtime(&tv); - ip6_id = random() ^ tv.tv_usec; - ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q; +static void +frag6_save_context(struct mbuf *m, int val) +{ + m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val; +} + +static void +frag6_scrub_context(struct mbuf *m) +{ + m->m_pkthdr.pkt_hdr = NULL; +} + +static int +frag6_restore_context(struct mbuf *m) +{ + return ((int)m->m_pkthdr.pkt_hdr); +} + +/* + * Send any deferred ICMP param problem error messages; caller must not be + * holding ip6qlock and is expected to have saved the per-packet parameter + * value via frag6_save_context(). + */ +static void +frag6_icmp6_paramprob_error(struct fq6_head *diq6) +{ + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *merr, *merr_tmp; + int param; + MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) { + MBUFQ_REMOVE(diq6, merr); + MBUFQ_NEXT(merr) = NULL; + param = frag6_restore_context(merr); + frag6_scrub_context(merr); + icmp6_error(merr, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, param); + } + } +} + +/* + * Send any deferred ICMP time exceeded error messages; + * caller must not be holding ip6qlock. + */ +static void +frag6_icmp6_timeex_error(struct fq6_head *diq6) +{ + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED); + + if (!MBUFQ_EMPTY(diq6)) { + struct mbuf *m, *m_tmp; + MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) { + MBUFQ_REMOVE(diq6, m); + MBUFQ_NEXT(m) = NULL; + icmp6_error_flag(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0); + } + } } /* @@ -156,8 +272,6 @@ frag6_init() */ /* * Fragment input - * NOTE: this function is called with the inet6_domain_mutex held from ip6_input. - * inet6_domain_mutex is protecting he frag6 queue manipulation. 
*/ int frag6_input(struct mbuf **mp, int *offp, int proto) @@ -171,58 +285,48 @@ frag6_input(struct mbuf **mp, int *offp, int proto) int offset = *offp, nxt, i, next; int first_frag = 0; int fragoff, frgpartlen; /* must be larger than u_int16_t */ - struct ifnet *dstifp; - struct ifaddr *ifa = NULL; + struct ifnet *dstifp = NULL; u_int8_t ecn, ecn0; + uint32_t csum, csum_flags; + struct fq6_head diq6; + int locked = 0; -#ifdef IN6_IFSTAT_STRICT - struct route_in6 ro; - struct sockaddr_in6 *dst; -#endif + VERIFY(m->m_flags & M_PKTHDR); + + MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */ + + /* Expect 32-bit aligned data pointer on strict-align platforms */ + MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m); ip6 = mtod(m, struct ip6_hdr *); -#ifndef PULLDOWN_TEST - IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), return IPPROTO_DONE); + IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done); ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); -#else - IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); - if (ip6f == NULL) - return IPPROTO_DONE; -#endif - dstifp = NULL; #ifdef IN6_IFSTAT_STRICT /* find the destination interface of the packet. */ - bzero(&ro, sizeof (ro)); - dst = (struct sockaddr_in6 *)&ro.ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof (struct sockaddr_in6); - dst->sin6_addr = ip6->ip6_dst; - - rtalloc((struct route *)&ro); - if (ro.ro_rt != NULL) { - RT_LOCK(ro.ro_rt); - if ((ifa = ro.ro_rt->rt_ifa) != NULL) { - IFA_ADDREF(ifa); - dstifp = ((struct in6_ifaddr *)ro.ro_rt->rt_ifa)->ia_ifp; + if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) { + uint32_t idx; + + if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) { + if (idx > 0 && idx <= if_index) { + ifnet_head_lock_shared(); + dstifp = ifindex2ifnet[idx]; + ifnet_head_done(); + } } - RT_UNLOCK(ro.ro_rt); - rtfree(ro.ro_rt); - ro.ro_rt = NULL; } -#else - /* we are violating the spec, this is not the destination interface */ - if ((m->m_flags & M_PKTHDR) != 0) +#endif /* IN6_IFSTAT_STRICT */ + + /* we are violating the spec, this may not be the dst interface */ + if (dstifp == NULL) dstifp = m->m_pkthdr.rcvif; -#endif /* jumbo payload can't contain a fragment header */ if (ip6->ip6_plen == 0) { icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); in6_ifstat_inc(dstifp, ifs6_reass_fail); - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + m = NULL; + goto done; } /* @@ -233,32 +337,118 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { - icmp6_error(m, ICMP6_PARAM_PROB, - ICMP6_PARAMPROB_HEADER, - offsetof(struct ip6_hdr, ip6_plen)); + icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen)); in6_ifstat_inc(dstifp, ifs6_reass_fail); - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + m = NULL; + goto done; + } + + /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */ + if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) { + ip6stat.ip6s_fragments++; + ip6stat.ip6s_fragdropped++; + in6_ifstat_inc(dstifp, ifs6_reass_fail); + m_freem(m); + m = NULL; + goto done; } - ip6stat.ip6s_fragments++; - in6_ifstat_inc(dstifp, ifs6_reass_reqd); - /* offset now points to data portion */ offset += sizeof(struct ip6_frag); - frag6_doing_reass = 1; + /* + * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0) + * upfront, unrelated to any reassembly. Just skip the fragment header. 
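+	 * The payload itself is passed up unchanged; the inner protocol
+	 * number is handed back to the caller via ip6f_nxt.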
+	 */
+	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
+		/*
+		 * In ICMPv6 processing, we drop certain NDP messages
+		 * that are not expected to have a fragment header,
+		 * based on the recommendations against the security
+		 * vulnerability described in RFC 6980.
+		 * We set the PKTF_REASSEMBLED flag to let ICMPv6 NDP
+		 * drop such packets.
+		 * However, devices are already deployed that create
+		 * interfaces with MTU < IPv6 minimum MTU and send
+		 * atomic NDP fragments.  We should not have allowed
+		 * that, but such devices are out there.  For that
+		 * reason, we do not set the same flag here and relax
+		 * the check.
+		 */
+		ip6stat.ip6s_atmfrag_rcvd++;
+		in6_ifstat_inc(dstifp, ifs6_atmfrag_rcvd);
+		*offp = offset;
+		return (ip6f->ip6f_nxt);
+	}
 
 	/*
-	 * Enforce upper bound on number of fragments.
-	 * If maxfrag is 0, never accept fragments.
-	 * If maxfrag is -1, accept all fragments without limitation.
+	 * Leverage partial checksum offload for simple UDP/IP fragments,
+	 * as that is the most common case.
+	 *
+	 * Perform 1's complement adjustment of octets that got included/
+	 * excluded in the hardware-calculated checksum value.  Also take
+	 * care of any trailing bytes and subtract out their partial sum.
 	 */
-	if (ip6_maxfrags < 0)
-		;
-	else if (frag6_nfrags >= (u_int)ip6_maxfrags)
-		goto dropfrag;
+	if (ip6f->ip6f_nxt == IPPROTO_UDP &&
+	    offset == (sizeof (*ip6) + sizeof (*ip6f)) &&
+	    (m->m_pkthdr.csum_flags &
+	    (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
+	    (CSUM_DATA_VALID | CSUM_PARTIAL)) {
+		uint32_t start = m->m_pkthdr.csum_rx_start;
+		uint32_t ip_len = (sizeof (*ip6) + ntohs(ip6->ip6_plen));
+		int32_t trailer = (m_pktlen(m) - ip_len);
+		uint32_t swbytes = (uint32_t)trailer;
+
+		csum = m->m_pkthdr.csum_rx_val;
+
+		ASSERT(trailer >= 0);
+		if (start != offset || trailer != 0) {
+			uint16_t s = 0, d = 0;
+
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
+				s = ip6->ip6_src.s6_addr16[1];
+				ip6->ip6_src.s6_addr16[1] = 0;
+			}
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
+				d = ip6->ip6_dst.s6_addr16[1];
+				ip6->ip6_dst.s6_addr16[1] = 0;
+			}
+
+			/* callee folds in sum */
+			csum = m_adj_sum16(m, start, offset,
+			    (ip_len - offset), csum);
+			if (offset > start)
+				swbytes += (offset - start);
+			else
+				swbytes += (start - offset);
+
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
+				ip6->ip6_src.s6_addr16[1] = s;
+			if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
+				ip6->ip6_dst.s6_addr16[1] = d;
+		}
+		csum_flags = m->m_pkthdr.csum_flags;
+
+		if (swbytes != 0)
+			udp_in6_cksum_stats(swbytes);
+		if (trailer != 0)
+			m_adj(m, -trailer);
+	} else {
+		csum = 0;
+		csum_flags = 0;
+	}
+
+	/* Invalidate checksum */
+	m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
+
+	ip6stat.ip6s_fragments++;
+	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
+
+	lck_mtx_lock(&ip6qlock);
+	locked = 1;
 
 	for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
 		if (ip6f->ip6f_ident == q6->ip6q_ident &&
@@ -272,24 +462,12 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		 */
 		first_frag = 1;
 
-		/*
-		 * Enforce upper bound on number of fragmented packets
-		 * for which we attempt reassembly;
-		 * If maxfrag is 0, never accept fragments.
-		 * If maxfrag is -1, accept all fragments without limitation.
- */ - if (ip6_maxfragpackets < 0) - ; - else if (frag6_nfragpackets >= (u_int)ip6_maxfragpackets) - goto dropfrag; - frag6_nfragpackets++; - q6 = (struct ip6q *)_MALLOC(sizeof(struct ip6q), M_FTABLE, - M_DONTWAIT); + q6 = ip6q_alloc(M_DONTWAIT); if (q6 == NULL) goto dropfrag; - bzero(q6, sizeof(*q6)); frag6_insque(q6, &ip6q); + frag6_nfragpackets++; /* ip6q_nxt will be filled afterwards, from 1st fragment */ q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; @@ -297,14 +475,23 @@ frag6_input(struct mbuf **mp, int *offp, int proto) q6->ip6q_nxtp = (u_char *)nxtp; #endif q6->ip6q_ident = ip6f->ip6f_ident; - q6->ip6q_ttl = IPV6_FRAGTTL; + q6->ip6q_ttl = IPV6_FRAGTTL; q6->ip6q_src = ip6->ip6_src; q6->ip6q_dst = ip6->ip6_dst; q6->ip6q_ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ - q6->ip6q_nfrag = 0; + q6->ip6q_nfrag = 0; + + /* + * If the first fragment has valid checksum offload + * info, the rest of fragments are eligible as well. + */ + if (csum_flags != 0) { + q6->ip6q_csum = csum; + q6->ip6q_csum_flags = csum_flags; + } } /* @@ -313,8 +500,8 @@ frag6_input(struct mbuf **mp, int *offp, int proto) */ fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); if (fragoff == 0) { - q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - - sizeof(struct ip6_frag); + q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - + sizeof(struct ip6_frag); q6->ip6q_nxt = ip6f->ip6f_nxt; } @@ -327,23 +514,22 @@ frag6_input(struct mbuf **mp, int *offp, int proto) if (q6->ip6q_unfrglen >= 0) { /* The 1st fragment has already arrived. */ if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } - } - else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { + lck_mtx_unlock(&ip6qlock); + locked = 0; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, - offset - sizeof(struct ip6_frag) + - offsetof(struct ip6_frag, ip6f_offlg)); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return(IPPROTO_DONE); + offset - sizeof(struct ip6_frag) + + offsetof(struct ip6_frag, ip6f_offlg)); + m = NULL; + goto done; } /* * If it's the first fragment, do the above check for each @@ -362,7 +548,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto) /* dequeue the fragment. */ frag6_deq(af6); - FREE(af6, M_FTABLE); + ip6af_free(af6); /* adjust pointer. 
 			 */
 			ip6err = mtod(merr, struct ip6_hdr *);
@@ -374,19 +560,19 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 			ip6err->ip6_src = q6->ip6q_src;
 			ip6err->ip6_dst = q6->ip6q_dst;
 
-			icmp6_error(merr, ICMP6_PARAM_PROB,
-			    ICMP6_PARAMPROB_HEADER,
-			    erroff - sizeof(struct ip6_frag) +
-			    offsetof(struct ip6_frag, ip6f_offlg));
+			frag6_save_context(merr,
+			    erroff - sizeof (struct ip6_frag) +
+			    offsetof(struct ip6_frag, ip6f_offlg));
+
+			MBUFQ_ENQUEUE(&diq6, merr);
 			}
 		}
 	}
 
-	ip6af = (struct ip6asfrag *)_MALLOC(sizeof(struct ip6asfrag), M_FTABLE,
-	    M_DONTWAIT);
+	ip6af = ip6af_alloc(M_DONTWAIT);
 	if (ip6af == NULL)
 		goto dropfrag;
-	bzero(ip6af, sizeof(*ip6af));
+
 	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
 	ip6af->ip6af_off = fragoff;
 	ip6af->ip6af_frglen = frgpartlen;
@@ -407,14 +593,14 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 	ecn0 = q6->ip6q_ecn;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT) {
-			FREE(ip6af, M_FTABLE);
+			ip6af_free(ip6af);
 			goto dropfrag;
 		}
 		if (ecn0 != IPTOS_ECN_CE)
 			q6->ip6q_ecn = IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
-		FREE(ip6af, M_FTABLE);
+		ip6af_free(ip6af);
 		goto dropfrag;
 	}
@@ -431,6 +617,9 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us.
+	 *
+	 * If some of the data is dropped from the preceding
+	 * segment, then its checksum is invalidated.
 	 */
 	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
 		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
@@ -439,6 +628,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		if (i >= ip6af->ip6af_frglen)
 			goto dropfrag;
 		m_adj(IP6_REASS_MBUF(ip6af), i);
+		q6->ip6q_csum_flags = 0;
 		ip6af->ip6af_off += i;
 		ip6af->ip6af_frglen -= i;
 	}
@@ -455,6 +645,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 			af6->ip6af_frglen -= i;
 			af6->ip6af_off += i;
 			m_adj(IP6_REASS_MBUF(af6), i);
+			q6->ip6q_csum_flags = 0;
 			break;
 		}
 		af6 = af6->ip6af_down;
@@ -481,7 +672,7 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		    "overlaps the previous fragment\n",
 		    i, ip6_sprintf(&q6->ip6q_src));
 #endif
-		FREE(ip6af, M_FTABLE);
+		ip6af_free(ip6af);
 		goto dropfrag;
 	}
 }
@@ -493,12 +684,22 @@ frag6_input(struct mbuf **mp, int *offp, int proto)
 		    "overlaps the succeeding fragment",
 		    i, ip6_sprintf(&q6->ip6q_src));
 #endif
-		FREE(ip6af, M_FTABLE);
+		ip6af_free(ip6af);
 		goto dropfrag;
 	}
 }
 #endif
 
+	/*
+	 * If this fragment contains similar checksum offload info
+	 * as that of the existing ones, accumulate checksum.  Otherwise,
+	 * invalidate checksum offload info for the entire datagram.
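+	 * The accumulated value is folded with ADDCARRY() and attached
+	 * to the reassembled packet once all fragments have arrived.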
+	 */
+	if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags)
+		q6->ip6q_csum += csum;
+	else if (q6->ip6q_csum_flags != 0)
+		q6->ip6q_csum_flags = 0;
+
 insert:
 	/*
@@ -520,18 +721,18 @@ insert:
 	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 	     af6 = af6->ip6af_down) {
 		if (af6->ip6af_off != next) {
-			frag6_doing_reass = 0;
-			if (ifa != NULL)
-				IFA_REMREF(ifa);
-			return IPPROTO_DONE;
+			lck_mtx_unlock(&ip6qlock);
+			locked = 0;
+			m = NULL;
+			goto done;
 		}
 		next += af6->ip6af_frglen;
 	}
 	if (af6->ip6af_up->ip6af_mff) {
-		frag6_doing_reass = 0;
-		if (ifa != NULL)
-			IFA_REMREF(ifa);
-		return IPPROTO_DONE;
+		lck_mtx_unlock(&ip6qlock);
+		locked = 0;
+		m = NULL;
+		goto done;
 	}
 
 	/*
@@ -548,13 +749,34 @@ insert:
 		t = t->m_next;
 		t->m_next = IP6_REASS_MBUF(af6);
 		m_adj(t->m_next, af6->ip6af_offset);
-		FREE(af6, M_FTABLE);
+		ip6af_free(af6);
 		af6 = af6dwn;
 	}
 
+	/*
+	 * Store partial hardware checksum info from the fragment queue;
+	 * the receive start offset is set to 40 bytes (see code at the
+	 * top of this routine).
+	 */
+	if (q6->ip6q_csum_flags != 0) {
+		csum = q6->ip6q_csum;
+
+		ADDCARRY(csum);
+
+		m->m_pkthdr.csum_rx_val = csum;
+		m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr);
+		m->m_pkthdr.csum_flags = q6->ip6q_csum_flags;
+	} else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
+	    (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
+		/* loopback checksums are always OK */
+		m->m_pkthdr.csum_data = 0xffff;
+		m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
+		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
+	}
+
 	/* adjust offset to point where the original next header starts */
 	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
-	FREE(ip6af, M_FTABLE);
+	ip6af_free(ip6af);
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
 	ip6->ip6_src = q6->ip6q_src;
@@ -563,7 +785,7 @@ insert:
 		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
 
 	nxt = q6->ip6q_nxt;
-#if notyet
+#ifdef notyet
 	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
 #endif
@@ -571,16 +793,16 @@ insert:
 	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
 		/* This is the only possible case with !PULLDOWN_TEST */
 		ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
-			offset);
+		    offset);
 		m->m_data += sizeof(struct ip6_frag);
 		m->m_len -= sizeof(struct ip6_frag);
 	} else {
 		/* this comes with no copy if the boundary is on cluster */
 		if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
 			frag6_remque(q6);
-			frag6_nfrags -= q6->ip6q_nfrag;
-			FREE(q6, M_FTABLE);
 			frag6_nfragpackets--;
+			frag6_nfragpackets--;
+			frag6_nfrags -= q6->ip6q_nfrag;
+			ip6q_free(q6);
 			goto dropfrag;
 		}
 		m_adj(t, sizeof(struct ip6_frag));
@@ -596,40 +818,65 @@ insert:
 	}
 
 	frag6_remque(q6);
-	frag6_nfrags -= q6->ip6q_nfrag;
-	FREE(q6, M_FTABLE);
 	frag6_nfragpackets--;
+	frag6_nfrags -= q6->ip6q_nfrag;
+	ip6q_free(q6);
 
-	if (m->m_flags & M_PKTHDR) {	/* Isn't it always true? */
-		int plen = 0;
-		for (t = m; t; t = t->m_next)
-			plen += t->m_len;
-		m->m_pkthdr.len = plen;
+	if (m->m_flags & M_PKTHDR) {	/* Isn't it always true? */
+		m_fixhdr(m);
+		/*
+		 * Mark packet as reassembled.
+		 * In ICMPv6 processing, we drop certain NDP messages
+		 * that are not expected to have a fragment header,
+		 * based on the recommendations against the security
+		 * vulnerability described in RFC 6980.
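+		 * Note that the atomic fragments accepted near the top
+		 * of this routine deliberately leave PKTF_REASSEMBLED
+		 * unset.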
+ */ + m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED; } - ip6stat.ip6s_reassembled++; - in6_ifstat_inc(dstifp, ifs6_reass_ok); /* * Tell launch routine the next header */ - *mp = m; *offp = offset; - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return nxt; - - dropfrag: - in6_ifstat_inc(dstifp, ifs6_reass_fail); + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_ok); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (nxt); + +done: + VERIFY(m == NULL); + if (!locked) { + if (frag6_nfragpackets == 0) { + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + } + lck_mtx_lock(&ip6qlock); + } + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); + +dropfrag: ip6stat.ip6s_fragdropped++; + /* arm the purge timer if not already and if there's work to do */ + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + in6_ifstat_inc(dstifp, ifs6_reass_fail); m_freem(m); - frag6_doing_reass = 0; - if (ifa != NULL) - IFA_REMREF(ifa); - return IPPROTO_DONE; + frag6_icmp6_paramprob_error(&diq6); + VERIFY(MBUFQ_EMPTY(&diq6)); + return (IPPROTO_DONE); } /* @@ -637,11 +884,12 @@ insert: * associated datagrams. */ void -frag6_freef(q6) - struct ip6q *q6; +frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6) { struct ip6asfrag *af6, *down6; + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; af6 = down6) { struct mbuf *m = IP6_REASS_MBUF(af6); @@ -662,17 +910,18 @@ frag6_freef(q6) /* restore source and destination addresses */ ip6->ip6_src = q6->ip6q_src; ip6->ip6_dst = q6->ip6q_dst; - icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_REASSEMBLY, 0); - } else - m_freem(m); - FREE(af6, M_FTABLE); + + MBUFQ_ENQUEUE(diq6, m); + } else { + MBUFQ_ENQUEUE(dfq6, m); + } + ip6af_free(af6); } frag6_remque(q6); - frag6_nfrags -= q6->ip6q_nfrag; - FREE(q6, M_FTABLE); frag6_nfragpackets--; + frag6_nfrags -= q6->ip6q_nfrag; + ip6q_free(q6); } /* @@ -680,9 +929,10 @@ frag6_freef(q6) * Like insque, but pointers in middle of structure. */ void -frag6_enq(af6, up6) - struct ip6asfrag *af6, *up6; +frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up = up6; af6->ip6af_down = up6->ip6af_down; up6->ip6af_down->ip6af_up = af6; @@ -693,17 +943,19 @@ frag6_enq(af6, up6) * To frag6_enq as remque is to insque. */ void -frag6_deq(af6) - struct ip6asfrag *af6; +frag6_deq(struct ip6asfrag *af6) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + af6->ip6af_up->ip6af_down = af6->ip6af_down; af6->ip6af_down->ip6af_up = af6->ip6af_up; } void -frag6_insque(new, old) - struct ip6q *new, *old; +frag6_insque(struct ip6q *new, struct ip6q *old) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + new->ip6q_prev = old; new->ip6q_next = old->ip6q_next; old->ip6q_next->ip6q_prev= new; @@ -711,9 +963,10 @@ frag6_insque(new, old) } void -frag6_remque(p6) - struct ip6q *p6; +frag6_remque(struct ip6q *p6) { + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + p6->ip6q_prev->ip6q_next = p6->ip6q_next; p6->ip6q_next->ip6q_prev = p6->ip6q_prev; } @@ -723,13 +976,24 @@ frag6_remque(p6) * if a timer expires on a reassembly * queue, discard it. 
*/ -void -frag6_slowtimo() +static void +frag6_timeout(void *arg) { +#pragma unused(arg) + struct fq6_head dfq6, diq6; struct ip6q *q6; - lck_mtx_lock(inet6_domain_mutex); - frag6_doing_reass = 1; + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + /* + * Update coarse-grained networking timestamp (in sec.); the idea + * is to piggy-back on the timeout callout to update the counter + * returnable via net_uptime(). + */ + net_update_uptime(); + + lck_mtx_lock(&ip6qlock); q6 = ip6q.ip6q_next; if (q6) while (q6 != &ip6q) { @@ -738,7 +1002,7 @@ frag6_slowtimo() if (q6->ip6q_prev->ip6q_ttl == 0) { ip6stat.ip6s_fragtimeout++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(q6->ip6q_prev); + frag6_freef(q6->ip6q_prev, &dfq6, &diq6); } } /* @@ -746,29 +1010,208 @@ frag6_slowtimo() * (due to the limit being lowered), drain off * enough to get down to the new limit. */ - while (frag6_nfragpackets > (u_int)ip6_maxfragpackets && - ip6q.ip6q_prev) { - ip6stat.ip6s_fragoverflow++; - /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_prev); + if (ip6_maxfragpackets >= 0) { + while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets && + ip6q.ip6q_prev) { + ip6stat.ip6s_fragoverflow++; + /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ + frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6); + } + } + /* re-arm the purge timer if there's work to do */ + frag6_timeout_run = 0; + frag6_sched_timeout(); + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static void +frag6_sched_timeout(void) +{ + LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED); + + if (!frag6_timeout_run && frag6_nfragpackets > 0) { + frag6_timeout_run = 1; + timeout(frag6_timeout, NULL, hz); } - frag6_doing_reass = 0; - lck_mtx_unlock(inet6_domain_mutex); } /* * Drain off all datagram fragments. */ void -frag6_drain() +frag6_drain(void) { - if (frag6_doing_reass) - return; - lck_mtx_lock(inet6_domain_mutex); + struct fq6_head dfq6, diq6; + + MBUFQ_INIT(&dfq6); /* for deferred frees */ + MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */ + + lck_mtx_lock(&ip6qlock); while (ip6q.ip6q_next != &ip6q) { ip6stat.ip6s_fragdropped++; /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ - frag6_freef(ip6q.ip6q_next); + frag6_freef(ip6q.ip6q_next, &dfq6, &diq6); + } + lck_mtx_unlock(&ip6qlock); + + /* free fragments that need to be freed */ + if (!MBUFQ_EMPTY(&dfq6)) + MBUFQ_DRAIN(&dfq6); + + frag6_icmp6_timeex_error(&diq6); + + VERIFY(MBUFQ_EMPTY(&dfq6)); + VERIFY(MBUFQ_EMPTY(&diq6)); +} + +static struct ip6q * +ip6q_alloc(int how) +{ + struct mbuf *t; + struct ip6q *q6; + + /* + * See comments in ip6q_updateparams(). Keep the count separate + * from frag6_nfragpackets since the latter represents the elements + * already in the reassembly queues. + */ + if (ip6q_limit > 0 && ip6q_count > ip6q_limit) + return (NULL); + + t = m_get(how, MT_FTABLE); + if (t != NULL) { + atomic_add_32(&ip6q_count, 1); + q6 = mtod(t, struct ip6q *); + bzero(q6, sizeof (*q6)); + } else { + q6 = NULL; + } + return (q6); +} + +static void +ip6q_free(struct ip6q *q6) +{ + (void) m_free(dtom(q6)); + atomic_add_32(&ip6q_count, -1); +} + +static struct ip6asfrag * +ip6af_alloc(int how) +{ + struct mbuf *t; + struct ip6asfrag *af6; + + /* + * See comments in ip6q_updateparams(). 
Keep the count separate
+	 * from frag6_nfrags since the latter represents the elements
+	 * already in the reassembly queues.
+	 */
+	if (ip6af_limit > 0 && ip6af_count > ip6af_limit)
+		return (NULL);
+
+	t = m_get(how, MT_FTABLE);
+	if (t != NULL) {
+		atomic_add_32(&ip6af_count, 1);
+		af6 = mtod(t, struct ip6asfrag *);
+		bzero(af6, sizeof (*af6));
+	} else {
+		af6 = NULL;
+	}
+	return (af6);
+}
+
+static void
+ip6af_free(struct ip6asfrag *af6)
+{
+	(void) m_free(dtom(af6));
+	atomic_add_32(&ip6af_count, -1);
+}
+
+static void
+ip6q_updateparams(void)
+{
+	LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
+	/*
+	 * -1 for unlimited allocation.
+	 */
+	if (ip6_maxfragpackets < 0)
+		ip6q_limit = 0;
+	if (ip6_maxfrags < 0)
+		ip6af_limit = 0;
+	/*
+	 * Positive number for specific bound.
+	 */
+	if (ip6_maxfragpackets > 0)
+		ip6q_limit = ip6_maxfragpackets;
+	if (ip6_maxfrags > 0)
+		ip6af_limit = ip6_maxfrags;
+	/*
+	 * Zero specifies no further fragment queue allocation -- set the
+	 * bound very low, but rely on implementation elsewhere to actually
+	 * prevent allocation and reclaim current queues.
+	 */
+	if (ip6_maxfragpackets == 0)
+		ip6q_limit = 1;
+	if (ip6_maxfrags == 0)
+		ip6af_limit = 1;
+	/*
+	 * Arm the purge timer if not already and if there's work to do.
+	 */
+	frag6_sched_timeout();
+}
+
+static int
+sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error, i;
+
+	lck_mtx_lock(&ip6qlock);
+	i = ip6_maxfragpackets;
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || req->newptr == USER_ADDR_NULL)
+		goto done;
+	/* impose bounds */
+	if (i < -1 || i > (nmbclusters / 4)) {
+		error = EINVAL;
+		goto done;
+	}
+	ip6_maxfragpackets = i;
+	ip6q_updateparams();
+done:
+	lck_mtx_unlock(&ip6qlock);
+	return (error);
+}
+
+static int
+sysctl_maxfrags SYSCTL_HANDLER_ARGS
+{
+#pragma unused(arg1, arg2)
+	int error, i;
+
+	lck_mtx_lock(&ip6qlock);
+	i = ip6_maxfrags;
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || req->newptr == USER_ADDR_NULL)
+		goto done;
+	/* impose bounds */
+	if (i < -1 || i > (nmbclusters / 4)) {
+		error = EINVAL;
+		goto done;
 	}
-	lck_mtx_unlock(inet6_domain_mutex);
+	ip6_maxfrags = i;
+	ip6q_updateparams();	/* see if we need to arm timer */
+done:
+	lck_mtx_unlock(&ip6qlock);
+	return (error);
 }
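
A note on the checksum arithmetic in frag6_input() above: the trailer handling (subtracting the partial sum of bytes beyond the IP payload, then folding carries with ADDCARRY()) is ordinary RFC 1071 one's-complement arithmetic. The standalone sketch below illustrates only that arithmetic; it is not xnu's m_adj_sum16(), whose exact behavior is internal to the kernel, and the helper names here are hypothetical. It also assumes the trimmed trailer begins at an even byte offset, so its bytes pair up the same way they did in the original hardware-computed sum.

/*
 * Illustrative sketch only (not an xnu API): RFC 1071-style
 * one's-complement arithmetic resembling what the reassembly path
 * relies on.  All helper names are hypothetical.
 */
#include <stdint.h>
#include <stddef.h>

/* Fold a 32-bit accumulator into 16 bits (cf. ADDCARRY() above). */
static uint16_t
csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}

/* One's-complement partial sum over a byte range. */
static uint32_t
csum_partial(const uint8_t *buf, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (uint32_t)((buf[0] << 8) | buf[1]);
		buf += 2;
		len -= 2;
	}
	if (len == 1)			/* odd final byte is zero-padded */
		sum += (uint32_t)(buf[0] << 8);
	return (sum);
}

/*
 * Given a sum computed over pkt[0..len), return the sum over
 * pkt[0..len - trailer): in one's-complement arithmetic, subtraction
 * is addition of the complement.
 */
static uint16_t
csum_trim_trailer(const uint8_t *pkt, size_t len, size_t trailer,
    uint16_t hwsum)
{
	uint32_t tsum = csum_partial(pkt + (len - trailer), trailer);

	return (csum_fold(hwsum + (uint16_t)~csum_fold(tsum)));
}

Because one's-complement subtraction is addition of the complement, the trailer's contribution can be removed without re-walking the rest of the packet; that is why the kernel only needs the partial sum of the trailer bytes before calling m_adj() to trim them off.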