bsd/netinet6/frag6.c

   1 /*
   2  * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*      $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $      */
  30 /*      $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $  */
  31
  32 /*
  33  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  34  * All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. Neither the name of the project nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  */
  60
  61 #include <sys/param.h>
  62 #include <sys/systm.h>
  63 #include <sys/malloc.h>
  64 #include <sys/mcache.h>
  65 #include <sys/mbuf.h>
  66 #include <sys/domain.h>
  67 #include <sys/protosw.h>
  68 #include <sys/socket.h>
  69 #include <sys/errno.h>
  70 #include <sys/time.h>
  71 #include <sys/kernel.h>
  72 #include <sys/syslog.h>
  73 #include <kern/queue.h>
  74 #include <kern/locks.h>
  75
  76 #include <net/if.h>
  77 #include <net/route.h>
  78
  79 #include <netinet/in.h>
  80 #include <netinet/in_var.h>
  81 #include <netinet/ip.h>
  82 #include <netinet/ip_var.h>
  83 #include <netinet/ip6.h>
  84 #include <netinet6/ip6_var.h>
  85 #include <netinet/icmp6.h>
  86
  87 #include <net/net_osdep.h>
  88 #include <dev/random/randomdev.h>
  89
  90 /*
  91  * Define it to get a correct behavior on per-interface statistics.
      * When defined, frag6_input() looks up the interface that owns the
      * packet's destination address and charges the reassembly counters to
      * it; the receive interface is used when that lookup finds nothing.
  92  */
  93 #define IN6_IFSTAT_STRICT
  94 struct  ip6asfrag {                     /* one fragment held on a reassembly queue */
  95         struct ip6asfrag *ip6af_down;   /* next fragment; list is walked in ascending ip6af_off */
  96         struct ip6asfrag *ip6af_up;     /* previous fragment; the owning ip6q acts as list head */
  97         struct mbuf     *ip6af_m;       /* mbuf holding this fragment (see IP6_REASS_MBUF) */
  98         int             ip6af_offset;   /* offset in ip6af_m to next header */
  99         int             ip6af_frglen;   /* fragmentable part length */
 100         int             ip6af_off;      /* fragment offset */
 101         u_int16_t       ip6af_mff;      /* more fragment bit in frag off */
 102 };
 103
 104 #define IP6_REASS_MBUF(ip6af) ((ip6af)->ip6af_m)     /* mbuf attached to a queued fragment */
 105
 106 MBUFQ_HEAD(fq6_head);   /* mbuf queue type used for deferred frees and deferred ICMPv6 errors */
 107
     /*
      * Per-mbuf context: an integer (the ICMPv6 parameter problem pointer)
      * carried in m_pkthdr.pkt_hdr until the deferred error is sent.
      */
 108 static void frag6_save_context(struct mbuf *, int);
 109 static void frag6_scrub_context(struct mbuf *);
 110 static int frag6_restore_context(struct mbuf *);
 111
     /* Deferred ICMPv6 error delivery; both assert that ip6qlock is not held. */
 112 static void frag6_icmp6_paramprob_error(struct fq6_head *);
 113 static void frag6_icmp6_timeex_error(struct fq6_head *);
 114
     /* Fragment list and reassembly queue list manipulation. */
 115 static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
 116 static void frag6_deq(struct ip6asfrag *);
 117 static void frag6_insque(struct ip6q *, struct ip6q *);
 118 static void frag6_remque(struct ip6q *);
 119 static void frag6_purgef(struct ip6q *, struct fq6_head *, struct fq6_head *);
 120 static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *);
 121
 122 static int frag6_timeout_run;           /* frag6 timer is scheduled to run */
 123 static void frag6_timeout(void *);
 124 static void frag6_sched_timeout(void);
 125
     /*
      * Allocators for queue headers and fragment descriptors; the int
      * argument is the wait flag (frag6_input() passes M_DONTWAIT and
      * handles a NULL return).
      */
 126 static struct ip6q *ip6q_alloc(int);
 127 static void ip6q_free(struct ip6q *);
 128 static void ip6q_updateparams(void);
 129 static struct ip6asfrag *ip6af_alloc(int);
 130 static void ip6af_free(struct ip6asfrag *);
 131
 132 decl_lck_mtx_data(static, ip6qlock);    /* IPv6 fragment reassembly queue lock */
 133 static lck_attr_t       *ip6qlock_attr;
 134 static lck_grp_t        *ip6qlock_grp;
 135 static lck_grp_attr_t   *ip6qlock_grp_attr;
 136
 137 /* IPv6 fragment reassembly queues (protected by ip6qlock) */
 138 static struct ip6q ip6q;                /* head of the circular list of reassembly queues */
 139 static int ip6_maxfragpackets;          /* max packets in reass queues; 0 = accept no fragments */
 140 static u_int32_t frag6_nfragpackets;    /* # of packets in reass queues */
 141 static int ip6_maxfrags;                /* max fragments in reass queues; 0 = accept no fragments */
 142 static u_int32_t frag6_nfrags;          /* # of fragments in reass queues */
 143 static u_int32_t ip6q_limit;            /* ip6q allocation limit */
 144 static u_int32_t ip6q_count;            /* current # of allocated ip6q's */
 145 static u_int32_t ip6af_limit;           /* ip6asfrag allocation limit */
 146 static u_int32_t ip6af_count;           /* current # of allocated ip6asfrag's */
 147
     /* Handlers behind the two writable limit sysctls registered below. */
 148 static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS;
 149 static int sysctl_maxfrags SYSCTL_HANDLER_ARGS;
 150
 151 SYSCTL_DECL(_net_inet6_ip6);
 152
     /* Read-write: limit on reassembly queue entries (ip6_maxfragpackets). */
 153 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
 154     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0,
 155     sysctl_maxfragpackets, "I",
 156     "Maximum number of IPv6 fragment reassembly queue entries");
 157
     /* Read-only: current value of frag6_nfragpackets. */
 158 SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets,
 159     CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0,
 160     "Current number of IPv6 fragment reassembly queue entries");
 161
     /* Read-write: limit on queued fragments (ip6_maxfrags). */
 162 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
 163     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0,
 164     sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed");
 165
 166 /*
 167  * Initialise reassembly queue and fragment identifier.
 168  */
 169 void
 170 frag6_init(void)
 171 {
 172         /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */
 173         _CASSERT(sizeof(struct ip6q) <= _MLEN);
 174         /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */
 175         _CASSERT(sizeof(struct ip6asfrag) <= _MLEN);
 176
 177         /* IPv6 fragment reassembly queue lock */
 178         ip6qlock_grp_attr  = lck_grp_attr_alloc_init();
 179         ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr);
 180         ip6qlock_attr = lck_attr_alloc_init();
 181         lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr);
 182
 183         lck_mtx_lock(&ip6qlock);
 184         /* Initialize IPv6 reassembly queue. */
 185         ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
 186
 187         /* same limits as IPv4 */
 188         ip6_maxfragpackets = nmbclusters / 32;
 189         ip6_maxfrags = ip6_maxfragpackets * 2;
 190         ip6q_updateparams();
 191         lck_mtx_unlock(&ip6qlock);
 192 }
 193
 194 static void
 195 frag6_save_context(struct mbuf *m, int val)
 196 {
 197         m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val;
 198 }
 199
 200 static void
 201 frag6_scrub_context(struct mbuf *m)
 202 {
 203         m->m_pkthdr.pkt_hdr = NULL;
 204 }
 205
 206 static int
 207 frag6_restore_context(struct mbuf *m)
 208 {
 209         return (int)m->m_pkthdr.pkt_hdr;
 210 }
 211
 212 /*
 213  * Send any deferred ICMP param problem error messages; caller must not be
 214  * holding ip6qlock and is expected to have saved the per-packet parameter
 215  * value via frag6_save_context().
 216  */
 217 static void
 218 frag6_icmp6_paramprob_error(struct fq6_head *diq6)
 219 {
 220         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED);
 221
 222         if (!MBUFQ_EMPTY(diq6)) {
 223                 struct mbuf *merr, *merr_tmp;
 224                 int param;
 225                 MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) {
 226                         MBUFQ_REMOVE(diq6, merr);
 227                         MBUFQ_NEXT(merr) = NULL;
 228                         param = frag6_restore_context(merr);
 229                         frag6_scrub_context(merr);
 230                         icmp6_error(merr, ICMP6_PARAM_PROB,
 231                             ICMP6_PARAMPROB_HEADER, param);
 232                 }
 233         }
 234 }
 235
 236 /*
 237  * Send any deferred ICMP time exceeded error messages;
 238  * caller must not be holding ip6qlock.
 239  */
 240 static void
 241 frag6_icmp6_timeex_error(struct fq6_head *diq6)
 242 {
 243         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED);
 244
 245         if (!MBUFQ_EMPTY(diq6)) {
 246                 struct mbuf *m, *m_tmp;
 247                 MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) {
 248                         MBUFQ_REMOVE(diq6, m);
 249                         MBUFQ_NEXT(m) = NULL;
 250                         icmp6_error_flag(m, ICMP6_TIME_EXCEEDED,
 251                             ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0);
 252                 }
 253         }
 254 }
 255
 256 /*
 257  * In RFC2460, fragment and reassembly rule do not agree with each other,
 258  * in terms of next header field handling in fragment header.
 259  * While the sender will use the same value for all of the fragmented packets,
 260  * receiver is suggested not to check the consistency.
 261  *
 262  * fragment rule (p20):
 263  *      (2) A Fragment header containing:
 264  *      The Next Header value that identifies the first header of
 265  *      the Fragmentable Part of the original packet.
 266  *              -> next header field is same for all fragments
 267  *
 268  * reassembly rule (p21):
 269  *      The Next Header field of the last header of the Unfragmentable
 270  *      Part is obtained from the Next Header field of the first
 271  *      fragment's Fragment header.
 272  *              -> should grab it from the first fragment only
 273  *
 274  * The following note also contradicts the fragment rule - no one is going to
 275  * send different fragments with different next header fields.
 276  *
 277  * additional note (p22):
 278  *      The Next Header values in the Fragment headers of different
 279  *      fragments of the same original packet may differ.  Only the value
 280  *      from the Offset zero fragment packet is used for reassembly.
 281  *              -> should grab it from the first fragment only
 282  *
 283  * There is no explicit reason given in the RFC.  Historical reason maybe?
 284  */
 285 /*
 286  * Fragment input
 287  */
 288 int
 289 frag6_input(struct mbuf **mp, int *offp, int proto)
 290 {
 291 #pragma unused(proto)
 292         struct mbuf *m = *mp, *t = NULL;
 293         struct ip6_hdr *ip6 = NULL;
 294         struct ip6_frag *ip6f = NULL;
 295         struct ip6q *q6 = NULL;
 296         struct ip6asfrag *af6 = NULL, *ip6af = NULL, *af6dwn = NULL;
 297         int offset = *offp, i = 0, next = 0;
 298         u_int8_t nxt = 0;
 299         int first_frag = 0;
 300         int fragoff = 0, frgpartlen = 0;        /* must be larger than u_int16_t */
 301         struct ifnet *dstifp = NULL;
 302         u_int8_t ecn = 0, ecn0 = 0;
 303         uint32_t csum = 0, csum_flags = 0;
 304         struct fq6_head diq6 = {};
 305         int locked = 0;
 306         boolean_t drop_fragq = FALSE;
 307
 308         VERIFY(m->m_flags & M_PKTHDR);
 309
 310         MBUFQ_INIT(&diq6);      /* for deferred ICMP param problem errors */
 311
 312         /* Expect 32-bit aligned data pointer on strict-align platforms */
 313         MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
 314
 315         IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done);
 316         ip6 = mtod(m, struct ip6_hdr *);
 317         ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
 318
 319 #ifdef IN6_IFSTAT_STRICT
 320         /* find the destination interface of the packet. */
 321         if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) {
 322                 uint32_t idx;
 323
 324                 if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) {
 325                         if (idx > 0 && idx <= if_index) {
 326                                 ifnet_head_lock_shared();
 327                                 dstifp = ifindex2ifnet[idx];
 328                                 ifnet_head_done();
 329                         }
 330                 }
 331         }
 332 #endif /* IN6_IFSTAT_STRICT */
 333
 334         /* we are violating the spec, this may not be the dst interface */
 335         if (dstifp == NULL) {
 336                 dstifp = m->m_pkthdr.rcvif;
 337         }
 338
 339         /* jumbo payload can't contain a fragment header */
 340         if (ip6->ip6_plen == 0) {
 341                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
 342                 in6_ifstat_inc(dstifp, ifs6_reass_fail);
 343                 m = NULL;
 344                 goto done;
 345         }
 346
 347         /*
 348          * check whether fragment packet's fragment length is
 349          * multiple of 8 octets.
 350          * sizeof(struct ip6_frag) == 8
 351          * sizeof(struct ip6_hdr) = 40
 352          */
 353         if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
 354             (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
 355                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 356                     offsetof(struct ip6_hdr, ip6_plen));
 357                 in6_ifstat_inc(dstifp, ifs6_reass_fail);
 358                 m = NULL;
 359                 goto done;
 360         }
 361
 362         /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */
 363         if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) {
 364                 ip6stat.ip6s_fragments++;
 365                 ip6stat.ip6s_fragdropped++;
 366                 in6_ifstat_inc(dstifp, ifs6_reass_fail);
 367                 m_freem(m);
 368                 m = NULL;
 369                 goto done;
 370         }
 371
 372         /* offset now points to data portion */
 373         offset += sizeof(struct ip6_frag);
 374
 375         /*
 376          * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
 377          * upfront, unrelated to any reassembly.  Just skip the fragment header.
 378          */
 379         if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
 380                 /*
 381                  * Mark packet as reassembled.
 382                  * In ICMPv6 processing, we drop certain
 383                  * NDP messages that are not expected to
 384                  * have fragment header based on recommendations
 385                  * against security vulnerability as described in
 386                  * RFC 6980.
 387                  * Treat atomic fragments as re-assembled packets as well.
 388                  */
 389                 m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED;
 390                 ip6stat.ip6s_atmfrag_rcvd++;
 391                 in6_ifstat_inc(dstifp, ifs6_atmfrag_rcvd);
 392                 *mp = m;
 393                 *offp = offset;
 394                 return ip6f->ip6f_nxt;
 395         }
 396
 397         /*
 398          * Leverage partial checksum offload for simple UDP/IP fragments,
 399          * as that is the most common case.
 400          *
 401          * Perform 1's complement adjustment of octets that got included/
 402          * excluded in the hardware-calculated checksum value.  Also take
 403          * care of any trailing bytes and subtract out their partial sum.
 404          */
 405         if (ip6f->ip6f_nxt == IPPROTO_UDP &&
 406             offset == (sizeof(*ip6) + sizeof(*ip6f)) &&
 407             (m->m_pkthdr.csum_flags &
 408             (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
 409             (CSUM_DATA_VALID | CSUM_PARTIAL)) {
 410                 uint32_t start = m->m_pkthdr.csum_rx_start;
 411                 uint32_t ip_len = (sizeof(*ip6) + ntohs(ip6->ip6_plen));
 412                 int32_t trailer = (m_pktlen(m) - ip_len);
 413                 uint32_t swbytes = (uint32_t)trailer;
 414
 415                 csum = m->m_pkthdr.csum_rx_val;
 416
 417                 ASSERT(trailer >= 0);
 418                 if (start != offset || trailer != 0) {
 419                         uint16_t s = 0, d = 0;
 420
 421                         if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
 422                                 s = ip6->ip6_src.s6_addr16[1];
 423                                 ip6->ip6_src.s6_addr16[1] = 0;
 424                         }
 425                         if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
 426                                 d = ip6->ip6_dst.s6_addr16[1];
 427                                 ip6->ip6_dst.s6_addr16[1] = 0;
 428                         }
 429
 430                         /* callee folds in sum */
 431                         csum = m_adj_sum16(m, start, offset,
 432                             (ip_len - offset), csum);
 433                         if (offset > start) {
 434                                 swbytes += (offset - start);
 435                         } else {
 436                                 swbytes += (start - offset);
 437                         }
 438
 439                         if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
 440                                 ip6->ip6_src.s6_addr16[1] = s;
 441                         }
 442                         if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
 443                                 ip6->ip6_dst.s6_addr16[1] = d;
 444                         }
 445                 }
 446                 csum_flags = m->m_pkthdr.csum_flags;
 447
 448                 if (swbytes != 0) {
 449                         udp_in6_cksum_stats(swbytes);
 450                 }
 451                 if (trailer != 0) {
 452                         m_adj(m, -trailer);
 453                 }
 454         } else {
 455                 csum = 0;
 456                 csum_flags = 0;
 457         }
 458
 459         /* Invalidate checksum */
 460         m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
 461
 462         ip6stat.ip6s_fragments++;
 463         in6_ifstat_inc(dstifp, ifs6_reass_reqd);
 464
 465         lck_mtx_lock(&ip6qlock);
 466         locked = 1;
 467
 468         for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next) {
 469                 if (ip6f->ip6f_ident == q6->ip6q_ident &&
 470                     IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
 471                     IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)) {
 472                         break;
 473                 }
 474         }
 475
 476         if (q6 == &ip6q) {
 477                 /*
 478                  * Create a reassembly queue as this is the first fragment to
 479                  * arrive.
 480                  * By first frag, we don't mean the one with offset 0, but
 481                  * any of the fragments of the fragmented packet that has
 482                  * reached us first.
 483                  */
 484                 first_frag = 1;
 485
 486                 q6 = ip6q_alloc(M_DONTWAIT);
 487                 if (q6 == NULL) {
 488                         goto dropfrag;
 489                 }
 490
 491                 frag6_insque(q6, &ip6q);
 492                 frag6_nfragpackets++;
 493
 494                 /* ip6q_nxt will be filled afterwards, from 1st fragment */
 495                 q6->ip6q_down   = q6->ip6q_up = (struct ip6asfrag *)q6;
 496 #ifdef notyet
 497                 q6->ip6q_nxtp   = (u_char *)nxtp;
 498 #endif
 499                 q6->ip6q_ident  = ip6f->ip6f_ident;
 500                 q6->ip6q_ttl    = IPV6_FRAGTTL;
 501                 q6->ip6q_src    = ip6->ip6_src;
 502                 q6->ip6q_dst    = ip6->ip6_dst;
 503                 q6->ip6q_ecn    =
 504                     (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
 505                 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
 506
 507                 q6->ip6q_nfrag = 0;
 508                 q6->ip6q_flags = 0;
 509
 510                 /*
 511                  * If the first fragment has valid checksum offload
 512                  * info, the rest of fragments are eligible as well.
 513                  */
 514                 if (csum_flags != 0) {
 515                         q6->ip6q_csum = csum;
 516                         q6->ip6q_csum_flags = csum_flags;
 517                 }
 518         }
 519
 520         if (q6->ip6q_flags & IP6QF_DIRTY) {
 521                 goto dropfrag;
 522         }
 523
 524         /*
 525          * If it's the 1st fragment, record the length of the
 526          * unfragmentable part and the next header of the fragment header.
 527          */
 528         fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
 529         if (fragoff == 0) {
 530                 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
 531                     sizeof(struct ip6_frag);
 532                 q6->ip6q_nxt = ip6f->ip6f_nxt;
 533         }
 534
 535         /*
 536          * Check that the reassembled packet would not exceed 65535 bytes
 537          * in size.
 538          * If it would exceed, discard the fragment and return an ICMP error.
 539          */
 540         frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
 541         if (q6->ip6q_unfrglen >= 0) {
 542                 /* The 1st fragment has already arrived. */
 543                 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
 544                         lck_mtx_unlock(&ip6qlock);
 545                         locked = 0;
 546                         icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 547                             offset - sizeof(struct ip6_frag) +
 548                             offsetof(struct ip6_frag, ip6f_offlg));
 549                         m = NULL;
 550                         goto done;
 551                 }
 552         } else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
 553                 lck_mtx_unlock(&ip6qlock);
 554                 locked = 0;
 555                 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 556                     offset - sizeof(struct ip6_frag) +
 557                     offsetof(struct ip6_frag, ip6f_offlg));
 558                 m = NULL;
 559                 goto done;
 560         }
 561         /*
 562          * If it's the first fragment, do the above check for each
 563          * fragment already stored in the reassembly queue.
 564          */
 565         if (fragoff == 0) {
 566                 /*
 567                  * https://tools.ietf.org/html/rfc8200#page-20
 568                  * If the first fragment does not include all headers through an
 569                  * Upper-Layer header, then that fragment should be discarded and
 570                  * an ICMP Parameter Problem, Code 3, message should be sent to
 571                  * the source of the fragment, with the Pointer field set to zero.
 572                  */
 573                 if (!ip6_pkt_has_ulp(m)) {
 574                         lck_mtx_unlock(&ip6qlock);
 575                         locked = 0;
 576                         icmp6_error(m, ICMP6_PARAM_PROB,
 577                             ICMP6_PARAMPROB_FIRSTFRAG_INCOMP_HDR, 0);
 578                         m = NULL;
 579                         goto done;
 580                 }
 581                 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 582                     af6 = af6dwn) {
 583                         af6dwn = af6->ip6af_down;
 584
 585                         if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
 586                             IPV6_MAXPACKET) {
 587                                 struct mbuf *merr = IP6_REASS_MBUF(af6);
 588                                 struct ip6_hdr *ip6err;
 589                                 int erroff = af6->ip6af_offset;
 590
 591                                 /* dequeue the fragment. */
 592                                 frag6_deq(af6);
 593                                 ip6af_free(af6);
 594
 595                                 /* adjust pointer. */
 596                                 ip6err = mtod(merr, struct ip6_hdr *);
 597
 598                                 /*
 599                                  * Restore source and destination addresses
 600                                  * in the erroneous IPv6 header.
 601                                  */
 602                                 ip6err->ip6_src = q6->ip6q_src;
 603                                 ip6err->ip6_dst = q6->ip6q_dst;
 604
 605                                 frag6_save_context(merr,
 606                                     erroff - sizeof(struct ip6_frag) +
 607                                     offsetof(struct ip6_frag, ip6f_offlg));
 608
 609                                 MBUFQ_ENQUEUE(&diq6, merr);
 610                         }
 611                 }
 612         }
 613
 614         ip6af = ip6af_alloc(M_DONTWAIT);
 615         if (ip6af == NULL) {
 616                 goto dropfrag;
 617         }
 618
 619         ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
 620         ip6af->ip6af_off = fragoff;
 621         ip6af->ip6af_frglen = frgpartlen;
 622         ip6af->ip6af_offset = offset;
 623         IP6_REASS_MBUF(ip6af) = m;
 624
 625         if (first_frag) {
 626                 af6 = (struct ip6asfrag *)q6;
 627                 goto insert;
 628         }
 629
 630         /*
 631          * Handle ECN by comparing this segment with the first one;
 632          * if CE is set, do not lose CE.
 633          * drop if CE and not-ECT are mixed for the same packet.
 634          */
 635         ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
 636         ecn0 = q6->ip6q_ecn;
 637         if (ecn == IPTOS_ECN_CE) {
 638                 if (ecn0 == IPTOS_ECN_NOTECT) {
 639                         ip6af_free(ip6af);
 640                         goto dropfrag;
 641                 }
 642                 if (ecn0 != IPTOS_ECN_CE) {
 643                         q6->ip6q_ecn = IPTOS_ECN_CE;
 644                 }
 645         }
 646         if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
 647                 ip6af_free(ip6af);
 648                 goto dropfrag;
 649         }
 650
 651         /*
 652          * Find a segment which begins after this one does.
 653          */
 654         for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 655             af6 = af6->ip6af_down) {
 656                 if (af6->ip6af_off > ip6af->ip6af_off) {
 657                         break;
 658                 }
 659         }
 660
 661         /*
 662          * As per RFC 8200 reassembly rules, we MUST drop the entire
 663          * chain of fragments for a packet to be assembled, if we receive
 664          * any overlapping fragments.
 665          * https://tools.ietf.org/html/rfc8200#page-20
 666          *
 667          * To avoid more conditional code, just reuse frag6_freef and defer
 668          * its call to post fragment insertion in the queue.
 669          */
 670         if (af6->ip6af_up != (struct ip6asfrag *)q6) {
 671                 if (af6->ip6af_up->ip6af_off == ip6af->ip6af_off) {
 672                         if (af6->ip6af_up->ip6af_frglen != ip6af->ip6af_frglen) {
 673                                 drop_fragq = TRUE;
 674                         } else {
 675                                 /*
 676                                  * XXX Ideally we should be comparing the entire
 677                                  * packet here but for now just use off and fraglen
 678                                  * to ignore a duplicate fragment.
 679                                  */
 680                                 ip6af_free(ip6af);
 681                                 goto dropfrag;
 682                         }
 683                 } else {
 684                         i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
 685                             - ip6af->ip6af_off;
 686                         if (i > 0) {
 687                                 drop_fragq = TRUE;
 688                         }
 689                 }
 690         }
 691
 692         if (af6 != (struct ip6asfrag *)q6) {
 693                 /*
 694                  * Given that we break when af6->ip6af_off > ip6af->ip6af_off,
 695                  * we shouldn't need a check for duplicate fragment here.
 696                  * For now just assert.
 697                  */
 698                 VERIFY(af6->ip6af_off != ip6af->ip6af_off);
 699                 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
 700                 if (i > 0) {
 701                         drop_fragq = TRUE;
 702                 }
 703         }
 704
 705         /*
 706          * If this fragment contains similar checksum offload info
 707          * as that of the existing ones, accumulate checksum.  Otherwise,
 708          * invalidate checksum offload info for the entire datagram.
 709          */
 710         if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags) {
 711                 q6->ip6q_csum += csum;
 712         } else if (q6->ip6q_csum_flags != 0) {
 713                 q6->ip6q_csum_flags = 0;
 714         }
 715
 716 insert:
 717         /*
 718          * Stick new segment in its place;
 719          * check for complete reassembly.
 720          * Move to front of packet queue, as we are
 721          * the most recently active fragmented packet.
 722          */
 723         frag6_enq(ip6af, af6->ip6af_up);
 724         frag6_nfrags++;
 725         q6->ip6q_nfrag++;
 726
 727         /*
 728          * This holds true, when we receive overlapping fragments.
 729          * We must silently drop all the fragments we have received
 730          * so far.
 731          * Also mark q6 as dirty, so as to not add any new fragments to it.
 732          * Make sure even q6 marked dirty is kept till timer expires for
 733          * reassembly and when that happens, silenty get rid of q6
 734          */
 735         if (drop_fragq) {
 736                 struct fq6_head dfq6 = {0};
 737                 MBUFQ_INIT(&dfq6);      /* for deferred frees */
 738                 q6->ip6q_flags |= IP6QF_DIRTY;
 739                 /* Purge all the fragments but do not free q6 */
 740                 frag6_purgef(q6, &dfq6, NULL);
 741                 af6 = NULL;
 742
 743                 /* free fragments that need to be freed */
 744                 if (!MBUFQ_EMPTY(&dfq6)) {
 745                         MBUFQ_DRAIN(&dfq6);
 746                 }
 747                 VERIFY(MBUFQ_EMPTY(&dfq6));
 748                 /*
 749                  * Just in case the above logic got anything added
 750                  * to diq6, drain it.
 751                  * Please note that these mbufs are not present in the
 752                  * fragment queue and are added to diq6 for sending
 753                  * ICMPv6 error.
 754                  * Given that the current fragment was an overlapping
 755                  * fragment and the RFC requires us to not send any
 756                  * ICMPv6 errors while purging the entire queue.
 757                  * Just empty it out.
 758                  */
 759                 if (!MBUFQ_EMPTY(&diq6)) {
 760                         MBUFQ_DRAIN(&diq6);
 761                 }
 762                 VERIFY(MBUFQ_EMPTY(&diq6));
 763                 /*
 764                  * MBUFQ_DRAIN would have drained all the mbufs
 765                  * in the fragment queue.
 766                  * This shouldn't be needed as we are returning IPPROTO_DONE
 767                  * from here but change the passed mbuf pointer to NULL.
 768                  */
 769                 *mp = NULL;
 770                 lck_mtx_unlock(&ip6qlock);
 771                 return IPPROTO_DONE;
 772         }
 773         next = 0;
 774         for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 775             af6 = af6->ip6af_down) {
 776                 if (af6->ip6af_off != next) {
 777                         lck_mtx_unlock(&ip6qlock);
 778                         locked = 0;
 779                         m = NULL;
 780                         goto done;
 781                 }
 782                 next += af6->ip6af_frglen;
 783         }
 784         if (af6->ip6af_up->ip6af_mff) {
 785                 lck_mtx_unlock(&ip6qlock);
 786                 locked = 0;
 787                 m = NULL;
 788                 goto done;
 789         }
 790
 791         /*
 792          * Reassembly is complete; concatenate fragments.
 793          */
 794         ip6af = q6->ip6q_down;
 795         t = m = IP6_REASS_MBUF(ip6af);
 796         af6 = ip6af->ip6af_down;
 797         frag6_deq(ip6af);
 798         while (af6 != (struct ip6asfrag *)q6) {
 799                 af6dwn = af6->ip6af_down;
 800                 frag6_deq(af6);
 801                 while (t->m_next) {
 802                         t = t->m_next;
 803                 }
 804                 t->m_next = IP6_REASS_MBUF(af6);
 805                 m_adj(t->m_next, af6->ip6af_offset);
 806                 ip6af_free(af6);
 807                 af6 = af6dwn;
 808         }
 809
 810         /*
 811          * Store partial hardware checksum info from the fragment queue;
 812          * the receive start offset is set to 40 bytes (see code at the
 813          * top of this routine.)
 814          */
 815         if (q6->ip6q_csum_flags != 0) {
 816                 csum = q6->ip6q_csum;
 817
 818                 ADDCARRY(csum);
 819
 820                 m->m_pkthdr.csum_rx_val = (u_int16_t)csum;
 821                 m->m_pkthdr.csum_rx_start = sizeof(struct ip6_hdr);
 822                 m->m_pkthdr.csum_flags = q6->ip6q_csum_flags;
 823         } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
 824             (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
 825                 /* loopback checksums are always OK */
 826                 m->m_pkthdr.csum_data = 0xffff;
 827                 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 828         }
 829
 830         /* adjust offset to point where the original next header starts */
 831         offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
 832         ip6af_free(ip6af);
 833         ip6 = mtod(m, struct ip6_hdr *);
 834         ip6->ip6_plen = htons((uint16_t)(next + offset - sizeof(struct ip6_hdr)));
 835         ip6->ip6_src = q6->ip6q_src;
 836         ip6->ip6_dst = q6->ip6q_dst;
 837         if (q6->ip6q_ecn == IPTOS_ECN_CE) {
 838                 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
 839         }
 840
 841         nxt = q6->ip6q_nxt;
 842 #ifdef notyet
 843         *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
 844 #endif
 845
 846         /* Delete frag6 header */
 847         if (m->m_len >= offset + sizeof(struct ip6_frag)) {
 848                 /* This is the only possible case with !PULLDOWN_TEST */
 849                 ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
 850                     offset);
 851                 m->m_data += sizeof(struct ip6_frag);
 852                 m->m_len -= sizeof(struct ip6_frag);
 853         } else {
 854                 /* this comes with no copy if the boundary is on cluster */
 855                 if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
 856                         frag6_remque(q6);
 857                         frag6_nfragpackets--;
 858                         frag6_nfrags -= q6->ip6q_nfrag;
 859                         ip6q_free(q6);
 860                         goto dropfrag;
 861                 }
 862                 m_adj(t, sizeof(struct ip6_frag));
 863                 m_cat(m, t);
 864         }
 865
 866         /*
 867          * Store NXT to the original.
 868          */
 869         {
 870                 char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
 871                 *prvnxtp = nxt;
 872         }
 873
 874         frag6_remque(q6);
 875         frag6_nfragpackets--;
 876         frag6_nfrags -= q6->ip6q_nfrag;
 877         ip6q_free(q6);
 878
 879         if (m->m_flags & M_PKTHDR) {    /* Isn't it always true? */
 880                 m_fixhdr(m);
 881                 /*
 882                  * Mark packet as reassembled
 883                  * In ICMPv6 processing, we drop certain
 884                  * NDP messages that are not expected to
 885                  * have fragment header based on recommendations
 886                  * against security vulnerability as described in
 887                  * RFC 6980.
 888                  */
 889                 m->m_pkthdr.pkt_flags |= PKTF_REASSEMBLED;
 890         }
 891         ip6stat.ip6s_reassembled++;
 892
 893         /*
 894          * Tell launch routine the next header
 895          */
 896         *mp = m;
 897         *offp = offset;
 898
 899         /* arm the purge timer if not already and if there's work to do */
 900         frag6_sched_timeout();
 901         lck_mtx_unlock(&ip6qlock);
 902         in6_ifstat_inc(dstifp, ifs6_reass_ok);
 903         frag6_icmp6_paramprob_error(&diq6);
 904         VERIFY(MBUFQ_EMPTY(&diq6));
 905         return nxt;
 906
 907 done:
 908         VERIFY(m == NULL);
 909         *mp = m;
 910         if (!locked) {
 911                 if (frag6_nfragpackets == 0) {
 912                         frag6_icmp6_paramprob_error(&diq6);
 913                         VERIFY(MBUFQ_EMPTY(&diq6));
 914                         return IPPROTO_DONE;
 915                 }
 916                 lck_mtx_lock(&ip6qlock);
 917         }
 918         /* arm the purge timer if not already and if there's work to do */
 919         frag6_sched_timeout();
 920         lck_mtx_unlock(&ip6qlock);
 921         frag6_icmp6_paramprob_error(&diq6);
 922         VERIFY(MBUFQ_EMPTY(&diq6));
 923         return IPPROTO_DONE;
 924
 925 dropfrag:
 926         ip6stat.ip6s_fragdropped++;
 927         /* arm the purge timer if not already and if there's work to do */
 928         frag6_sched_timeout();
 929         lck_mtx_unlock(&ip6qlock);
 930         in6_ifstat_inc(dstifp, ifs6_reass_fail);
 931         m_freem(m);
 932         *mp = NULL;
 933         frag6_icmp6_paramprob_error(&diq6);
 934         VERIFY(MBUFQ_EMPTY(&diq6));
 935         return IPPROTO_DONE;
 936 }
 937
 938 /*
 939  * This routine removes the enqueued frames from the passed fragment
 940  * header and enqueues those to dfq6 which is an out-arg for the dequeued
 941  * fragments.
 942  * If the caller also provides diq6, this routine also enqueues the 0 offset
 943  * fragment to that list as it potentially gets used by the caller
 944  * to prepare the relevant ICMPv6 error message (time exceeded or
 945  * param problem).
 946  * It leaves the fragment header object (q6) intact.
 947  */
 948 static void
 949 frag6_purgef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6)
 950 {
 951         struct ip6asfrag *af6 = NULL;
 952         struct ip6asfrag *down6 = NULL;
 953
 954         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
 955
 956         for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 957             af6 = down6) {
 958                 struct mbuf *m = IP6_REASS_MBUF(af6);
 959
 960                 down6 = af6->ip6af_down;
 961                 frag6_deq(af6);
 962
 963                 /*
 964                  * If caller wants to generate ICMP time-exceeded,
 965                  * as indicated by the argument diq6, return it for
 966                  * the first fragment and add others to the fragment
 967                  * free queue.
 968                  */
 969                 if (af6->ip6af_off == 0 && diq6 != NULL) {
 970                         struct ip6_hdr *ip6;
 971
 972                         /* adjust pointer */
 973                         ip6 = mtod(m, struct ip6_hdr *);
 974
 975                         /* restore source and destination addresses */
 976                         ip6->ip6_src = q6->ip6q_src;
 977                         ip6->ip6_dst = q6->ip6q_dst;
 978                         MBUFQ_ENQUEUE(diq6, m);
 979                 } else {
 980                         MBUFQ_ENQUEUE(dfq6, m);
 981                 }
 982                 ip6af_free(af6);
 983         }
 984 }
 985
 986 /*
 987  * This routine removes the enqueued frames from the passed fragment
 988  * header and enqueues those to dfq6 which is an out-arg for the dequeued
 989  * fragments.
 990  * If the caller also provides diq6, this routine also enqueues the 0 offset
 991  * fragment to that list as it potentially gets used by the caller
 992  * to prepare the relevant ICMPv6 error message (time exceeded or
 993  * param problem).
 994  * It also remove the fragment header object from the queue and frees it.
 995  */
 996 static void
 997 frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6)
 998 {
 999         frag6_purgef(q6, dfq6, diq6);
1000         frag6_remque(q6);
1001         frag6_nfragpackets--;
1002         frag6_nfrags -= q6->ip6q_nfrag;
1003         ip6q_free(q6);
1004 }
1005
1006 /*
1007  * Put an ip fragment on a reassembly chain.
1008  * Like insque, but pointers in middle of structure.
1009  */
1010 void
1011 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
1012 {
1013         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1014
1015         af6->ip6af_up = up6;
1016         af6->ip6af_down = up6->ip6af_down;
1017         up6->ip6af_down->ip6af_up = af6;
1018         up6->ip6af_down = af6;
1019 }
1020
1021 /*
1022  * To frag6_enq as remque is to insque.
1023  */
1024 void
1025 frag6_deq(struct ip6asfrag *af6)
1026 {
1027         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1028
1029         af6->ip6af_up->ip6af_down = af6->ip6af_down;
1030         af6->ip6af_down->ip6af_up = af6->ip6af_up;
1031 }
1032
1033 void
1034 frag6_insque(struct ip6q *new, struct ip6q *old)
1035 {
1036         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1037
1038         new->ip6q_prev = old;
1039         new->ip6q_next = old->ip6q_next;
1040         old->ip6q_next->ip6q_prev = new;
1041         old->ip6q_next = new;
1042 }
1043
1044 void
1045 frag6_remque(struct ip6q *p6)
1046 {
1047         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1048
1049         p6->ip6q_prev->ip6q_next = p6->ip6q_next;
1050         p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
1051 }
1052
1053 /*
1054  * IPv6 reassembling timer processing;
1055  * if a timer expires on a reassembly
1056  * queue, discard it.
1057  */
1058 static void
1059 frag6_timeout(void *arg)
1060 {
1061 #pragma unused(arg)
1062         struct fq6_head dfq6, diq6;
1063         struct fq6_head *diq6_tmp = NULL;
1064         struct ip6q *q6;
1065
1066         MBUFQ_INIT(&dfq6);      /* for deferred frees */
1067         MBUFQ_INIT(&diq6);      /* for deferred ICMP time exceeded errors */
1068
1069         /*
1070          * Update coarse-grained networking timestamp (in sec.); the idea
1071          * is to piggy-back on the timeout callout to update the counter
1072          * returnable via net_uptime().
1073          */
1074         net_update_uptime();
1075
1076         lck_mtx_lock(&ip6qlock);
1077         q6 = ip6q.ip6q_next;
1078         if (q6) {
1079                 while (q6 != &ip6q) {
1080                         --q6->ip6q_ttl;
1081                         q6 = q6->ip6q_next;
1082                         if (q6->ip6q_prev->ip6q_ttl == 0) {
1083                                 ip6stat.ip6s_fragtimeout++;
1084                                 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1085                                 /*
1086                                  * Avoid sending ICMPv6 Time Exceeded for fragment headers
1087                                  * that are marked dirty.
1088                                  */
1089                                 diq6_tmp = (q6->ip6q_prev->ip6q_flags & IP6QF_DIRTY) ?
1090                                     NULL : &diq6;
1091                                 frag6_freef(q6->ip6q_prev, &dfq6, diq6_tmp);
1092                         }
1093                 }
1094         }
1095         /*
1096          * If we are over the maximum number of fragments
1097          * (due to the limit being lowered), drain off
1098          * enough to get down to the new limit.
1099          */
1100         if (ip6_maxfragpackets >= 0) {
1101                 while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets &&
1102                     ip6q.ip6q_prev) {
1103                         ip6stat.ip6s_fragoverflow++;
1104                         /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1105                         /*
1106                          * Avoid sending ICMPv6 Time Exceeded for fragment headers
1107                          * that are marked dirty.
1108                          */
1109                         diq6_tmp = (ip6q.ip6q_prev->ip6q_flags & IP6QF_DIRTY) ?
1110                             NULL : &diq6;
1111                         frag6_freef(ip6q.ip6q_prev, &dfq6, diq6_tmp);
1112                 }
1113         }
1114         /* re-arm the purge timer if there's work to do */
1115         frag6_timeout_run = 0;
1116         frag6_sched_timeout();
1117         lck_mtx_unlock(&ip6qlock);
1118
1119         /* free fragments that need to be freed */
1120         if (!MBUFQ_EMPTY(&dfq6)) {
1121                 MBUFQ_DRAIN(&dfq6);
1122         }
1123
1124         frag6_icmp6_timeex_error(&diq6);
1125
1126         VERIFY(MBUFQ_EMPTY(&dfq6));
1127         VERIFY(MBUFQ_EMPTY(&diq6));
1128 }
1129
1130 static void
1131 frag6_sched_timeout(void)
1132 {
1133         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1134
1135         if (!frag6_timeout_run && frag6_nfragpackets > 0) {
1136                 frag6_timeout_run = 1;
1137                 timeout(frag6_timeout, NULL, hz);
1138         }
1139 }
1140
1141 /*
1142  * Drain off all datagram fragments.
1143  */
1144 void
1145 frag6_drain(void)
1146 {
1147         struct fq6_head dfq6, diq6;
1148         struct fq6_head *diq6_tmp = NULL;
1149
1150         MBUFQ_INIT(&dfq6);      /* for deferred frees */
1151         MBUFQ_INIT(&diq6);      /* for deferred ICMP time exceeded errors */
1152
1153         lck_mtx_lock(&ip6qlock);
1154         while (ip6q.ip6q_next != &ip6q) {
1155                 ip6stat.ip6s_fragdropped++;
1156                 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1157                 /*
1158                  * Avoid sending ICMPv6 Time Exceeded for fragment headers
1159                  * that are marked dirty.
1160                  */
1161                 diq6_tmp = (ip6q.ip6q_next->ip6q_flags & IP6QF_DIRTY) ?
1162                     NULL : &diq6;
1163                 frag6_freef(ip6q.ip6q_next, &dfq6, diq6_tmp);
1164         }
1165         lck_mtx_unlock(&ip6qlock);
1166
1167         /* free fragments that need to be freed */
1168         if (!MBUFQ_EMPTY(&dfq6)) {
1169                 MBUFQ_DRAIN(&dfq6);
1170         }
1171
1172         frag6_icmp6_timeex_error(&diq6);
1173
1174         VERIFY(MBUFQ_EMPTY(&dfq6));
1175         VERIFY(MBUFQ_EMPTY(&diq6));
1176 }
1177
1178 static struct ip6q *
1179 ip6q_alloc(int how)
1180 {
1181         struct mbuf *t;
1182         struct ip6q *q6;
1183
1184         /*
1185          * See comments in ip6q_updateparams().  Keep the count separate
1186          * from frag6_nfragpackets since the latter represents the elements
1187          * already in the reassembly queues.
1188          */
1189         if (ip6q_limit > 0 && ip6q_count > ip6q_limit) {
1190                 return NULL;
1191         }
1192
1193         t = m_get(how, MT_FTABLE);
1194         if (t != NULL) {
1195                 atomic_add_32(&ip6q_count, 1);
1196                 q6 = mtod(t, struct ip6q *);
1197                 bzero(q6, sizeof(*q6));
1198         } else {
1199                 q6 = NULL;
1200         }
1201         return q6;
1202 }
1203
1204 static void
1205 ip6q_free(struct ip6q *q6)
1206 {
1207         (void) m_free(dtom(q6));
1208         atomic_add_32(&ip6q_count, -1);
1209 }
1210
1211 static struct ip6asfrag *
1212 ip6af_alloc(int how)
1213 {
1214         struct mbuf *t;
1215         struct ip6asfrag *af6;
1216
1217         /*
1218          * See comments in ip6q_updateparams().  Keep the count separate
1219          * from frag6_nfrags since the latter represents the elements
1220          * already in the reassembly queues.
1221          */
1222         if (ip6af_limit > 0 && ip6af_count > ip6af_limit) {
1223                 return NULL;
1224         }
1225
1226         t = m_get(how, MT_FTABLE);
1227         if (t != NULL) {
1228                 atomic_add_32(&ip6af_count, 1);
1229                 af6 = mtod(t, struct ip6asfrag *);
1230                 bzero(af6, sizeof(*af6));
1231         } else {
1232                 af6 = NULL;
1233         }
1234         return af6;
1235 }
1236
1237 static void
1238 ip6af_free(struct ip6asfrag *af6)
1239 {
1240         (void) m_free(dtom(af6));
1241         atomic_add_32(&ip6af_count, -1);
1242 }
1243
1244 static void
1245 ip6q_updateparams(void)
1246 {
1247         LCK_MTX_ASSERT(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1248         /*
1249          * -1 for unlimited allocation.
1250          */
1251         if (ip6_maxfragpackets < 0) {
1252                 ip6q_limit = 0;
1253         }
1254         if (ip6_maxfrags < 0) {
1255                 ip6af_limit = 0;
1256         }
1257         /*
1258          * Positive number for specific bound.
1259          */
1260         if (ip6_maxfragpackets > 0) {
1261                 ip6q_limit = ip6_maxfragpackets;
1262         }
1263         if (ip6_maxfrags > 0) {
1264                 ip6af_limit = ip6_maxfrags;
1265         }
1266         /*
1267          * Zero specifies no further fragment queue allocation -- set the
1268          * bound very low, but rely on implementation elsewhere to actually
1269          * prevent allocation and reclaim current queues.
1270          */
1271         if (ip6_maxfragpackets == 0) {
1272                 ip6q_limit = 1;
1273         }
1274         if (ip6_maxfrags == 0) {
1275                 ip6af_limit = 1;
1276         }
1277         /*
1278          * Arm the purge timer if not already and if there's work to do
1279          */
1280         frag6_sched_timeout();
1281 }
1282
1283 static int
1284 sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
1285 {
1286 #pragma unused(arg1, arg2)
1287         int error, i;
1288
1289         lck_mtx_lock(&ip6qlock);
1290         i = ip6_maxfragpackets;
1291         error = sysctl_handle_int(oidp, &i, 0, req);
1292         if (error || req->newptr == USER_ADDR_NULL) {
1293                 goto done;
1294         }
1295         /* impose bounds */
1296         if (i < -1 || i > (nmbclusters / 4)) {
1297                 error = EINVAL;
1298                 goto done;
1299         }
1300         ip6_maxfragpackets = i;
1301         ip6q_updateparams();
1302 done:
1303         lck_mtx_unlock(&ip6qlock);
1304         return error;
1305 }
1306
1307 static int
1308 sysctl_maxfrags SYSCTL_HANDLER_ARGS
1309 {
1310 #pragma unused(arg1, arg2)
1311         int error, i;
1312
1313         lck_mtx_lock(&ip6qlock);
1314         i = ip6_maxfrags;
1315         error = sysctl_handle_int(oidp, &i, 0, req);
1316         if (error || req->newptr == USER_ADDR_NULL) {
1317                 goto done;
1318         }
1319         /* impose bounds */
1320         if (i < -1 || i > (nmbclusters / 4)) {
1321                 error = EINVAL;
1322                 goto done;
1323         }
1324         ip6_maxfrags = i;
1325         ip6q_updateparams();    /* see if we need to arm timer */
1326 done:
1327         lck_mtx_unlock(&ip6qlock);
1328         return error;
1329 }