2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */
30 /* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/malloc.h>
64 #include <sys/mcache.h>
66 #include <sys/domain.h>
67 #include <sys/protosw.h>
68 #include <sys/socket.h>
69 #include <sys/errno.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <kern/queue.h>
74 #include <kern/locks.h>
77 #include <net/route.h>
79 #include <netinet/in.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_var.h>
85 #include <netinet/icmp6.h>
87 #include <net/net_osdep.h>
88 #include <dev/random/randomdev.h>
91 * Define it to get a correct behavior on per-interface statistics.
93 #define IN6_IFSTAT_STRICT
95 struct ip6asfrag
*ip6af_down
;
96 struct ip6asfrag
*ip6af_up
;
98 int ip6af_offset
; /* offset in ip6af_m to next header */
99 int ip6af_frglen
; /* fragmentable part length */
100 int ip6af_off
; /* fragment offset */
101 u_int16_t ip6af_mff
; /* more fragment bit in frag off */
104 #define IP6_REASS_MBUF(ip6af) ((ip6af)->ip6af_m)
106 MBUFQ_HEAD(fq6_head
);
108 static void frag6_save_context(struct mbuf
*, int);
109 static void frag6_scrub_context(struct mbuf
*);
110 static int frag6_restore_context(struct mbuf
*);
112 static void frag6_icmp6_paramprob_error(struct fq6_head
*);
113 static void frag6_icmp6_timeex_error(struct fq6_head
*);
115 static void frag6_enq(struct ip6asfrag
*, struct ip6asfrag
*);
116 static void frag6_deq(struct ip6asfrag
*);
117 static void frag6_insque(struct ip6q
*, struct ip6q
*);
118 static void frag6_remque(struct ip6q
*);
119 static void frag6_purgef(struct ip6q
*, struct fq6_head
*, struct fq6_head
*);
120 static void frag6_freef(struct ip6q
*, struct fq6_head
*, struct fq6_head
*);
122 static int frag6_timeout_run
; /* frag6 timer is scheduled to run */
123 static void frag6_timeout(void *);
124 static void frag6_sched_timeout(void);
126 static struct ip6q
*ip6q_alloc(int);
127 static void ip6q_free(struct ip6q
*);
128 static void ip6q_updateparams(void);
129 static struct ip6asfrag
*ip6af_alloc(int);
130 static void ip6af_free(struct ip6asfrag
*);
132 decl_lck_mtx_data(static, ip6qlock
);
133 static lck_attr_t
*ip6qlock_attr
;
134 static lck_grp_t
*ip6qlock_grp
;
135 static lck_grp_attr_t
*ip6qlock_grp_attr
;
137 /* IPv6 fragment reassembly queues (protected by ip6qlock) */
138 static struct ip6q ip6q
; /* ip6 reassembly queues */
139 static int ip6_maxfragpackets
; /* max packets in reass queues */
140 static u_int32_t frag6_nfragpackets
; /* # of packets in reass queues */
141 static int ip6_maxfrags
; /* max fragments in reass queues */
142 static u_int32_t frag6_nfrags
; /* # of fragments in reass queues */
143 static u_int32_t ip6q_limit
; /* ip6q allocation limit */
144 static u_int32_t ip6q_count
; /* current # of allocated ip6q's */
145 static u_int32_t ip6af_limit
; /* ip6asfrag allocation limit */
146 static u_int32_t ip6af_count
; /* current # of allocated ip6asfrag's */
148 static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
;
149 static int sysctl_maxfrags SYSCTL_HANDLER_ARGS
;
151 SYSCTL_DECL(_net_inet6_ip6
);
153 SYSCTL_PROC(_net_inet6_ip6
, IPV6CTL_MAXFRAGPACKETS
, maxfragpackets
,
154 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &ip6_maxfragpackets
, 0,
155 sysctl_maxfragpackets
, "I",
156 "Maximum number of IPv6 fragment reassembly queue entries");
158 SYSCTL_UINT(_net_inet6_ip6
, OID_AUTO
, fragpackets
,
159 CTLFLAG_RD
| CTLFLAG_LOCKED
, &frag6_nfragpackets
, 0,
160 "Current number of IPv6 fragment reassembly queue entries");
162 SYSCTL_PROC(_net_inet6_ip6
, IPV6CTL_MAXFRAGS
, maxfrags
,
163 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &ip6_maxfrags
, 0,
164 sysctl_maxfrags
, "I", "Maximum number of IPv6 fragments allowed");
167 * Initialise reassembly queue and fragment identifier.
172 /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */
173 _CASSERT(sizeof(struct ip6q
) <= _MLEN
);
174 /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */
175 _CASSERT(sizeof(struct ip6asfrag
) <= _MLEN
);
177 /* IPv6 fragment reassembly queue lock */
178 ip6qlock_grp_attr
= lck_grp_attr_alloc_init();
179 ip6qlock_grp
= lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr
);
180 ip6qlock_attr
= lck_attr_alloc_init();
181 lck_mtx_init(&ip6qlock
, ip6qlock_grp
, ip6qlock_attr
);
183 lck_mtx_lock(&ip6qlock
);
184 /* Initialize IPv6 reassembly queue. */
185 ip6q
.ip6q_next
= ip6q
.ip6q_prev
= &ip6q
;
187 /* same limits as IPv4 */
188 ip6_maxfragpackets
= nmbclusters
/ 32;
189 ip6_maxfrags
= ip6_maxfragpackets
* 2;
191 lck_mtx_unlock(&ip6qlock
);
195 frag6_save_context(struct mbuf
*m
, int val
)
197 m
->m_pkthdr
.pkt_hdr
= (void *)(uintptr_t)val
;
201 frag6_scrub_context(struct mbuf
*m
)
203 m
->m_pkthdr
.pkt_hdr
= NULL
;
207 frag6_restore_context(struct mbuf
*m
)
209 return (int)m
->m_pkthdr
.pkt_hdr
;
213 * Send any deferred ICMP param problem error messages; caller must not be
214 * holding ip6qlock and is expected to have saved the per-packet parameter
215 * value via frag6_save_context().
218 frag6_icmp6_paramprob_error(struct fq6_head
*diq6
)
220 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_NOTOWNED
);
222 if (!MBUFQ_EMPTY(diq6
)) {
223 struct mbuf
*merr
, *merr_tmp
;
225 MBUFQ_FOREACH_SAFE(merr
, diq6
, merr_tmp
) {
226 MBUFQ_REMOVE(diq6
, merr
);
227 MBUFQ_NEXT(merr
) = NULL
;
228 param
= frag6_restore_context(merr
);
229 frag6_scrub_context(merr
);
230 icmp6_error(merr
, ICMP6_PARAM_PROB
,
231 ICMP6_PARAMPROB_HEADER
, param
);
237 * Send any deferred ICMP time exceeded error messages;
238 * caller must not be holding ip6qlock.
241 frag6_icmp6_timeex_error(struct fq6_head
*diq6
)
243 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_NOTOWNED
);
245 if (!MBUFQ_EMPTY(diq6
)) {
246 struct mbuf
*m
, *m_tmp
;
247 MBUFQ_FOREACH_SAFE(m
, diq6
, m_tmp
) {
248 MBUFQ_REMOVE(diq6
, m
);
249 MBUFQ_NEXT(m
) = NULL
;
250 icmp6_error_flag(m
, ICMP6_TIME_EXCEEDED
,
251 ICMP6_TIME_EXCEED_REASSEMBLY
, 0, 0);
257 * In RFC2460, fragment and reassembly rule do not agree with each other,
258 * in terms of next header field handling in fragment header.
259 * While the sender will use the same value for all of the fragmented packets,
260 * receiver is suggested not to check the consistency.
262 * fragment rule (p20):
263 * (2) A Fragment header containing:
264 * The Next Header value that identifies the first header of
265 * the Fragmentable Part of the original packet.
266 * -> next header field is same for all fragments
268 * reassembly rule (p21):
269 * The Next Header field of the last header of the Unfragmentable
270 * Part is obtained from the Next Header field of the first
271 * fragment's Fragment header.
272 * -> should grab it from the first fragment only
274 * The following note also contradicts the fragment rule - no one is going to
275 * send different fragments with different next header fields.
277 * additional note (p22):
278 * The Next Header values in the Fragment headers of different
279 * fragments of the same original packet may differ. Only the value
280 * from the Offset zero fragment packet is used for reassembly.
281 * -> should grab it from the first fragment only
283 * There is no explicit reason given in the RFC. Historical reason maybe?
289 frag6_input(struct mbuf
**mp
, int *offp
, int proto
)
291 #pragma unused(proto)
292 struct mbuf
*m
= *mp
, *t
= NULL
;
293 struct ip6_hdr
*ip6
= NULL
;
294 struct ip6_frag
*ip6f
= NULL
;
295 struct ip6q
*q6
= NULL
;
296 struct ip6asfrag
*af6
= NULL
, *ip6af
= NULL
, *af6dwn
= NULL
;
297 int offset
= *offp
, i
= 0, next
= 0;
300 int fragoff
= 0, frgpartlen
= 0; /* must be larger than u_int16_t */
301 struct ifnet
*dstifp
= NULL
;
302 u_int8_t ecn
= 0, ecn0
= 0;
303 uint32_t csum
= 0, csum_flags
= 0;
304 struct fq6_head diq6
= {};
306 boolean_t drop_fragq
= FALSE
;
308 VERIFY(m
->m_flags
& M_PKTHDR
);
310 MBUFQ_INIT(&diq6
); /* for deferred ICMP param problem errors */
312 /* Expect 32-bit aligned data pointer on strict-align platforms */
313 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m
);
315 IP6_EXTHDR_CHECK(m
, offset
, sizeof(struct ip6_frag
), goto done
);
316 ip6
= mtod(m
, struct ip6_hdr
*);
317 ip6f
= (struct ip6_frag
*)((caddr_t
)ip6
+ offset
);
319 #ifdef IN6_IFSTAT_STRICT
320 /* find the destination interface of the packet. */
321 if (m
->m_pkthdr
.pkt_flags
& PKTF_IFAINFO
) {
324 if (ip6_getdstifaddr_info(m
, &idx
, NULL
) == 0) {
325 if (idx
> 0 && idx
<= if_index
) {
326 ifnet_head_lock_shared();
327 dstifp
= ifindex2ifnet
[idx
];
332 #endif /* IN6_IFSTAT_STRICT */
334 /* we are violating the spec, this may not be the dst interface */
335 if (dstifp
== NULL
) {
336 dstifp
= m
->m_pkthdr
.rcvif
;
339 /* jumbo payload can't contain a fragment header */
340 if (ip6
->ip6_plen
== 0) {
341 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
, offset
);
342 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
348 * check whether fragment packet's fragment length is
349 * multiple of 8 octets.
350 * sizeof(struct ip6_frag) == 8
351 * sizeof(struct ip6_hdr) = 40
353 if ((ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
) &&
354 (((ntohs(ip6
->ip6_plen
) - offset
) & 0x7) != 0)) {
355 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
356 offsetof(struct ip6_hdr
, ip6_plen
));
357 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
362 /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */
363 if (ip6_maxfragpackets
== 0 || ip6_maxfrags
== 0) {
364 ip6stat
.ip6s_fragments
++;
365 ip6stat
.ip6s_fragdropped
++;
366 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
372 /* offset now points to data portion */
373 offset
+= sizeof(struct ip6_frag
);
376 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
377 * upfront, unrelated to any reassembly. Just skip the fragment header.
379 if ((ip6f
->ip6f_offlg
& ~IP6F_RESERVED_MASK
) == 0) {
381 * Mark packet as reassembled.
382 * In ICMPv6 processing, we drop certain
383 * NDP messages that are not expected to
384 * have fragment header based on recommendations
385 * against security vulnerability as described in
387 * Treat atomic fragments as re-assembled packets as well.
389 m
->m_pkthdr
.pkt_flags
|= PKTF_REASSEMBLED
;
390 ip6stat
.ip6s_atmfrag_rcvd
++;
391 in6_ifstat_inc(dstifp
, ifs6_atmfrag_rcvd
);
394 return ip6f
->ip6f_nxt
;
398 * Leverage partial checksum offload for simple UDP/IP fragments,
399 * as that is the most common case.
401 * Perform 1's complement adjustment of octets that got included/
402 * excluded in the hardware-calculated checksum value. Also take
403 * care of any trailing bytes and subtract out their partial sum.
405 if (ip6f
->ip6f_nxt
== IPPROTO_UDP
&&
406 offset
== (sizeof(*ip6
) + sizeof(*ip6f
)) &&
407 (m
->m_pkthdr
.csum_flags
&
408 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
409 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
410 uint32_t start
= m
->m_pkthdr
.csum_rx_start
;
411 uint32_t ip_len
= (sizeof(*ip6
) + ntohs(ip6
->ip6_plen
));
412 int32_t trailer
= (m_pktlen(m
) - ip_len
);
413 uint32_t swbytes
= (uint32_t)trailer
;
415 csum
= m
->m_pkthdr
.csum_rx_val
;
417 ASSERT(trailer
>= 0);
418 if (start
!= offset
|| trailer
!= 0) {
419 uint16_t s
= 0, d
= 0;
421 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_src
)) {
422 s
= ip6
->ip6_src
.s6_addr16
[1];
423 ip6
->ip6_src
.s6_addr16
[1] = 0;
425 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_dst
)) {
426 d
= ip6
->ip6_dst
.s6_addr16
[1];
427 ip6
->ip6_dst
.s6_addr16
[1] = 0;
430 /* callee folds in sum */
431 csum
= m_adj_sum16(m
, start
, offset
,
432 (ip_len
- offset
), csum
);
433 if (offset
> start
) {
434 swbytes
+= (offset
- start
);
436 swbytes
+= (start
- offset
);
439 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_src
)) {
440 ip6
->ip6_src
.s6_addr16
[1] = s
;
442 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_dst
)) {
443 ip6
->ip6_dst
.s6_addr16
[1] = d
;
446 csum_flags
= m
->m_pkthdr
.csum_flags
;
449 udp_in6_cksum_stats(swbytes
);
459 /* Invalidate checksum */
460 m
->m_pkthdr
.csum_flags
&= ~CSUM_DATA_VALID
;
462 ip6stat
.ip6s_fragments
++;
463 in6_ifstat_inc(dstifp
, ifs6_reass_reqd
);
465 lck_mtx_lock(&ip6qlock
);
468 for (q6
= ip6q
.ip6q_next
; q6
!= &ip6q
; q6
= q6
->ip6q_next
) {
469 if (ip6f
->ip6f_ident
== q6
->ip6q_ident
&&
470 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_src
, &q6
->ip6q_src
) &&
471 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
, &q6
->ip6q_dst
)) {
478 * Create a reassembly queue as this is the first fragment to
480 * By first frag, we don't mean the one with offset 0, but
481 * any of the fragments of the fragmented packet that has
486 q6
= ip6q_alloc(M_DONTWAIT
);
491 frag6_insque(q6
, &ip6q
);
492 frag6_nfragpackets
++;
494 /* ip6q_nxt will be filled afterwards, from 1st fragment */
495 q6
->ip6q_down
= q6
->ip6q_up
= (struct ip6asfrag
*)q6
;
497 q6
->ip6q_nxtp
= (u_char
*)nxtp
;
499 q6
->ip6q_ident
= ip6f
->ip6f_ident
;
500 q6
->ip6q_ttl
= IPV6_FRAGTTL
;
501 q6
->ip6q_src
= ip6
->ip6_src
;
502 q6
->ip6q_dst
= ip6
->ip6_dst
;
504 (ntohl(ip6
->ip6_flow
) >> 20) & IPTOS_ECN_MASK
;
505 q6
->ip6q_unfrglen
= -1; /* The 1st fragment has not arrived. */
511 * If the first fragment has valid checksum offload
512 * info, the rest of fragments are eligible as well.
514 if (csum_flags
!= 0) {
515 q6
->ip6q_csum
= csum
;
516 q6
->ip6q_csum_flags
= csum_flags
;
520 if (q6
->ip6q_flags
& IP6QF_DIRTY
) {
525 * If it's the 1st fragment, record the length of the
526 * unfragmentable part and the next header of the fragment header.
528 fragoff
= ntohs(ip6f
->ip6f_offlg
& IP6F_OFF_MASK
);
530 q6
->ip6q_unfrglen
= offset
- sizeof(struct ip6_hdr
) -
531 sizeof(struct ip6_frag
);
532 q6
->ip6q_nxt
= ip6f
->ip6f_nxt
;
536 * Check that the reassembled packet would not exceed 65535 bytes
538 * If it would exceed, discard the fragment and return an ICMP error.
540 frgpartlen
= sizeof(struct ip6_hdr
) + ntohs(ip6
->ip6_plen
) - offset
;
541 if (q6
->ip6q_unfrglen
>= 0) {
542 /* The 1st fragment has already arrived. */
543 if (q6
->ip6q_unfrglen
+ fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
544 lck_mtx_unlock(&ip6qlock
);
546 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
547 offset
- sizeof(struct ip6_frag
) +
548 offsetof(struct ip6_frag
, ip6f_offlg
));
552 } else if (fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
553 lck_mtx_unlock(&ip6qlock
);
555 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
556 offset
- sizeof(struct ip6_frag
) +
557 offsetof(struct ip6_frag
, ip6f_offlg
));
562 * If it's the first fragment, do the above check for each
563 * fragment already stored in the reassembly queue.
567 * https://tools.ietf.org/html/rfc8200#page-20
568 * If the first fragment does not include all headers through an
569 * Upper-Layer header, then that fragment should be discarded and
570 * an ICMP Parameter Problem, Code 3, message should be sent to
571 * the source of the fragment, with the Pointer field set to zero.
573 if (!ip6_pkt_has_ulp(m
)) {
574 lck_mtx_unlock(&ip6qlock
);
576 icmp6_error(m
, ICMP6_PARAM_PROB
,
577 ICMP6_PARAMPROB_FIRSTFRAG_INCOMP_HDR
, 0);
581 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
583 af6dwn
= af6
->ip6af_down
;
585 if (q6
->ip6q_unfrglen
+ af6
->ip6af_off
+ af6
->ip6af_frglen
>
587 struct mbuf
*merr
= IP6_REASS_MBUF(af6
);
588 struct ip6_hdr
*ip6err
;
589 int erroff
= af6
->ip6af_offset
;
591 /* dequeue the fragment. */
595 /* adjust pointer. */
596 ip6err
= mtod(merr
, struct ip6_hdr
*);
599 * Restore source and destination addresses
600 * in the erroneous IPv6 header.
602 ip6err
->ip6_src
= q6
->ip6q_src
;
603 ip6err
->ip6_dst
= q6
->ip6q_dst
;
605 frag6_save_context(merr
,
606 erroff
- sizeof(struct ip6_frag
) +
607 offsetof(struct ip6_frag
, ip6f_offlg
));
609 MBUFQ_ENQUEUE(&diq6
, merr
);
614 ip6af
= ip6af_alloc(M_DONTWAIT
);
619 ip6af
->ip6af_mff
= ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
;
620 ip6af
->ip6af_off
= fragoff
;
621 ip6af
->ip6af_frglen
= frgpartlen
;
622 ip6af
->ip6af_offset
= offset
;
623 IP6_REASS_MBUF(ip6af
) = m
;
626 af6
= (struct ip6asfrag
*)q6
;
631 * Handle ECN by comparing this segment with the first one;
632 * if CE is set, do not lose CE.
633 * drop if CE and not-ECT are mixed for the same packet.
635 ecn
= (ntohl(ip6
->ip6_flow
) >> 20) & IPTOS_ECN_MASK
;
637 if (ecn
== IPTOS_ECN_CE
) {
638 if (ecn0
== IPTOS_ECN_NOTECT
) {
642 if (ecn0
!= IPTOS_ECN_CE
) {
643 q6
->ip6q_ecn
= IPTOS_ECN_CE
;
646 if (ecn
== IPTOS_ECN_NOTECT
&& ecn0
!= IPTOS_ECN_NOTECT
) {
652 * Find a segment which begins after this one does.
654 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
655 af6
= af6
->ip6af_down
) {
656 if (af6
->ip6af_off
> ip6af
->ip6af_off
) {
662 * As per RFC 8200 reassembly rules, we MUST drop the entire
663 * chain of fragments for a packet to be assembled, if we receive
664 * any overlapping fragments.
665 * https://tools.ietf.org/html/rfc8200#page-20
667 * To avoid more conditional code, just reuse frag6_freef and defer
668 * its call to post fragment insertion in the queue.
670 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
671 if (af6
->ip6af_up
->ip6af_off
== ip6af
->ip6af_off
) {
672 if (af6
->ip6af_up
->ip6af_frglen
!= ip6af
->ip6af_frglen
) {
676 * XXX Ideally we should be comparing the entire
677 * packet here but for now just use off and fraglen
678 * to ignore a duplicate fragment.
684 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
692 if (af6
!= (struct ip6asfrag
*)q6
) {
694 * Given that we break when af6->ip6af_off > ip6af->ip6af_off,
695 * we shouldn't need a check for duplicate fragment here.
696 * For now just assert.
698 VERIFY(af6
->ip6af_off
!= ip6af
->ip6af_off
);
699 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
706 * If this fragment contains similar checksum offload info
707 * as that of the existing ones, accumulate checksum. Otherwise,
708 * invalidate checksum offload info for the entire datagram.
710 if (csum_flags
!= 0 && csum_flags
== q6
->ip6q_csum_flags
) {
711 q6
->ip6q_csum
+= csum
;
712 } else if (q6
->ip6q_csum_flags
!= 0) {
713 q6
->ip6q_csum_flags
= 0;
718 * Stick new segment in its place;
719 * check for complete reassembly.
720 * Move to front of packet queue, as we are
721 * the most recently active fragmented packet.
723 frag6_enq(ip6af
, af6
->ip6af_up
);
728 * This holds true, when we receive overlapping fragments.
729 * We must silently drop all the fragments we have received
731 * Also mark q6 as dirty, so as to not add any new fragments to it.
732 * Make sure even q6 marked dirty is kept till timer expires for
733 * reassembly and when that happens, silently get rid of q6
736 struct fq6_head dfq6
= {0};
737 MBUFQ_INIT(&dfq6
); /* for deferred frees */
738 q6
->ip6q_flags
|= IP6QF_DIRTY
;
739 /* Purge all the fragments but do not free q6 */
740 frag6_purgef(q6
, &dfq6
, NULL
);
743 /* free fragments that need to be freed */
744 if (!MBUFQ_EMPTY(&dfq6
)) {
747 VERIFY(MBUFQ_EMPTY(&dfq6
));
749 * Just in case the above logic got anything added
751 * Please note that these mbufs are not present in the
752 * fragment queue and are added to diq6 for sending
754 * Given that the current fragment was an overlapping
755 * fragment and the RFC requires us to not send any
756 * ICMPv6 errors while purging the entire queue.
759 if (!MBUFQ_EMPTY(&diq6
)) {
762 VERIFY(MBUFQ_EMPTY(&diq6
));
764 * MBUFQ_DRAIN would have drained all the mbufs
765 * in the fragment queue.
766 * This shouldn't be needed as we are returning IPPROTO_DONE
767 * from here but change the passed mbuf pointer to NULL.
770 lck_mtx_unlock(&ip6qlock
);
774 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
775 af6
= af6
->ip6af_down
) {
776 if (af6
->ip6af_off
!= next
) {
777 lck_mtx_unlock(&ip6qlock
);
782 next
+= af6
->ip6af_frglen
;
784 if (af6
->ip6af_up
->ip6af_mff
) {
785 lck_mtx_unlock(&ip6qlock
);
792 * Reassembly is complete; concatenate fragments.
794 ip6af
= q6
->ip6q_down
;
795 t
= m
= IP6_REASS_MBUF(ip6af
);
796 af6
= ip6af
->ip6af_down
;
798 while (af6
!= (struct ip6asfrag
*)q6
) {
799 af6dwn
= af6
->ip6af_down
;
804 t
->m_next
= IP6_REASS_MBUF(af6
);
805 m_adj(t
->m_next
, af6
->ip6af_offset
);
811 * Store partial hardware checksum info from the fragment queue;
812 * the receive start offset is set to 40 bytes (see code at the
813 * top of this routine.)
815 if (q6
->ip6q_csum_flags
!= 0) {
816 csum
= q6
->ip6q_csum
;
820 m
->m_pkthdr
.csum_rx_val
= (u_int16_t
)csum
;
821 m
->m_pkthdr
.csum_rx_start
= sizeof(struct ip6_hdr
);
822 m
->m_pkthdr
.csum_flags
= q6
->ip6q_csum_flags
;
823 } else if ((m
->m_pkthdr
.rcvif
->if_flags
& IFF_LOOPBACK
) ||
824 (m
->m_pkthdr
.pkt_flags
& PKTF_LOOP
)) {
825 /* loopback checksums are always OK */
826 m
->m_pkthdr
.csum_data
= 0xffff;
827 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
830 /* adjust offset to point where the original next header starts */
831 offset
= ip6af
->ip6af_offset
- sizeof(struct ip6_frag
);
833 ip6
= mtod(m
, struct ip6_hdr
*);
834 ip6
->ip6_plen
= htons((uint16_t)(next
+ offset
- sizeof(struct ip6_hdr
)));
835 ip6
->ip6_src
= q6
->ip6q_src
;
836 ip6
->ip6_dst
= q6
->ip6q_dst
;
837 if (q6
->ip6q_ecn
== IPTOS_ECN_CE
) {
838 ip6
->ip6_flow
|= htonl(IPTOS_ECN_CE
<< 20);
843 *q6
->ip6q_nxtp
= (u_char
)(nxt
& 0xff);
846 /* Delete frag6 header */
847 if (m
->m_len
>= offset
+ sizeof(struct ip6_frag
)) {
848 /* This is the only possible case with !PULLDOWN_TEST */
849 ovbcopy((caddr_t
)ip6
, (caddr_t
)ip6
+ sizeof(struct ip6_frag
),
851 m
->m_data
+= sizeof(struct ip6_frag
);
852 m
->m_len
-= sizeof(struct ip6_frag
);
854 /* this comes with no copy if the boundary is on cluster */
855 if ((t
= m_split(m
, offset
, M_DONTWAIT
)) == NULL
) {
857 frag6_nfragpackets
--;
858 frag6_nfrags
-= q6
->ip6q_nfrag
;
862 m_adj(t
, sizeof(struct ip6_frag
));
867 * Store NXT to the original.
870 char *prvnxtp
= ip6_get_prevhdr(m
, offset
); /* XXX */
875 frag6_nfragpackets
--;
876 frag6_nfrags
-= q6
->ip6q_nfrag
;
879 if (m
->m_flags
& M_PKTHDR
) { /* Isn't it always true? */
882 * Mark packet as reassembled
883 * In ICMPv6 processing, we drop certain
884 * NDP messages that are not expected to
885 * have fragment header based on recommendations
886 * against security vulnerability as described in
889 m
->m_pkthdr
.pkt_flags
|= PKTF_REASSEMBLED
;
891 ip6stat
.ip6s_reassembled
++;
894 * Tell launch routine the next header
899 /* arm the purge timer if not already and if there's work to do */
900 frag6_sched_timeout();
901 lck_mtx_unlock(&ip6qlock
);
902 in6_ifstat_inc(dstifp
, ifs6_reass_ok
);
903 frag6_icmp6_paramprob_error(&diq6
);
904 VERIFY(MBUFQ_EMPTY(&diq6
));
911 if (frag6_nfragpackets
== 0) {
912 frag6_icmp6_paramprob_error(&diq6
);
913 VERIFY(MBUFQ_EMPTY(&diq6
));
916 lck_mtx_lock(&ip6qlock
);
918 /* arm the purge timer if not already and if there's work to do */
919 frag6_sched_timeout();
920 lck_mtx_unlock(&ip6qlock
);
921 frag6_icmp6_paramprob_error(&diq6
);
922 VERIFY(MBUFQ_EMPTY(&diq6
));
926 ip6stat
.ip6s_fragdropped
++;
927 /* arm the purge timer if not already and if there's work to do */
928 frag6_sched_timeout();
929 lck_mtx_unlock(&ip6qlock
);
930 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
933 frag6_icmp6_paramprob_error(&diq6
);
934 VERIFY(MBUFQ_EMPTY(&diq6
));
939 * This routine removes the enqueued frames from the passed fragment
940 * header and enqueues those to dfq6 which is an out-arg for the dequeued
942 * If the caller also provides diq6, this routine also enqueues the 0 offset
943 * fragment to that list as it potentially gets used by the caller
944 * to prepare the relevant ICMPv6 error message (time exceeded or
946 * It leaves the fragment header object (q6) intact.
949 frag6_purgef(struct ip6q
*q6
, struct fq6_head
*dfq6
, struct fq6_head
*diq6
)
951 struct ip6asfrag
*af6
= NULL
;
952 struct ip6asfrag
*down6
= NULL
;
954 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
956 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
958 struct mbuf
*m
= IP6_REASS_MBUF(af6
);
960 down6
= af6
->ip6af_down
;
964 * If caller wants to generate ICMP time-exceeded,
965 * as indicated by the argument diq6, return it for
966 * the first fragment and add others to the fragment
969 if (af6
->ip6af_off
== 0 && diq6
!= NULL
) {
973 ip6
= mtod(m
, struct ip6_hdr
*);
975 /* restore source and destination addresses */
976 ip6
->ip6_src
= q6
->ip6q_src
;
977 ip6
->ip6_dst
= q6
->ip6q_dst
;
978 MBUFQ_ENQUEUE(diq6
, m
);
980 MBUFQ_ENQUEUE(dfq6
, m
);
987 * This routine removes the enqueued frames from the passed fragment
988 * header and enqueues those to dfq6 which is an out-arg for the dequeued
990 * If the caller also provides diq6, this routine also enqueues the 0 offset
991 * fragment to that list as it potentially gets used by the caller
992 * to prepare the relevant ICMPv6 error message (time exceeded or
994 * It also removes the fragment header object from the queue and frees it.
997 frag6_freef(struct ip6q
*q6
, struct fq6_head
*dfq6
, struct fq6_head
*diq6
)
999 frag6_purgef(q6
, dfq6
, diq6
);
1001 frag6_nfragpackets
--;
1002 frag6_nfrags
-= q6
->ip6q_nfrag
;
1007 * Put an ip fragment on a reassembly chain.
1008 * Like insque, but pointers in middle of structure.
1011 frag6_enq(struct ip6asfrag
*af6
, struct ip6asfrag
*up6
)
1013 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1015 af6
->ip6af_up
= up6
;
1016 af6
->ip6af_down
= up6
->ip6af_down
;
1017 up6
->ip6af_down
->ip6af_up
= af6
;
1018 up6
->ip6af_down
= af6
;
1022 * To frag6_enq as remque is to insque.
1025 frag6_deq(struct ip6asfrag
*af6
)
1027 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1029 af6
->ip6af_up
->ip6af_down
= af6
->ip6af_down
;
1030 af6
->ip6af_down
->ip6af_up
= af6
->ip6af_up
;
1034 frag6_insque(struct ip6q
*new, struct ip6q
*old
)
1036 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1038 new->ip6q_prev
= old
;
1039 new->ip6q_next
= old
->ip6q_next
;
1040 old
->ip6q_next
->ip6q_prev
= new;
1041 old
->ip6q_next
= new;
1045 frag6_remque(struct ip6q
*p6
)
1047 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1049 p6
->ip6q_prev
->ip6q_next
= p6
->ip6q_next
;
1050 p6
->ip6q_next
->ip6q_prev
= p6
->ip6q_prev
;
1054 * IPv6 reassembling timer processing;
1055 * if a timer expires on a reassembly
1056 * queue, discard it.
1059 frag6_timeout(void *arg
)
1062 struct fq6_head dfq6
, diq6
;
1063 struct fq6_head
*diq6_tmp
= NULL
;
1066 MBUFQ_INIT(&dfq6
); /* for deferred frees */
1067 MBUFQ_INIT(&diq6
); /* for deferred ICMP time exceeded errors */
1070 * Update coarse-grained networking timestamp (in sec.); the idea
1071 * is to piggy-back on the timeout callout to update the counter
1072 * returnable via net_uptime().
1074 net_update_uptime();
1076 lck_mtx_lock(&ip6qlock
);
1077 q6
= ip6q
.ip6q_next
;
1079 while (q6
!= &ip6q
) {
1082 if (q6
->ip6q_prev
->ip6q_ttl
== 0) {
1083 ip6stat
.ip6s_fragtimeout
++;
1084 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1086 * Avoid sending ICMPv6 Time Exceeded for fragment headers
1087 * that are marked dirty.
1089 diq6_tmp
= (q6
->ip6q_prev
->ip6q_flags
& IP6QF_DIRTY
) ?
1091 frag6_freef(q6
->ip6q_prev
, &dfq6
, diq6_tmp
);
1096 * If we are over the maximum number of fragments
1097 * (due to the limit being lowered), drain off
1098 * enough to get down to the new limit.
1100 if (ip6_maxfragpackets
>= 0) {
1101 while (frag6_nfragpackets
> (unsigned)ip6_maxfragpackets
&&
1103 ip6stat
.ip6s_fragoverflow
++;
1104 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1106 * Avoid sending ICMPv6 Time Exceeded for fragment headers
1107 * that are marked dirty.
1109 diq6_tmp
= (ip6q
.ip6q_prev
->ip6q_flags
& IP6QF_DIRTY
) ?
1111 frag6_freef(ip6q
.ip6q_prev
, &dfq6
, diq6_tmp
);
1114 /* re-arm the purge timer if there's work to do */
1115 frag6_timeout_run
= 0;
1116 frag6_sched_timeout();
1117 lck_mtx_unlock(&ip6qlock
);
1119 /* free fragments that need to be freed */
1120 if (!MBUFQ_EMPTY(&dfq6
)) {
1124 frag6_icmp6_timeex_error(&diq6
);
1126 VERIFY(MBUFQ_EMPTY(&dfq6
));
1127 VERIFY(MBUFQ_EMPTY(&diq6
));
1131 frag6_sched_timeout(void)
1133 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1135 if (!frag6_timeout_run
&& frag6_nfragpackets
> 0) {
1136 frag6_timeout_run
= 1;
1137 timeout(frag6_timeout
, NULL
, hz
);
1142 * Drain off all datagram fragments.
1147 struct fq6_head dfq6
, diq6
;
1148 struct fq6_head
*diq6_tmp
= NULL
;
1150 MBUFQ_INIT(&dfq6
); /* for deferred frees */
1151 MBUFQ_INIT(&diq6
); /* for deferred ICMP time exceeded errors */
1153 lck_mtx_lock(&ip6qlock
);
1154 while (ip6q
.ip6q_next
!= &ip6q
) {
1155 ip6stat
.ip6s_fragdropped
++;
1156 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1158 * Avoid sending ICMPv6 Time Exceeded for fragment headers
1159 * that are marked dirty.
1161 diq6_tmp
= (ip6q
.ip6q_next
->ip6q_flags
& IP6QF_DIRTY
) ?
1163 frag6_freef(ip6q
.ip6q_next
, &dfq6
, diq6_tmp
);
1165 lck_mtx_unlock(&ip6qlock
);
1167 /* free fragments that need to be freed */
1168 if (!MBUFQ_EMPTY(&dfq6
)) {
1172 frag6_icmp6_timeex_error(&diq6
);
1174 VERIFY(MBUFQ_EMPTY(&dfq6
));
1175 VERIFY(MBUFQ_EMPTY(&diq6
));
1178 static struct ip6q
*
1185 * See comments in ip6q_updateparams(). Keep the count separate
1186 * from frag6_nfragpackets since the latter represents the elements
1187 * already in the reassembly queues.
1189 if (ip6q_limit
> 0 && ip6q_count
> ip6q_limit
) {
1193 t
= m_get(how
, MT_FTABLE
);
1195 atomic_add_32(&ip6q_count
, 1);
1196 q6
= mtod(t
, struct ip6q
*);
1197 bzero(q6
, sizeof(*q6
));
1205 ip6q_free(struct ip6q
*q6
)
1207 (void) m_free(dtom(q6
));
1208 atomic_add_32(&ip6q_count
, -1);
1211 static struct ip6asfrag
*
1212 ip6af_alloc(int how
)
1215 struct ip6asfrag
*af6
;
1218 * See comments in ip6q_updateparams(). Keep the count separate
1219 * from frag6_nfrags since the latter represents the elements
1220 * already in the reassembly queues.
1222 if (ip6af_limit
> 0 && ip6af_count
> ip6af_limit
) {
1226 t
= m_get(how
, MT_FTABLE
);
1228 atomic_add_32(&ip6af_count
, 1);
1229 af6
= mtod(t
, struct ip6asfrag
*);
1230 bzero(af6
, sizeof(*af6
));
1238 ip6af_free(struct ip6asfrag
*af6
)
1240 (void) m_free(dtom(af6
));
1241 atomic_add_32(&ip6af_count
, -1);
1245 ip6q_updateparams(void)
1247 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1249 * -1 for unlimited allocation.
1251 if (ip6_maxfragpackets
< 0) {
1254 if (ip6_maxfrags
< 0) {
1258 * Positive number for specific bound.
1260 if (ip6_maxfragpackets
> 0) {
1261 ip6q_limit
= ip6_maxfragpackets
;
1263 if (ip6_maxfrags
> 0) {
1264 ip6af_limit
= ip6_maxfrags
;
1267 * Zero specifies no further fragment queue allocation -- set the
1268 * bound very low, but rely on implementation elsewhere to actually
1269 * prevent allocation and reclaim current queues.
1271 if (ip6_maxfragpackets
== 0) {
1274 if (ip6_maxfrags
== 0) {
1278 * Arm the purge timer if not already and if there's work to do
1280 frag6_sched_timeout();
1284 sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
1286 #pragma unused(arg1, arg2)
1289 lck_mtx_lock(&ip6qlock
);
1290 i
= ip6_maxfragpackets
;
1291 error
= sysctl_handle_int(oidp
, &i
, 0, req
);
1292 if (error
|| req
->newptr
== USER_ADDR_NULL
) {
1296 if (i
< -1 || i
> (nmbclusters
/ 4)) {
1300 ip6_maxfragpackets
= i
;
1301 ip6q_updateparams();
1303 lck_mtx_unlock(&ip6qlock
);
1308 sysctl_maxfrags SYSCTL_HANDLER_ARGS
1310 #pragma unused(arg1, arg2)
1313 lck_mtx_lock(&ip6qlock
);
1315 error
= sysctl_handle_int(oidp
, &i
, 0, req
);
1316 if (error
|| req
->newptr
== USER_ADDR_NULL
) {
1320 if (i
< -1 || i
> (nmbclusters
/ 4)) {
1325 ip6q_updateparams(); /* see if we need to arm timer */
1327 lck_mtx_unlock(&ip6qlock
);