2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */
30 /* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/malloc.h>
64 #include <sys/mcache.h>
66 #include <sys/domain.h>
67 #include <sys/protosw.h>
68 #include <sys/socket.h>
69 #include <sys/errno.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <kern/queue.h>
74 #include <kern/locks.h>
77 #include <net/route.h>
79 #include <netinet/in.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip_var.h>
83 #include <netinet/ip6.h>
84 #include <netinet6/ip6_var.h>
85 #include <netinet/icmp6.h>
87 #include <net/net_osdep.h>
88 #include <dev/random/randomdev.h>
91 * Define it to get a correct behavior on per-interface statistics.
93 #define IN6_IFSTAT_STRICT
97 static void frag6_save_context(struct mbuf
*, int);
98 static void frag6_scrub_context(struct mbuf
*);
99 static int frag6_restore_context(struct mbuf
*);
101 static void frag6_icmp6_paramprob_error(struct fq6_head
*);
102 static void frag6_icmp6_timeex_error(struct fq6_head
*);
104 static void frag6_enq(struct ip6asfrag
*, struct ip6asfrag
*);
105 static void frag6_deq(struct ip6asfrag
*);
106 static void frag6_insque(struct ip6q
*, struct ip6q
*);
107 static void frag6_remque(struct ip6q
*);
108 static void frag6_freef(struct ip6q
*, struct fq6_head
*, struct fq6_head
*);
110 static int frag6_timeout_run
; /* frag6 timer is scheduled to run */
111 static void frag6_timeout(void *);
112 static void frag6_sched_timeout(void);
114 static struct ip6q
*ip6q_alloc(int);
115 static void ip6q_free(struct ip6q
*);
116 static void ip6q_updateparams(void);
117 static struct ip6asfrag
*ip6af_alloc(int);
118 static void ip6af_free(struct ip6asfrag
*);
120 decl_lck_mtx_data(static, ip6qlock
);
121 static lck_attr_t
*ip6qlock_attr
;
122 static lck_grp_t
*ip6qlock_grp
;
123 static lck_grp_attr_t
*ip6qlock_grp_attr
;
125 /* IPv6 fragment reassembly queues (protected by ip6qlock) */
126 static struct ip6q ip6q
; /* ip6 reassembly queues */
127 static int ip6_maxfragpackets
; /* max packets in reass queues */
128 static u_int32_t frag6_nfragpackets
; /* # of packets in reass queues */
129 static int ip6_maxfrags
; /* max fragments in reass queues */
130 static u_int32_t frag6_nfrags
; /* # of fragments in reass queues */
131 static u_int32_t ip6q_limit
; /* ip6q allocation limit */
132 static u_int32_t ip6q_count
; /* current # of allocated ip6q's */
133 static u_int32_t ip6af_limit
; /* ip6asfrag allocation limit */
134 static u_int32_t ip6af_count
; /* current # of allocated ip6asfrag's */
136 static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
;
137 static int sysctl_maxfrags SYSCTL_HANDLER_ARGS
;
139 SYSCTL_DECL(_net_inet6_ip6
);
141 SYSCTL_PROC(_net_inet6_ip6
, IPV6CTL_MAXFRAGPACKETS
, maxfragpackets
,
142 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &ip6_maxfragpackets
, 0,
143 sysctl_maxfragpackets
, "I",
144 "Maximum number of IPv6 fragment reassembly queue entries");
146 SYSCTL_UINT(_net_inet6_ip6
, OID_AUTO
, fragpackets
,
147 CTLFLAG_RD
| CTLFLAG_LOCKED
, &frag6_nfragpackets
, 0,
148 "Current number of IPv6 fragment reassembly queue entries");
150 SYSCTL_PROC(_net_inet6_ip6
, IPV6CTL_MAXFRAGS
, maxfrags
,
151 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
, &ip6_maxfrags
, 0,
152 sysctl_maxfrags
, "I", "Maximum number of IPv6 fragments allowed");
155 * Initialise reassembly queue and fragment identifier.
160 /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */
161 _CASSERT(sizeof (struct ip6q
) <= _MLEN
);
162 /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */
163 _CASSERT(sizeof (struct ip6asfrag
) <= _MLEN
);
165 /* IPv6 fragment reassembly queue lock */
166 ip6qlock_grp_attr
= lck_grp_attr_alloc_init();
167 ip6qlock_grp
= lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr
);
168 ip6qlock_attr
= lck_attr_alloc_init();
169 lck_mtx_init(&ip6qlock
, ip6qlock_grp
, ip6qlock_attr
);
171 lck_mtx_lock(&ip6qlock
);
172 /* Initialize IPv6 reassembly queue. */
173 ip6q
.ip6q_next
= ip6q
.ip6q_prev
= &ip6q
;
175 /* same limits as IPv4 */
176 ip6_maxfragpackets
= nmbclusters
/ 32;
177 ip6_maxfrags
= ip6_maxfragpackets
* 2;
179 lck_mtx_unlock(&ip6qlock
);
183 frag6_save_context(struct mbuf
*m
, int val
)
185 m
->m_pkthdr
.pkt_hdr
= (void *)(uintptr_t)val
;
189 frag6_scrub_context(struct mbuf
*m
)
191 m
->m_pkthdr
.pkt_hdr
= NULL
;
195 frag6_restore_context(struct mbuf
*m
)
197 return ((int)m
->m_pkthdr
.pkt_hdr
);
201 * Send any deferred ICMP param problem error messages; caller must not be
202 * holding ip6qlock and is expected to have saved the per-packet parameter
203 * value via frag6_save_context().
206 frag6_icmp6_paramprob_error(struct fq6_head
*diq6
)
208 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_NOTOWNED
);
210 if (!MBUFQ_EMPTY(diq6
)) {
211 struct mbuf
*merr
, *merr_tmp
;
213 MBUFQ_FOREACH_SAFE(merr
, diq6
, merr_tmp
) {
214 MBUFQ_REMOVE(diq6
, merr
);
215 MBUFQ_NEXT(merr
) = NULL
;
216 param
= frag6_restore_context(merr
);
217 frag6_scrub_context(merr
);
218 icmp6_error(merr
, ICMP6_PARAM_PROB
,
219 ICMP6_PARAMPROB_HEADER
, param
);
225 * Send any deferred ICMP time exceeded error messages;
226 * caller must not be holding ip6qlock.
229 frag6_icmp6_timeex_error(struct fq6_head
*diq6
)
231 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_NOTOWNED
);
233 if (!MBUFQ_EMPTY(diq6
)) {
234 struct mbuf
*m
, *m_tmp
;
235 MBUFQ_FOREACH_SAFE(m
, diq6
, m_tmp
) {
236 MBUFQ_REMOVE(diq6
, m
);
237 MBUFQ_NEXT(m
) = NULL
;
238 icmp6_error_flag(m
, ICMP6_TIME_EXCEEDED
,
239 ICMP6_TIME_EXCEED_REASSEMBLY
, 0, 0);
245 * In RFC2460, fragment and reassembly rule do not agree with each other,
246 * in terms of next header field handling in fragment header.
247 * While the sender will use the same value for all of the fragmented packets,
248 * receiver is suggested not to check the consistency.
250 * fragment rule (p20):
251 * (2) A Fragment header containing:
252 * The Next Header value that identifies the first header of
253 * the Fragmentable Part of the original packet.
254 * -> next header field is same for all fragments
256 * reassembly rule (p21):
257 * The Next Header field of the last header of the Unfragmentable
258 * Part is obtained from the Next Header field of the first
259 * fragment's Fragment header.
260 * -> should grab it from the first fragment only
262 * The following note also contradicts the fragment rule - no one is going to
263 * send different fragment with different next header field.
265 * additional note (p22):
266 * The Next Header values in the Fragment headers of different
267 * fragments of the same original packet may differ. Only the value
268 * from the Offset zero fragment packet is used for reassembly.
269 * -> should grab it from the first fragment only
271 * There is no explicit reason given in the RFC. Historical reason maybe?
277 frag6_input(struct mbuf
**mp
, int *offp
, int proto
)
279 #pragma unused(proto)
280 struct mbuf
*m
= *mp
, *t
;
282 struct ip6_frag
*ip6f
;
284 struct ip6asfrag
*af6
, *ip6af
, *af6dwn
;
285 int offset
= *offp
, nxt
, i
, next
;
287 int fragoff
, frgpartlen
; /* must be larger than u_int16_t */
288 struct ifnet
*dstifp
= NULL
;
290 uint32_t csum
, csum_flags
;
291 struct fq6_head diq6
;
294 VERIFY(m
->m_flags
& M_PKTHDR
);
296 MBUFQ_INIT(&diq6
); /* for deferred ICMP param problem errors */
298 /* Expect 32-bit aligned data pointer on strict-align platforms */
299 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m
);
301 ip6
= mtod(m
, struct ip6_hdr
*);
302 IP6_EXTHDR_CHECK(m
, offset
, sizeof(struct ip6_frag
), goto done
);
303 ip6f
= (struct ip6_frag
*)((caddr_t
)ip6
+ offset
);
305 #ifdef IN6_IFSTAT_STRICT
306 /* find the destination interface of the packet. */
307 if (m
->m_pkthdr
.pkt_flags
& PKTF_IFAINFO
) {
310 if (ip6_getdstifaddr_info(m
, &idx
, NULL
) == 0) {
311 if (idx
> 0 && idx
<= if_index
) {
312 ifnet_head_lock_shared();
313 dstifp
= ifindex2ifnet
[idx
];
318 #endif /* IN6_IFSTAT_STRICT */
320 /* we are violating the spec, this may not be the dst interface */
322 dstifp
= m
->m_pkthdr
.rcvif
;
324 /* jumbo payload can't contain a fragment header */
325 if (ip6
->ip6_plen
== 0) {
326 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
, offset
);
327 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
333 * check whether fragment packet's fragment length is
334 * multiple of 8 octets.
335 * sizeof(struct ip6_frag) == 8
336 * sizeof(struct ip6_hdr) = 40
338 if ((ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
) &&
339 (((ntohs(ip6
->ip6_plen
) - offset
) & 0x7) != 0)) {
340 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
341 offsetof(struct ip6_hdr
, ip6_plen
));
342 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
347 /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */
348 if (ip6_maxfragpackets
== 0 || ip6_maxfrags
== 0) {
349 ip6stat
.ip6s_fragments
++;
350 ip6stat
.ip6s_fragdropped
++;
351 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
357 /* offset now points to data portion */
358 offset
+= sizeof(struct ip6_frag
);
361 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
362 * upfront, unrelated to any reassembly. Just skip the fragment header.
364 if ((ip6f
->ip6f_offlg
& ~IP6F_RESERVED_MASK
) == 0) {
366 * In ICMPv6 processing, we drop certain
367 * NDP messages that are not expected to
368 * have fragment header based on recommendations
369 * against security vulnerability as described in
371 * We set PKTF_REASSEMBLED flag to let ICMPv6 NDP
373 * However there are already devices running software
374 * that are creating interface with MTU < IPv6 Min
375 * MTU. We should not have allowed that but they are
376 * out, and sending atomic NDP fragments.
377 * For that reason, we do not set the same flag here
378 * and relax the check.
380 ip6stat
.ip6s_atmfrag_rcvd
++;
381 in6_ifstat_inc(dstifp
, ifs6_atmfrag_rcvd
);
383 return (ip6f
->ip6f_nxt
);
387 * Leverage partial checksum offload for simple UDP/IP fragments,
388 * as that is the most common case.
390 * Perform 1's complement adjustment of octets that got included/
391 * excluded in the hardware-calculated checksum value. Also take
392 * care of any trailing bytes and subtract out their partial sum.
394 if (ip6f
->ip6f_nxt
== IPPROTO_UDP
&&
395 offset
== (sizeof (*ip6
) + sizeof (*ip6f
)) &&
396 (m
->m_pkthdr
.csum_flags
&
397 (CSUM_DATA_VALID
| CSUM_PARTIAL
| CSUM_PSEUDO_HDR
)) ==
398 (CSUM_DATA_VALID
| CSUM_PARTIAL
)) {
399 uint32_t start
= m
->m_pkthdr
.csum_rx_start
;
400 uint32_t ip_len
= (sizeof (*ip6
) + ntohs(ip6
->ip6_plen
));
401 int32_t trailer
= (m_pktlen(m
) - ip_len
);
402 uint32_t swbytes
= (uint32_t)trailer
;
404 csum
= m
->m_pkthdr
.csum_rx_val
;
406 ASSERT(trailer
>= 0);
407 if (start
!= offset
|| trailer
!= 0) {
408 uint16_t s
= 0, d
= 0;
410 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_src
)) {
411 s
= ip6
->ip6_src
.s6_addr16
[1];
412 ip6
->ip6_src
.s6_addr16
[1] = 0 ;
414 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_dst
)) {
415 d
= ip6
->ip6_dst
.s6_addr16
[1];
416 ip6
->ip6_dst
.s6_addr16
[1] = 0;
419 /* callee folds in sum */
420 csum
= m_adj_sum16(m
, start
, offset
,
421 (ip_len
- offset
), csum
);
423 swbytes
+= (offset
- start
);
425 swbytes
+= (start
- offset
);
427 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_src
))
428 ip6
->ip6_src
.s6_addr16
[1] = s
;
429 if (IN6_IS_SCOPE_EMBED(&ip6
->ip6_dst
))
430 ip6
->ip6_dst
.s6_addr16
[1] = d
;
433 csum_flags
= m
->m_pkthdr
.csum_flags
;
436 udp_in6_cksum_stats(swbytes
);
444 /* Invalidate checksum */
445 m
->m_pkthdr
.csum_flags
&= ~CSUM_DATA_VALID
;
447 ip6stat
.ip6s_fragments
++;
448 in6_ifstat_inc(dstifp
, ifs6_reass_reqd
);
450 lck_mtx_lock(&ip6qlock
);
453 for (q6
= ip6q
.ip6q_next
; q6
!= &ip6q
; q6
= q6
->ip6q_next
)
454 if (ip6f
->ip6f_ident
== q6
->ip6q_ident
&&
455 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_src
, &q6
->ip6q_src
) &&
456 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
, &q6
->ip6q_dst
))
461 * the first fragment to arrive, create a reassembly queue.
465 q6
= ip6q_alloc(M_DONTWAIT
);
469 frag6_insque(q6
, &ip6q
);
470 frag6_nfragpackets
++;
472 /* ip6q_nxt will be filled afterwards, from 1st fragment */
473 q6
->ip6q_down
= q6
->ip6q_up
= (struct ip6asfrag
*)q6
;
475 q6
->ip6q_nxtp
= (u_char
*)nxtp
;
477 q6
->ip6q_ident
= ip6f
->ip6f_ident
;
478 q6
->ip6q_ttl
= IPV6_FRAGTTL
;
479 q6
->ip6q_src
= ip6
->ip6_src
;
480 q6
->ip6q_dst
= ip6
->ip6_dst
;
482 (ntohl(ip6
->ip6_flow
) >> 20) & IPTOS_ECN_MASK
;
483 q6
->ip6q_unfrglen
= -1; /* The 1st fragment has not arrived. */
488 * If the first fragment has valid checksum offload
489 * info, the rest of fragments are eligible as well.
491 if (csum_flags
!= 0) {
492 q6
->ip6q_csum
= csum
;
493 q6
->ip6q_csum_flags
= csum_flags
;
498 * If it's the 1st fragment, record the length of the
499 * unfragmentable part and the next header of the fragment header.
501 fragoff
= ntohs(ip6f
->ip6f_offlg
& IP6F_OFF_MASK
);
503 q6
->ip6q_unfrglen
= offset
- sizeof(struct ip6_hdr
) -
504 sizeof(struct ip6_frag
);
505 q6
->ip6q_nxt
= ip6f
->ip6f_nxt
;
509 * Check that the reassembled packet would not exceed 65535 bytes
511 * If it would exceed, discard the fragment and return an ICMP error.
513 frgpartlen
= sizeof(struct ip6_hdr
) + ntohs(ip6
->ip6_plen
) - offset
;
514 if (q6
->ip6q_unfrglen
>= 0) {
515 /* The 1st fragment has already arrived. */
516 if (q6
->ip6q_unfrglen
+ fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
517 lck_mtx_unlock(&ip6qlock
);
519 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
520 offset
- sizeof(struct ip6_frag
) +
521 offsetof(struct ip6_frag
, ip6f_offlg
));
525 } else if (fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
526 lck_mtx_unlock(&ip6qlock
);
528 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
529 offset
- sizeof(struct ip6_frag
) +
530 offsetof(struct ip6_frag
, ip6f_offlg
));
535 * If it's the first fragment, do the above check for each
536 * fragment already stored in the reassembly queue.
539 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
541 af6dwn
= af6
->ip6af_down
;
543 if (q6
->ip6q_unfrglen
+ af6
->ip6af_off
+ af6
->ip6af_frglen
>
545 struct mbuf
*merr
= IP6_REASS_MBUF(af6
);
546 struct ip6_hdr
*ip6err
;
547 int erroff
= af6
->ip6af_offset
;
549 /* dequeue the fragment. */
553 /* adjust pointer. */
554 ip6err
= mtod(merr
, struct ip6_hdr
*);
557 * Restore source and destination addresses
558 * in the erroneous IPv6 header.
560 ip6err
->ip6_src
= q6
->ip6q_src
;
561 ip6err
->ip6_dst
= q6
->ip6q_dst
;
563 frag6_save_context(merr
,
564 erroff
- sizeof (struct ip6_frag
) +
565 offsetof(struct ip6_frag
, ip6f_offlg
));
567 MBUFQ_ENQUEUE(&diq6
, merr
);
572 ip6af
= ip6af_alloc(M_DONTWAIT
);
576 ip6af
->ip6af_mff
= ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
;
577 ip6af
->ip6af_off
= fragoff
;
578 ip6af
->ip6af_frglen
= frgpartlen
;
579 ip6af
->ip6af_offset
= offset
;
580 IP6_REASS_MBUF(ip6af
) = m
;
583 af6
= (struct ip6asfrag
*)q6
;
588 * Handle ECN by comparing this segment with the first one;
589 * if CE is set, do not lose CE.
590 * drop if CE and not-ECT are mixed for the same packet.
592 ecn
= (ntohl(ip6
->ip6_flow
) >> 20) & IPTOS_ECN_MASK
;
594 if (ecn
== IPTOS_ECN_CE
) {
595 if (ecn0
== IPTOS_ECN_NOTECT
) {
599 if (ecn0
!= IPTOS_ECN_CE
)
600 q6
->ip6q_ecn
= IPTOS_ECN_CE
;
602 if (ecn
== IPTOS_ECN_NOTECT
&& ecn0
!= IPTOS_ECN_NOTECT
) {
608 * Find a segment which begins after this one does.
610 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
611 af6
= af6
->ip6af_down
)
612 if (af6
->ip6af_off
> ip6af
->ip6af_off
)
617 * If there is a preceding segment, it may provide some of
618 * our data already. If so, drop the data from the incoming
619 * segment. If it provides all of our data, drop us.
621 * If some of the data is dropped from the preceding
622 * segment, then its checksum is invalidated.
624 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
625 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
628 if (i
>= ip6af
->ip6af_frglen
)
630 m_adj(IP6_REASS_MBUF(ip6af
), i
);
631 q6
->ip6q_csum_flags
= 0;
632 ip6af
->ip6af_off
+= i
;
633 ip6af
->ip6af_frglen
-= i
;
638 * While we overlap succeeding segments trim them or,
639 * if they are completely covered, dequeue them.
641 while (af6
!= (struct ip6asfrag
*)q6
&&
642 ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
> af6
->ip6af_off
) {
643 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
644 if (i
< af6
->ip6af_frglen
) {
645 af6
->ip6af_frglen
-= i
;
647 m_adj(IP6_REASS_MBUF(af6
), i
);
648 q6
->ip6q_csum_flags
= 0;
651 af6
= af6
->ip6af_down
;
652 m_freem(IP6_REASS_MBUF(af6
->ip6af_up
));
653 frag6_deq(af6
->ip6af_up
);
657 * If the incoming fragment overlaps some existing fragments in
658 * the reassembly queue, drop it, since it is dangerous to override
659 * existing fragments from a security point of view.
660 * We don't know which fragment is the bad guy - here we trust
661 * fragment that came in earlier, with no real reason.
663 * Note: due to changes after disabling this part, mbuf passed to
664 * m_adj() below now does not meet the requirement.
666 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
667 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
670 #if 0 /* suppress the noisy log */
671 log(LOG_ERR
, "%d bytes of a fragment from %s "
672 "overlaps the previous fragment\n",
673 i
, ip6_sprintf(&q6
->ip6q_src
));
679 if (af6
!= (struct ip6asfrag
*)q6
) {
680 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
682 #if 0 /* suppress the noisy log */
683 log(LOG_ERR
, "%d bytes of a fragment from %s "
684 "overlaps the succeeding fragment",
685 i
, ip6_sprintf(&q6
->ip6q_src
));
694 * If this fragment contains similar checksum offload info
695 * as that of the existing ones, accumulate checksum. Otherwise,
696 * invalidate checksum offload info for the entire datagram.
698 if (csum_flags
!= 0 && csum_flags
== q6
->ip6q_csum_flags
)
699 q6
->ip6q_csum
+= csum
;
700 else if (q6
->ip6q_csum_flags
!= 0)
701 q6
->ip6q_csum_flags
= 0;
706 * Stick new segment in its place;
707 * check for complete reassembly.
708 * Move to front of packet queue, as we are
709 * the most recently active fragmented packet.
711 frag6_enq(ip6af
, af6
->ip6af_up
);
715 if (q6
!= ip6q
.ip6q_next
) {
717 frag6_insque(q6
, &ip6q
);
721 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
722 af6
= af6
->ip6af_down
) {
723 if (af6
->ip6af_off
!= next
) {
724 lck_mtx_unlock(&ip6qlock
);
729 next
+= af6
->ip6af_frglen
;
731 if (af6
->ip6af_up
->ip6af_mff
) {
732 lck_mtx_unlock(&ip6qlock
);
739 * Reassembly is complete; concatenate fragments.
741 ip6af
= q6
->ip6q_down
;
742 t
= m
= IP6_REASS_MBUF(ip6af
);
743 af6
= ip6af
->ip6af_down
;
745 while (af6
!= (struct ip6asfrag
*)q6
) {
746 af6dwn
= af6
->ip6af_down
;
750 t
->m_next
= IP6_REASS_MBUF(af6
);
751 m_adj(t
->m_next
, af6
->ip6af_offset
);
757 * Store partial hardware checksum info from the fragment queue;
758 * the receive start offset is set to 40 bytes (see code at the
759 * top of this routine.)
761 if (q6
->ip6q_csum_flags
!= 0) {
762 csum
= q6
->ip6q_csum
;
766 m
->m_pkthdr
.csum_rx_val
= csum
;
767 m
->m_pkthdr
.csum_rx_start
= sizeof (struct ip6_hdr
);
768 m
->m_pkthdr
.csum_flags
= q6
->ip6q_csum_flags
;
769 } else if ((m
->m_pkthdr
.rcvif
->if_flags
& IFF_LOOPBACK
) ||
770 (m
->m_pkthdr
.pkt_flags
& PKTF_LOOP
)) {
771 /* loopback checksums are always OK */
772 m
->m_pkthdr
.csum_data
= 0xffff;
773 m
->m_pkthdr
.csum_flags
&= ~CSUM_PARTIAL
;
774 m
->m_pkthdr
.csum_flags
= CSUM_DATA_VALID
| CSUM_PSEUDO_HDR
;
777 /* adjust offset to point where the original next header starts */
778 offset
= ip6af
->ip6af_offset
- sizeof(struct ip6_frag
);
780 ip6
= mtod(m
, struct ip6_hdr
*);
781 ip6
->ip6_plen
= htons((u_short
)next
+ offset
- sizeof(struct ip6_hdr
));
782 ip6
->ip6_src
= q6
->ip6q_src
;
783 ip6
->ip6_dst
= q6
->ip6q_dst
;
784 if (q6
->ip6q_ecn
== IPTOS_ECN_CE
)
785 ip6
->ip6_flow
|= htonl(IPTOS_ECN_CE
<< 20);
789 *q6
->ip6q_nxtp
= (u_char
)(nxt
& 0xff);
792 /* Delete frag6 header */
793 if (m
->m_len
>= offset
+ sizeof(struct ip6_frag
)) {
794 /* This is the only possible case with !PULLDOWN_TEST */
795 ovbcopy((caddr_t
)ip6
, (caddr_t
)ip6
+ sizeof(struct ip6_frag
),
797 m
->m_data
+= sizeof(struct ip6_frag
);
798 m
->m_len
-= sizeof(struct ip6_frag
);
800 /* this comes with no copy if the boundary is on cluster */
801 if ((t
= m_split(m
, offset
, M_DONTWAIT
)) == NULL
) {
803 frag6_nfragpackets
--;
804 frag6_nfrags
-= q6
->ip6q_nfrag
;
808 m_adj(t
, sizeof(struct ip6_frag
));
813 * Store NXT to the original.
816 char *prvnxtp
= ip6_get_prevhdr(m
, offset
); /* XXX */
821 frag6_nfragpackets
--;
822 frag6_nfrags
-= q6
->ip6q_nfrag
;
825 if (m
->m_flags
& M_PKTHDR
) { /* Isn't it always true? */
828 * Mark packet as reassembled
829 * In ICMPv6 processing, we drop certain
830 * NDP messages that are not expected to
831 * have fragment header based on recommendations
832 * against security vulnerability as described in
835 m
->m_pkthdr
.pkt_flags
|= PKTF_REASSEMBLED
;
837 ip6stat
.ip6s_reassembled
++;
840 * Tell launch routine the next header
845 /* arm the purge timer if not already and if there's work to do */
846 frag6_sched_timeout();
847 lck_mtx_unlock(&ip6qlock
);
848 in6_ifstat_inc(dstifp
, ifs6_reass_ok
);
849 frag6_icmp6_paramprob_error(&diq6
);
850 VERIFY(MBUFQ_EMPTY(&diq6
));
856 if (frag6_nfragpackets
== 0) {
857 frag6_icmp6_paramprob_error(&diq6
);
858 VERIFY(MBUFQ_EMPTY(&diq6
));
859 return (IPPROTO_DONE
);
861 lck_mtx_lock(&ip6qlock
);
863 /* arm the purge timer if not already and if there's work to do */
864 frag6_sched_timeout();
865 lck_mtx_unlock(&ip6qlock
);
866 frag6_icmp6_paramprob_error(&diq6
);
867 VERIFY(MBUFQ_EMPTY(&diq6
));
868 return (IPPROTO_DONE
);
871 ip6stat
.ip6s_fragdropped
++;
872 /* arm the purge timer if not already and if there's work to do */
873 frag6_sched_timeout();
874 lck_mtx_unlock(&ip6qlock
);
875 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
877 frag6_icmp6_paramprob_error(&diq6
);
878 VERIFY(MBUFQ_EMPTY(&diq6
));
879 return (IPPROTO_DONE
);
883 * Free a fragment reassembly header and all
884 * associated datagrams.
887 frag6_freef(struct ip6q
*q6
, struct fq6_head
*dfq6
, struct fq6_head
*diq6
)
889 struct ip6asfrag
*af6
, *down6
;
891 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
893 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
895 struct mbuf
*m
= IP6_REASS_MBUF(af6
);
897 down6
= af6
->ip6af_down
;
901 * Return ICMP time exceeded error for the 1st fragment.
902 * Just free other fragments.
904 if (af6
->ip6af_off
== 0) {
908 ip6
= mtod(m
, struct ip6_hdr
*);
910 /* restore source and destination addresses */
911 ip6
->ip6_src
= q6
->ip6q_src
;
912 ip6
->ip6_dst
= q6
->ip6q_dst
;
914 MBUFQ_ENQUEUE(diq6
, m
);
916 MBUFQ_ENQUEUE(dfq6
, m
);
922 frag6_nfragpackets
--;
923 frag6_nfrags
-= q6
->ip6q_nfrag
;
928 * Put an ip fragment on a reassembly chain.
929 * Like insque, but pointers in middle of structure.
932 frag6_enq(struct ip6asfrag
*af6
, struct ip6asfrag
*up6
)
934 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
937 af6
->ip6af_down
= up6
->ip6af_down
;
938 up6
->ip6af_down
->ip6af_up
= af6
;
939 up6
->ip6af_down
= af6
;
943 * To frag6_enq as remque is to insque.
946 frag6_deq(struct ip6asfrag
*af6
)
948 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
950 af6
->ip6af_up
->ip6af_down
= af6
->ip6af_down
;
951 af6
->ip6af_down
->ip6af_up
= af6
->ip6af_up
;
955 frag6_insque(struct ip6q
*new, struct ip6q
*old
)
957 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
959 new->ip6q_prev
= old
;
960 new->ip6q_next
= old
->ip6q_next
;
961 old
->ip6q_next
->ip6q_prev
= new;
962 old
->ip6q_next
= new;
966 frag6_remque(struct ip6q
*p6
)
968 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
970 p6
->ip6q_prev
->ip6q_next
= p6
->ip6q_next
;
971 p6
->ip6q_next
->ip6q_prev
= p6
->ip6q_prev
;
975 * IPv6 reassembling timer processing;
976 * if a timer expires on a reassembly
980 frag6_timeout(void *arg
)
983 struct fq6_head dfq6
, diq6
;
986 MBUFQ_INIT(&dfq6
); /* for deferred frees */
987 MBUFQ_INIT(&diq6
); /* for deferred ICMP time exceeded errors */
990 * Update coarse-grained networking timestamp (in sec.); the idea
991 * is to piggy-back on the timeout callout to update the counter
992 * returnable via net_uptime().
996 lck_mtx_lock(&ip6qlock
);
999 while (q6
!= &ip6q
) {
1002 if (q6
->ip6q_prev
->ip6q_ttl
== 0) {
1003 ip6stat
.ip6s_fragtimeout
++;
1004 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1005 frag6_freef(q6
->ip6q_prev
, &dfq6
, &diq6
);
1009 * If we are over the maximum number of fragments
1010 * (due to the limit being lowered), drain off
1011 * enough to get down to the new limit.
1013 if (ip6_maxfragpackets
>= 0) {
1014 while (frag6_nfragpackets
> (unsigned)ip6_maxfragpackets
&&
1016 ip6stat
.ip6s_fragoverflow
++;
1017 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1018 frag6_freef(ip6q
.ip6q_prev
, &dfq6
, &diq6
);
1021 /* re-arm the purge timer if there's work to do */
1022 frag6_timeout_run
= 0;
1023 frag6_sched_timeout();
1024 lck_mtx_unlock(&ip6qlock
);
1026 /* free fragments that need to be freed */
1027 if (!MBUFQ_EMPTY(&dfq6
))
1030 frag6_icmp6_timeex_error(&diq6
);
1032 VERIFY(MBUFQ_EMPTY(&dfq6
));
1033 VERIFY(MBUFQ_EMPTY(&diq6
));
1037 frag6_sched_timeout(void)
1039 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1041 if (!frag6_timeout_run
&& frag6_nfragpackets
> 0) {
1042 frag6_timeout_run
= 1;
1043 timeout(frag6_timeout
, NULL
, hz
);
1048 * Drain off all datagram fragments.
1053 struct fq6_head dfq6
, diq6
;
1055 MBUFQ_INIT(&dfq6
); /* for deferred frees */
1056 MBUFQ_INIT(&diq6
); /* for deferred ICMP time exceeded errors */
1058 lck_mtx_lock(&ip6qlock
);
1059 while (ip6q
.ip6q_next
!= &ip6q
) {
1060 ip6stat
.ip6s_fragdropped
++;
1061 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1062 frag6_freef(ip6q
.ip6q_next
, &dfq6
, &diq6
);
1064 lck_mtx_unlock(&ip6qlock
);
1066 /* free fragments that need to be freed */
1067 if (!MBUFQ_EMPTY(&dfq6
))
1070 frag6_icmp6_timeex_error(&diq6
);
1072 VERIFY(MBUFQ_EMPTY(&dfq6
));
1073 VERIFY(MBUFQ_EMPTY(&diq6
));
1076 static struct ip6q
*
1083 * See comments in ip6q_updateparams(). Keep the count separate
1084 * from frag6_nfragpackets since the latter represents the elements
1085 * already in the reassembly queues.
1087 if (ip6q_limit
> 0 && ip6q_count
> ip6q_limit
)
1090 t
= m_get(how
, MT_FTABLE
);
1092 atomic_add_32(&ip6q_count
, 1);
1093 q6
= mtod(t
, struct ip6q
*);
1094 bzero(q6
, sizeof (*q6
));
1102 ip6q_free(struct ip6q
*q6
)
1104 (void) m_free(dtom(q6
));
1105 atomic_add_32(&ip6q_count
, -1);
1108 static struct ip6asfrag
*
1109 ip6af_alloc(int how
)
1112 struct ip6asfrag
*af6
;
1115 * See comments in ip6q_updateparams(). Keep the count separate
1116 * from frag6_nfrags since the latter represents the elements
1117 * already in the reassembly queues.
1119 if (ip6af_limit
> 0 && ip6af_count
> ip6af_limit
)
1122 t
= m_get(how
, MT_FTABLE
);
1124 atomic_add_32(&ip6af_count
, 1);
1125 af6
= mtod(t
, struct ip6asfrag
*);
1126 bzero(af6
, sizeof (*af6
));
1134 ip6af_free(struct ip6asfrag
*af6
)
1136 (void) m_free(dtom(af6
));
1137 atomic_add_32(&ip6af_count
, -1);
1141 ip6q_updateparams(void)
1143 LCK_MTX_ASSERT(&ip6qlock
, LCK_MTX_ASSERT_OWNED
);
1145 * -1 for unlimited allocation.
1147 if (ip6_maxfragpackets
< 0)
1149 if (ip6_maxfrags
< 0)
1152 * Positive number for specific bound.
1154 if (ip6_maxfragpackets
> 0)
1155 ip6q_limit
= ip6_maxfragpackets
;
1156 if (ip6_maxfrags
> 0)
1157 ip6af_limit
= ip6_maxfrags
;
1159 * Zero specifies no further fragment queue allocation -- set the
1160 * bound very low, but rely on implementation elsewhere to actually
1161 * prevent allocation and reclaim current queues.
1163 if (ip6_maxfragpackets
== 0)
1165 if (ip6_maxfrags
== 0)
1168 * Arm the purge timer if not already and if there's work to do
1170 frag6_sched_timeout();
1174 sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
1176 #pragma unused(arg1, arg2)
1179 lck_mtx_lock(&ip6qlock
);
1180 i
= ip6_maxfragpackets
;
1181 error
= sysctl_handle_int(oidp
, &i
, 0, req
);
1182 if (error
|| req
->newptr
== USER_ADDR_NULL
)
1185 if (i
< -1 || i
> (nmbclusters
/ 4)) {
1189 ip6_maxfragpackets
= i
;
1190 ip6q_updateparams();
1192 lck_mtx_unlock(&ip6qlock
);
1197 sysctl_maxfrags SYSCTL_HANDLER_ARGS
1199 #pragma unused(arg1, arg2)
1202 lck_mtx_lock(&ip6qlock
);
1204 error
= sysctl_handle_int(oidp
, &i
, 0, req
);
1205 if (error
|| req
->newptr
== USER_ADDR_NULL
)
1208 if (i
< -1 || i
> (nmbclusters
/ 4)) {
1213 ip6q_updateparams(); /* see if we need to arm timer */
1215 lck_mtx_unlock(&ip6qlock
);