]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/frag6.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / bsd / netinet6 / frag6.c
1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/frag6.c,v 1.2.2.5 2001/07/03 11:01:50 ume Exp $ */
30 /* $KAME: frag6.c,v 1.31 2001/05/17 13:45:34 jinmei Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/malloc.h>
64 #include <sys/mcache.h>
65 #include <sys/mbuf.h>
66 #include <sys/domain.h>
67 #include <sys/protosw.h>
68 #include <sys/socket.h>
69 #include <sys/errno.h>
70 #include <sys/time.h>
71 #include <sys/kernel.h>
72 #include <sys/syslog.h>
73 #include <kern/queue.h>
74 #include <kern/locks.h>
75
76 #include <net/if.h>
77 #include <net/route.h>
78
79 #include <netinet/in.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip6.h>
83 #include <netinet6/ip6_var.h>
84 #include <netinet/icmp6.h>
85
86 #include <net/net_osdep.h>
87 #include <dev/random/randomdev.h>
88
89 /*
90 * Define it to get a correct behavior on per-interface statistics.
91 */
92 #define IN6_IFSTAT_STRICT
93
94 MBUFQ_HEAD(fq6_head);
95
96 static void frag6_save_context(struct mbuf *, int);
97 static void frag6_scrub_context(struct mbuf *);
98 static int frag6_restore_context(struct mbuf *);
99
100 static void frag6_icmp6_paramprob_error(struct fq6_head *);
101 static void frag6_icmp6_timeex_error(struct fq6_head *);
102
103 static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
104 static void frag6_deq(struct ip6asfrag *);
105 static void frag6_insque(struct ip6q *, struct ip6q *);
106 static void frag6_remque(struct ip6q *);
107 static void frag6_freef(struct ip6q *, struct fq6_head *, struct fq6_head *);
108
109 static int frag6_timeout_run; /* frag6 timer is scheduled to run */
110 static void frag6_timeout(void *);
111 static void frag6_sched_timeout(void);
112
113 static struct ip6q *ip6q_alloc(int);
114 static void ip6q_free(struct ip6q *);
115 static void ip6q_updateparams(void);
116 static struct ip6asfrag *ip6af_alloc(int);
117 static void ip6af_free(struct ip6asfrag *);
118
119 decl_lck_mtx_data(static, ip6qlock);
120 static lck_attr_t *ip6qlock_attr;
121 static lck_grp_t *ip6qlock_grp;
122 static lck_grp_attr_t *ip6qlock_grp_attr;
123
124 /* IPv6 fragment reassembly queues (protected by ip6qlock) */
125 static struct ip6q ip6q; /* ip6 reassembly queues */
126 static int ip6_maxfragpackets; /* max packets in reass queues */
127 static u_int32_t frag6_nfragpackets; /* # of packets in reass queues */
128 static int ip6_maxfrags; /* max fragments in reass queues */
129 static u_int32_t frag6_nfrags; /* # of fragments in reass queues */
130 static u_int32_t ip6q_limit; /* ip6q allocation limit */
131 static u_int32_t ip6q_count; /* current # of allocated ip6q's */
132 static u_int32_t ip6af_limit; /* ip6asfrag allocation limit */
133 static u_int32_t ip6af_count; /* current # of allocated ip6asfrag's */
134
135 static int sysctl_maxfragpackets SYSCTL_HANDLER_ARGS;
136 static int sysctl_maxfrags SYSCTL_HANDLER_ARGS;
137
138 SYSCTL_DECL(_net_inet6_ip6);
139
140 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
141 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfragpackets, 0,
142 sysctl_maxfragpackets, "I",
143 "Maximum number of IPv6 fragment reassembly queue entries");
144
145 SYSCTL_UINT(_net_inet6_ip6, OID_AUTO, fragpackets,
146 CTLFLAG_RD | CTLFLAG_LOCKED, &frag6_nfragpackets, 0,
147 "Current number of IPv6 fragment reassembly queue entries");
148
149 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
150 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxfrags, 0,
151 sysctl_maxfrags, "I", "Maximum number of IPv6 fragments allowed");
152
153 /*
154 * Initialise reassembly queue and fragment identifier.
155 */
156 void
157 frag6_init(void)
158 {
159 /* ip6q_alloc() uses mbufs for IPv6 fragment queue structures */
160 _CASSERT(sizeof (struct ip6q) <= _MLEN);
161 /* ip6af_alloc() uses mbufs for IPv6 fragment queue structures */
162 _CASSERT(sizeof (struct ip6asfrag) <= _MLEN);
163
164 /* IPv6 fragment reassembly queue lock */
165 ip6qlock_grp_attr = lck_grp_attr_alloc_init();
166 ip6qlock_grp = lck_grp_alloc_init("ip6qlock", ip6qlock_grp_attr);
167 ip6qlock_attr = lck_attr_alloc_init();
168 lck_mtx_init(&ip6qlock, ip6qlock_grp, ip6qlock_attr);
169
170 lck_mtx_lock(&ip6qlock);
171 /* Initialize IPv6 reassembly queue. */
172 ip6q.ip6q_next = ip6q.ip6q_prev = &ip6q;
173
174 /* same limits as IPv4 */
175 ip6_maxfragpackets = nmbclusters / 32;
176 ip6_maxfrags = ip6_maxfragpackets * 2;
177 ip6q_updateparams();
178 lck_mtx_unlock(&ip6qlock);
179 }
180
181 static void
182 frag6_save_context(struct mbuf *m, int val)
183 {
184 m->m_pkthdr.pkt_hdr = (void *)(uintptr_t)val;
185 }
186
187 static void
188 frag6_scrub_context(struct mbuf *m)
189 {
190 m->m_pkthdr.pkt_hdr = NULL;
191 }
192
193 static int
194 frag6_restore_context(struct mbuf *m)
195 {
196 return ((int)m->m_pkthdr.pkt_hdr);
197 }
198
199 /*
200 * Send any deferred ICMP param problem error messages; caller must not be
201 * holding ip6qlock and is expected to have saved the per-packet parameter
202 * value via frag6_save_context().
203 */
204 static void
205 frag6_icmp6_paramprob_error(struct fq6_head *diq6)
206 {
207 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED);
208
209 if (!MBUFQ_EMPTY(diq6)) {
210 struct mbuf *merr, *merr_tmp;
211 int param;
212 MBUFQ_FOREACH_SAFE(merr, diq6, merr_tmp) {
213 MBUFQ_REMOVE(diq6, merr);
214 MBUFQ_NEXT(merr) = NULL;
215 param = frag6_restore_context(merr);
216 frag6_scrub_context(merr);
217 icmp6_error(merr, ICMP6_PARAM_PROB,
218 ICMP6_PARAMPROB_HEADER, param);
219 }
220 }
221 }
222
223 /*
224 * Send any deferred ICMP time exceeded error messages;
225 * caller must not be holding ip6qlock.
226 */
227 static void
228 frag6_icmp6_timeex_error(struct fq6_head *diq6)
229 {
230 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_NOTOWNED);
231
232 if (!MBUFQ_EMPTY(diq6)) {
233 struct mbuf *m, *m_tmp;
234 MBUFQ_FOREACH_SAFE(m, diq6, m_tmp) {
235 MBUFQ_REMOVE(diq6, m);
236 MBUFQ_NEXT(m) = NULL;
237 icmp6_error_flag(m, ICMP6_TIME_EXCEEDED,
238 ICMP6_TIME_EXCEED_REASSEMBLY, 0, 0);
239 }
240 }
241 }
242
243 /*
244 * In RFC2460, fragment and reassembly rule do not agree with each other,
245 * in terms of next header field handling in fragment header.
246 * While the sender will use the same value for all of the fragmented packets,
247 * receiver is suggested not to check the consistency.
248 *
249 * fragment rule (p20):
250 * (2) A Fragment header containing:
251 * The Next Header value that identifies the first header of
252 * the Fragmentable Part of the original packet.
253 * -> next header field is same for all fragments
254 *
255 * reassembly rule (p21):
256 * The Next Header field of the last header of the Unfragmentable
257 * Part is obtained from the Next Header field of the first
258 * fragment's Fragment header.
259 * -> should grab it from the first fragment only
260 *
261 * The following note also contradicts the fragment rule - no one is going to
262 * send different fragments with different next header fields.
263 *
264 * additional note (p22):
265 * The Next Header values in the Fragment headers of different
266 * fragments of the same original packet may differ. Only the value
267 * from the Offset zero fragment packet is used for reassembly.
268 * -> should grab it from the first fragment only
269 *
270 * There is no explicit reason given in the RFC. Historical reason maybe?
271 */
272 /*
273 * Fragment input
274 */
275 int
276 frag6_input(struct mbuf **mp, int *offp, int proto)
277 {
278 #pragma unused(proto)
279 struct mbuf *m = *mp, *t;
280 struct ip6_hdr *ip6;
281 struct ip6_frag *ip6f;
282 struct ip6q *q6;
283 struct ip6asfrag *af6, *ip6af, *af6dwn;
284 int offset = *offp, nxt, i, next;
285 int first_frag = 0;
286 int fragoff, frgpartlen; /* must be larger than u_int16_t */
287 struct ifnet *dstifp = NULL;
288 u_int8_t ecn, ecn0;
289 uint32_t csum, csum_flags;
290 struct fq6_head diq6;
291 int locked = 0;
292
293 VERIFY(m->m_flags & M_PKTHDR);
294
295 MBUFQ_INIT(&diq6); /* for deferred ICMP param problem errors */
296
297 /* Expect 32-bit aligned data pointer on strict-align platforms */
298 MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);
299
300 ip6 = mtod(m, struct ip6_hdr *);
301 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), goto done);
302 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
303
304 #ifdef IN6_IFSTAT_STRICT
305 /* find the destination interface of the packet. */
306 if (m->m_pkthdr.pkt_flags & PKTF_IFAINFO) {
307 uint32_t idx;
308
309 if (ip6_getdstifaddr_info(m, &idx, NULL) == 0) {
310 if (idx > 0 && idx <= if_index) {
311 ifnet_head_lock_shared();
312 dstifp = ifindex2ifnet[idx];
313 ifnet_head_done();
314 }
315 }
316 }
317 #endif /* IN6_IFSTAT_STRICT */
318
319 /* we are violating the spec, this may not be the dst interface */
320 if (dstifp == NULL)
321 dstifp = m->m_pkthdr.rcvif;
322
323 /* jumbo payload can't contain a fragment header */
324 if (ip6->ip6_plen == 0) {
325 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
326 in6_ifstat_inc(dstifp, ifs6_reass_fail);
327 m = NULL;
328 goto done;
329 }
330
331 /*
332 * check whether fragment packet's fragment length is
333 * multiple of 8 octets.
334 * sizeof(struct ip6_frag) == 8
335 * sizeof(struct ip6_hdr) = 40
336 */
337 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
338 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
339 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
340 offsetof(struct ip6_hdr, ip6_plen));
341 in6_ifstat_inc(dstifp, ifs6_reass_fail);
342 m = NULL;
343 goto done;
344 }
345
346 /* If ip6_maxfragpackets or ip6_maxfrags is 0, never accept fragments */
347 if (ip6_maxfragpackets == 0 || ip6_maxfrags == 0) {
348 ip6stat.ip6s_fragments++;
349 ip6stat.ip6s_fragdropped++;
350 in6_ifstat_inc(dstifp, ifs6_reass_fail);
351 m_freem(m);
352 m = NULL;
353 goto done;
354 }
355
356 /* offset now points to data portion */
357 offset += sizeof(struct ip6_frag);
358
359 /*
360 * Leverage partial checksum offload for simple UDP/IP fragments,
361 * as that is the most common case.
362 *
363 * Perform 1's complement adjustment of octets that got included/
364 * excluded in the hardware-calculated checksum value.
365 */
366 if (ip6f->ip6f_nxt == IPPROTO_UDP &&
367 offset == (sizeof (*ip6) + sizeof (*ip6f)) &&
368 (m->m_pkthdr.csum_flags &
369 (CSUM_DATA_VALID | CSUM_PARTIAL | CSUM_PSEUDO_HDR)) ==
370 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
371 uint32_t start;
372
373 start = m->m_pkthdr.csum_rx_start;
374 csum = m->m_pkthdr.csum_rx_val;
375
376 if (start != offset) {
377 uint16_t s, d;
378
379 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
380 s = ip6->ip6_src.s6_addr16[1];
381 ip6->ip6_src.s6_addr16[1] = 0 ;
382 }
383 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
384 d = ip6->ip6_dst.s6_addr16[1];
385 ip6->ip6_dst.s6_addr16[1] = 0;
386 }
387
388 /* callee folds in sum */
389 csum = m_adj_sum16(m, start, offset, csum);
390
391 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
392 ip6->ip6_src.s6_addr16[1] = s;
393 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
394 ip6->ip6_dst.s6_addr16[1] = d;
395
396 }
397 csum_flags = m->m_pkthdr.csum_flags;
398 } else {
399 csum = 0;
400 csum_flags = 0;
401 }
402
403 /* Invalidate checksum */
404 m->m_pkthdr.csum_flags &= ~CSUM_DATA_VALID;
405
406 ip6stat.ip6s_fragments++;
407 in6_ifstat_inc(dstifp, ifs6_reass_reqd);
408
409 lck_mtx_lock(&ip6qlock);
410 locked = 1;
411
412 for (q6 = ip6q.ip6q_next; q6 != &ip6q; q6 = q6->ip6q_next)
413 if (ip6f->ip6f_ident == q6->ip6q_ident &&
414 IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
415 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst))
416 break;
417
418 if (q6 == &ip6q) {
419 /*
420 * the first fragment to arrive, create a reassembly queue.
421 */
422 first_frag = 1;
423
424 q6 = ip6q_alloc(M_DONTWAIT);
425 if (q6 == NULL)
426 goto dropfrag;
427
428 frag6_insque(q6, &ip6q);
429 frag6_nfragpackets++;
430
431 /* ip6q_nxt will be filled afterwards, from 1st fragment */
432 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6;
433 #ifdef notyet
434 q6->ip6q_nxtp = (u_char *)nxtp;
435 #endif
436 q6->ip6q_ident = ip6f->ip6f_ident;
437 q6->ip6q_ttl = IPV6_FRAGTTL;
438 q6->ip6q_src = ip6->ip6_src;
439 q6->ip6q_dst = ip6->ip6_dst;
440 q6->ip6q_ecn =
441 (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
442 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */
443
444 q6->ip6q_nfrag = 0;
445
446 /*
447 * If the first fragment has valid checksum offload
448 * info, the rest of fragments are eligible as well.
449 */
450 if (csum_flags != 0) {
451 q6->ip6q_csum = csum;
452 q6->ip6q_csum_flags = csum_flags;
453 }
454 }
455
456 /*
457 * If it's the 1st fragment, record the length of the
458 * unfragmentable part and the next header of the fragment header.
459 */
460 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
461 if (fragoff == 0) {
462 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
463 sizeof(struct ip6_frag);
464 q6->ip6q_nxt = ip6f->ip6f_nxt;
465 }
466
467 /*
468 * Check that the reassembled packet would not exceed 65535 bytes
469 * in size.
470 * If it would exceed, discard the fragment and return an ICMP error.
471 */
472 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
473 if (q6->ip6q_unfrglen >= 0) {
474 /* The 1st fragment has already arrived. */
475 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
476 lck_mtx_unlock(&ip6qlock);
477 locked = 0;
478 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
479 offset - sizeof(struct ip6_frag) +
480 offsetof(struct ip6_frag, ip6f_offlg));
481 m = NULL;
482 goto done;
483 }
484 } else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
485 lck_mtx_unlock(&ip6qlock);
486 locked = 0;
487 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
488 offset - sizeof(struct ip6_frag) +
489 offsetof(struct ip6_frag, ip6f_offlg));
490 m = NULL;
491 goto done;
492 }
493 /*
494 * If it's the first fragment, do the above check for each
495 * fragment already stored in the reassembly queue.
496 */
497 if (fragoff == 0) {
498 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
499 af6 = af6dwn) {
500 af6dwn = af6->ip6af_down;
501
502 if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
503 IPV6_MAXPACKET) {
504 struct mbuf *merr = IP6_REASS_MBUF(af6);
505 struct ip6_hdr *ip6err;
506 int erroff = af6->ip6af_offset;
507
508 /* dequeue the fragment. */
509 frag6_deq(af6);
510 ip6af_free(af6);
511
512 /* adjust pointer. */
513 ip6err = mtod(merr, struct ip6_hdr *);
514
515 /*
516 * Restore source and destination addresses
517 * in the erroneous IPv6 header.
518 */
519 ip6err->ip6_src = q6->ip6q_src;
520 ip6err->ip6_dst = q6->ip6q_dst;
521
522 frag6_save_context(merr,
523 erroff - sizeof (struct ip6_frag) +
524 offsetof(struct ip6_frag, ip6f_offlg));
525
526 MBUFQ_ENQUEUE(&diq6, merr);
527 }
528 }
529 }
530
531 ip6af = ip6af_alloc(M_DONTWAIT);
532 if (ip6af == NULL)
533 goto dropfrag;
534
535 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
536 ip6af->ip6af_off = fragoff;
537 ip6af->ip6af_frglen = frgpartlen;
538 ip6af->ip6af_offset = offset;
539 IP6_REASS_MBUF(ip6af) = m;
540
541 if (first_frag) {
542 af6 = (struct ip6asfrag *)q6;
543 goto insert;
544 }
545
546 /*
547 * Handle ECN by comparing this segment with the first one;
548 * if CE is set, do not lose CE.
549 * drop if CE and not-ECT are mixed for the same packet.
550 */
551 ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
552 ecn0 = q6->ip6q_ecn;
553 if (ecn == IPTOS_ECN_CE) {
554 if (ecn0 == IPTOS_ECN_NOTECT) {
555 ip6af_free(ip6af);
556 goto dropfrag;
557 }
558 if (ecn0 != IPTOS_ECN_CE)
559 q6->ip6q_ecn = IPTOS_ECN_CE;
560 }
561 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
562 ip6af_free(ip6af);
563 goto dropfrag;
564 }
565
566 /*
567 * Find a segment which begins after this one does.
568 */
569 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
570 af6 = af6->ip6af_down)
571 if (af6->ip6af_off > ip6af->ip6af_off)
572 break;
573
574 #if 0
575 /*
576 * If there is a preceding segment, it may provide some of
577 * our data already. If so, drop the data from the incoming
578 * segment. If it provides all of our data, drop us.
579 *
580 * If some of the data is dropped from the preceding
581 * segment, then it's checksum is invalidated.
582 */
583 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
584 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
585 - ip6af->ip6af_off;
586 if (i > 0) {
587 if (i >= ip6af->ip6af_frglen)
588 goto dropfrag;
589 m_adj(IP6_REASS_MBUF(ip6af), i);
590 q6->ip6q_csum_flags = 0;
591 ip6af->ip6af_off += i;
592 ip6af->ip6af_frglen -= i;
593 }
594 }
595
596 /*
597 * While we overlap succeeding segments trim them or,
598 * if they are completely covered, dequeue them.
599 */
600 while (af6 != (struct ip6asfrag *)q6 &&
601 ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
602 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
603 if (i < af6->ip6af_frglen) {
604 af6->ip6af_frglen -= i;
605 af6->ip6af_off += i;
606 m_adj(IP6_REASS_MBUF(af6), i);
607 q6->ip6q_csum_flags = 0;
608 break;
609 }
610 af6 = af6->ip6af_down;
611 m_freem(IP6_REASS_MBUF(af6->ip6af_up));
612 frag6_deq(af6->ip6af_up);
613 }
614 #else
615 /*
616 * If the incoming framgent overlaps some existing fragments in
617 * the reassembly queue, drop it, since it is dangerous to override
618 * existing fragments from a security point of view.
619 * We don't know which fragment is the bad guy - here we trust
620 * fragment that came in earlier, with no real reason.
621 *
622 * Note: due to changes after disabling this part, mbuf passed to
623 * m_adj() below now does not meet the requirement.
624 */
625 if (af6->ip6af_up != (struct ip6asfrag *)q6) {
626 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
627 - ip6af->ip6af_off;
628 if (i > 0) {
629 #if 0 /* suppress the noisy log */
630 log(LOG_ERR, "%d bytes of a fragment from %s "
631 "overlaps the previous fragment\n",
632 i, ip6_sprintf(&q6->ip6q_src));
633 #endif
634 ip6af_free(ip6af);
635 goto dropfrag;
636 }
637 }
638 if (af6 != (struct ip6asfrag *)q6) {
639 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
640 if (i > 0) {
641 #if 0 /* suppress the noisy log */
642 log(LOG_ERR, "%d bytes of a fragment from %s "
643 "overlaps the succeeding fragment",
644 i, ip6_sprintf(&q6->ip6q_src));
645 #endif
646 ip6af_free(ip6af);
647 goto dropfrag;
648 }
649 }
650 #endif
651
652 /*
653 * If this fragment contains similar checksum offload info
654 * as that of the existing ones, accumulate checksum. Otherwise,
655 * invalidate checksum offload info for the entire datagram.
656 */
657 if (csum_flags != 0 && csum_flags == q6->ip6q_csum_flags)
658 q6->ip6q_csum += csum;
659 else if (q6->ip6q_csum_flags != 0)
660 q6->ip6q_csum_flags = 0;
661
662 insert:
663
664 /*
665 * Stick new segment in its place;
666 * check for complete reassembly.
667 * Move to front of packet queue, as we are
668 * the most recently active fragmented packet.
669 */
670 frag6_enq(ip6af, af6->ip6af_up);
671 frag6_nfrags++;
672 q6->ip6q_nfrag++;
673 #if 0 /* xxx */
674 if (q6 != ip6q.ip6q_next) {
675 frag6_remque(q6);
676 frag6_insque(q6, &ip6q);
677 }
678 #endif
679 next = 0;
680 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
681 af6 = af6->ip6af_down) {
682 if (af6->ip6af_off != next) {
683 lck_mtx_unlock(&ip6qlock);
684 locked = 0;
685 m = NULL;
686 goto done;
687 }
688 next += af6->ip6af_frglen;
689 }
690 if (af6->ip6af_up->ip6af_mff) {
691 lck_mtx_unlock(&ip6qlock);
692 locked = 0;
693 m = NULL;
694 goto done;
695 }
696
697 /*
698 * Reassembly is complete; concatenate fragments.
699 */
700 ip6af = q6->ip6q_down;
701 t = m = IP6_REASS_MBUF(ip6af);
702 af6 = ip6af->ip6af_down;
703 frag6_deq(ip6af);
704 while (af6 != (struct ip6asfrag *)q6) {
705 af6dwn = af6->ip6af_down;
706 frag6_deq(af6);
707 while (t->m_next)
708 t = t->m_next;
709 t->m_next = IP6_REASS_MBUF(af6);
710 m_adj(t->m_next, af6->ip6af_offset);
711 ip6af_free(af6);
712 af6 = af6dwn;
713 }
714
715 /*
716 * Store partial hardware checksum info from the fragment queue;
717 * the receive start offset is set to 40 bytes (see code at the
718 * top of this routine.)
719 */
720 if (q6->ip6q_csum_flags != 0) {
721 csum = q6->ip6q_csum;
722
723 ADDCARRY(csum);
724
725 m->m_pkthdr.csum_rx_val = csum;
726 m->m_pkthdr.csum_rx_start = sizeof (struct ip6_hdr);
727 m->m_pkthdr.csum_flags = q6->ip6q_csum_flags;
728 } else if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
729 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
730 /* loopback checksums are always OK */
731 m->m_pkthdr.csum_data = 0xffff;
732 m->m_pkthdr.csum_flags &= ~CSUM_PARTIAL;
733 m->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
734 }
735
736 /* adjust offset to point where the original next header starts */
737 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
738 ip6af_free(ip6af);
739 ip6 = mtod(m, struct ip6_hdr *);
740 ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
741 ip6->ip6_src = q6->ip6q_src;
742 ip6->ip6_dst = q6->ip6q_dst;
743 if (q6->ip6q_ecn == IPTOS_ECN_CE)
744 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
745
746 nxt = q6->ip6q_nxt;
747 #ifdef notyet
748 *q6->ip6q_nxtp = (u_char)(nxt & 0xff);
749 #endif
750
751 /* Delete frag6 header */
752 if (m->m_len >= offset + sizeof(struct ip6_frag)) {
753 /* This is the only possible case with !PULLDOWN_TEST */
754 ovbcopy((caddr_t)ip6, (caddr_t)ip6 + sizeof(struct ip6_frag),
755 offset);
756 m->m_data += sizeof(struct ip6_frag);
757 m->m_len -= sizeof(struct ip6_frag);
758 } else {
759 /* this comes with no copy if the boundary is on cluster */
760 if ((t = m_split(m, offset, M_DONTWAIT)) == NULL) {
761 frag6_remque(q6);
762 frag6_nfragpackets--;
763 frag6_nfrags -= q6->ip6q_nfrag;
764 ip6q_free(q6);
765 goto dropfrag;
766 }
767 m_adj(t, sizeof(struct ip6_frag));
768 m_cat(m, t);
769 }
770
771 /*
772 * Store NXT to the original.
773 */
774 {
775 char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
776 *prvnxtp = nxt;
777 }
778
779 frag6_remque(q6);
780 frag6_nfragpackets--;
781 frag6_nfrags -= q6->ip6q_nfrag;
782 ip6q_free(q6);
783
784 if (m->m_flags & M_PKTHDR) /* Isn't it always true? */
785 m_fixhdr(m);
786
787 ip6stat.ip6s_reassembled++;
788
789 /*
790 * Tell launch routine the next header
791 */
792 *mp = m;
793 *offp = offset;
794
795 /* arm the purge timer if not already and if there's work to do */
796 frag6_sched_timeout();
797 lck_mtx_unlock(&ip6qlock);
798 in6_ifstat_inc(dstifp, ifs6_reass_ok);
799 frag6_icmp6_paramprob_error(&diq6);
800 VERIFY(MBUFQ_EMPTY(&diq6));
801 return (nxt);
802
803 done:
804 VERIFY(m == NULL);
805 if (!locked) {
806 if (frag6_nfragpackets == 0) {
807 frag6_icmp6_paramprob_error(&diq6);
808 VERIFY(MBUFQ_EMPTY(&diq6));
809 return (IPPROTO_DONE);
810 }
811 lck_mtx_lock(&ip6qlock);
812 }
813 /* arm the purge timer if not already and if there's work to do */
814 frag6_sched_timeout();
815 lck_mtx_unlock(&ip6qlock);
816 frag6_icmp6_paramprob_error(&diq6);
817 VERIFY(MBUFQ_EMPTY(&diq6));
818 return (IPPROTO_DONE);
819
820 dropfrag:
821 ip6stat.ip6s_fragdropped++;
822 /* arm the purge timer if not already and if there's work to do */
823 frag6_sched_timeout();
824 lck_mtx_unlock(&ip6qlock);
825 in6_ifstat_inc(dstifp, ifs6_reass_fail);
826 m_freem(m);
827 frag6_icmp6_paramprob_error(&diq6);
828 VERIFY(MBUFQ_EMPTY(&diq6));
829 return (IPPROTO_DONE);
830 }
831
832 /*
833 * Free a fragment reassembly header and all
834 * associated datagrams.
835 */
836 void
837 frag6_freef(struct ip6q *q6, struct fq6_head *dfq6, struct fq6_head *diq6)
838 {
839 struct ip6asfrag *af6, *down6;
840
841 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
842
843 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
844 af6 = down6) {
845 struct mbuf *m = IP6_REASS_MBUF(af6);
846
847 down6 = af6->ip6af_down;
848 frag6_deq(af6);
849
850 /*
851 * Return ICMP time exceeded error for the 1st fragment.
852 * Just free other fragments.
853 */
854 if (af6->ip6af_off == 0) {
855 struct ip6_hdr *ip6;
856
857 /* adjust pointer */
858 ip6 = mtod(m, struct ip6_hdr *);
859
860 /* restore source and destination addresses */
861 ip6->ip6_src = q6->ip6q_src;
862 ip6->ip6_dst = q6->ip6q_dst;
863
864 MBUFQ_ENQUEUE(diq6, m);
865 } else {
866 MBUFQ_ENQUEUE(dfq6, m);
867 }
868 ip6af_free(af6);
869
870 }
871 frag6_remque(q6);
872 frag6_nfragpackets--;
873 frag6_nfrags -= q6->ip6q_nfrag;
874 ip6q_free(q6);
875 }
876
877 /*
878 * Put an ip fragment on a reassembly chain.
879 * Like insque, but pointers in middle of structure.
880 */
881 void
882 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
883 {
884 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
885
886 af6->ip6af_up = up6;
887 af6->ip6af_down = up6->ip6af_down;
888 up6->ip6af_down->ip6af_up = af6;
889 up6->ip6af_down = af6;
890 }
891
892 /*
893 * To frag6_enq as remque is to insque.
894 */
895 void
896 frag6_deq(struct ip6asfrag *af6)
897 {
898 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
899
900 af6->ip6af_up->ip6af_down = af6->ip6af_down;
901 af6->ip6af_down->ip6af_up = af6->ip6af_up;
902 }
903
904 void
905 frag6_insque(struct ip6q *new, struct ip6q *old)
906 {
907 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
908
909 new->ip6q_prev = old;
910 new->ip6q_next = old->ip6q_next;
911 old->ip6q_next->ip6q_prev= new;
912 old->ip6q_next = new;
913 }
914
915 void
916 frag6_remque(struct ip6q *p6)
917 {
918 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
919
920 p6->ip6q_prev->ip6q_next = p6->ip6q_next;
921 p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
922 }
923
924 /*
925 * IPv6 reassembling timer processing;
926 * if a timer expires on a reassembly
927 * queue, discard it.
928 */
929 static void
930 frag6_timeout(void *arg)
931 {
932 #pragma unused(arg)
933 struct fq6_head dfq6, diq6;
934 struct ip6q *q6;
935
936 MBUFQ_INIT(&dfq6); /* for deferred frees */
937 MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */
938
939 /*
940 * Update coarse-grained networking timestamp (in sec.); the idea
941 * is to piggy-back on the timeout callout to update the counter
942 * returnable via net_uptime().
943 */
944 net_update_uptime();
945
946 lck_mtx_lock(&ip6qlock);
947 q6 = ip6q.ip6q_next;
948 if (q6)
949 while (q6 != &ip6q) {
950 --q6->ip6q_ttl;
951 q6 = q6->ip6q_next;
952 if (q6->ip6q_prev->ip6q_ttl == 0) {
953 ip6stat.ip6s_fragtimeout++;
954 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
955 frag6_freef(q6->ip6q_prev, &dfq6, &diq6);
956 }
957 }
958 /*
959 * If we are over the maximum number of fragments
960 * (due to the limit being lowered), drain off
961 * enough to get down to the new limit.
962 */
963 if (ip6_maxfragpackets >= 0) {
964 while (frag6_nfragpackets > (unsigned)ip6_maxfragpackets &&
965 ip6q.ip6q_prev) {
966 ip6stat.ip6s_fragoverflow++;
967 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
968 frag6_freef(ip6q.ip6q_prev, &dfq6, &diq6);
969 }
970 }
971 /* re-arm the purge timer if there's work to do */
972 frag6_timeout_run = 0;
973 frag6_sched_timeout();
974 lck_mtx_unlock(&ip6qlock);
975
976 /* free fragments that need to be freed */
977 if (!MBUFQ_EMPTY(&dfq6))
978 MBUFQ_DRAIN(&dfq6);
979
980 frag6_icmp6_timeex_error(&diq6);
981
982 VERIFY(MBUFQ_EMPTY(&dfq6));
983 VERIFY(MBUFQ_EMPTY(&diq6));
984 }
985
986 static void
987 frag6_sched_timeout(void)
988 {
989 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
990
991 if (!frag6_timeout_run && frag6_nfragpackets > 0) {
992 frag6_timeout_run = 1;
993 timeout(frag6_timeout, NULL, hz);
994 }
995 }
996
997 /*
998 * Drain off all datagram fragments.
999 */
1000 void
1001 frag6_drain(void)
1002 {
1003 struct fq6_head dfq6, diq6;
1004
1005 MBUFQ_INIT(&dfq6); /* for deferred frees */
1006 MBUFQ_INIT(&diq6); /* for deferred ICMP time exceeded errors */
1007
1008 lck_mtx_lock(&ip6qlock);
1009 while (ip6q.ip6q_next != &ip6q) {
1010 ip6stat.ip6s_fragdropped++;
1011 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
1012 frag6_freef(ip6q.ip6q_next, &dfq6, &diq6);
1013 }
1014 lck_mtx_unlock(&ip6qlock);
1015
1016 /* free fragments that need to be freed */
1017 if (!MBUFQ_EMPTY(&dfq6))
1018 MBUFQ_DRAIN(&dfq6);
1019
1020 frag6_icmp6_timeex_error(&diq6);
1021
1022 VERIFY(MBUFQ_EMPTY(&dfq6));
1023 VERIFY(MBUFQ_EMPTY(&diq6));
1024 }
1025
1026 static struct ip6q *
1027 ip6q_alloc(int how)
1028 {
1029 struct mbuf *t;
1030 struct ip6q *q6;
1031
1032 /*
1033 * See comments in ip6q_updateparams(). Keep the count separate
1034 * from frag6_nfragpackets since the latter represents the elements
1035 * already in the reassembly queues.
1036 */
1037 if (ip6q_limit > 0 && ip6q_count > ip6q_limit)
1038 return (NULL);
1039
1040 t = m_get(how, MT_FTABLE);
1041 if (t != NULL) {
1042 atomic_add_32(&ip6q_count, 1);
1043 q6 = mtod(t, struct ip6q *);
1044 bzero(q6, sizeof (*q6));
1045 } else {
1046 q6 = NULL;
1047 }
1048 return (q6);
1049 }
1050
1051 static void
1052 ip6q_free(struct ip6q *q6)
1053 {
1054 (void) m_free(dtom(q6));
1055 atomic_add_32(&ip6q_count, -1);
1056 }
1057
1058 static struct ip6asfrag *
1059 ip6af_alloc(int how)
1060 {
1061 struct mbuf *t;
1062 struct ip6asfrag *af6;
1063
1064 /*
1065 * See comments in ip6q_updateparams(). Keep the count separate
1066 * from frag6_nfrags since the latter represents the elements
1067 * already in the reassembly queues.
1068 */
1069 if (ip6af_limit > 0 && ip6af_count > ip6af_limit)
1070 return (NULL);
1071
1072 t = m_get(how, MT_FTABLE);
1073 if (t != NULL) {
1074 atomic_add_32(&ip6af_count, 1);
1075 af6 = mtod(t, struct ip6asfrag *);
1076 bzero(af6, sizeof (*af6));
1077 } else {
1078 af6 = NULL;
1079 }
1080 return (af6);
1081 }
1082
1083 static void
1084 ip6af_free(struct ip6asfrag *af6)
1085 {
1086 (void) m_free(dtom(af6));
1087 atomic_add_32(&ip6af_count, -1);
1088 }
1089
1090 static void
1091 ip6q_updateparams(void)
1092 {
1093 lck_mtx_assert(&ip6qlock, LCK_MTX_ASSERT_OWNED);
1094 /*
1095 * -1 for unlimited allocation.
1096 */
1097 if (ip6_maxfragpackets < 0)
1098 ip6q_limit = 0;
1099 if (ip6_maxfrags < 0)
1100 ip6af_limit = 0;
1101 /*
1102 * Positive number for specific bound.
1103 */
1104 if (ip6_maxfragpackets > 0)
1105 ip6q_limit = ip6_maxfragpackets;
1106 if (ip6_maxfrags > 0)
1107 ip6af_limit = ip6_maxfrags;
1108 /*
1109 * Zero specifies no further fragment queue allocation -- set the
1110 * bound very low, but rely on implementation elsewhere to actually
1111 * prevent allocation and reclaim current queues.
1112 */
1113 if (ip6_maxfragpackets == 0)
1114 ip6q_limit = 1;
1115 if (ip6_maxfrags == 0)
1116 ip6af_limit = 1;
1117 /*
1118 * Arm the purge timer if not already and if there's work to do
1119 */
1120 frag6_sched_timeout();
1121 }
1122
1123 static int
1124 sysctl_maxfragpackets SYSCTL_HANDLER_ARGS
1125 {
1126 #pragma unused(arg1, arg2)
1127 int error, i;
1128
1129 lck_mtx_lock(&ip6qlock);
1130 i = ip6_maxfragpackets;
1131 error = sysctl_handle_int(oidp, &i, 0, req);
1132 if (error || req->newptr == USER_ADDR_NULL)
1133 goto done;
1134 /* impose bounds */
1135 if (i < -1 || i > (nmbclusters / 4)) {
1136 error = EINVAL;
1137 goto done;
1138 }
1139 ip6_maxfragpackets = i;
1140 ip6q_updateparams();
1141 done:
1142 lck_mtx_unlock(&ip6qlock);
1143 return (error);
1144 }
1145
1146 static int
1147 sysctl_maxfrags SYSCTL_HANDLER_ARGS
1148 {
1149 #pragma unused(arg1, arg2)
1150 int error, i;
1151
1152 lck_mtx_lock(&ip6qlock);
1153 i = ip6_maxfrags;
1154 error = sysctl_handle_int(oidp, &i, 0, req);
1155 if (error || req->newptr == USER_ADDR_NULL)
1156 goto done;
1157 /* impose bounds */
1158 if (i < -1 || i > (nmbclusters / 4)) {
1159 error = EINVAL;
1160 goto done;
1161 }
1162 ip6_maxfrags= i;
1163 ip6q_updateparams(); /* see if we need to arm timer */
1164 done:
1165 lck_mtx_unlock(&ip6qlock);
1166 return (error);
1167 }