]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/ip6_output.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58 /*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
92 /*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections. This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
98
99 #include <sys/param.h>
100 #include <sys/malloc.h>
101 #include <sys/mbuf.h>
102 #include <sys/errno.h>
103 #include <sys/protosw.h>
104 #include <sys/socket.h>
105 #include <sys/socketvar.h>
106 #include <sys/systm.h>
107 #include <sys/kernel.h>
108 #include <sys/proc.h>
109 #include <sys/kauth.h>
110 #include <sys/mcache.h>
111 #include <sys/sysctl.h>
112 #include <kern/zalloc.h>
113 #include <libkern/OSByteOrder.h>
114
115 #include <pexpert/pexpert.h>
116 #include <mach/sdt.h>
117
118 #include <net/if.h>
119 #include <net/route.h>
120 #include <net/dlil.h>
121 #include <net/net_osdep.h>
122
123 #include <netinet/in.h>
124 #include <netinet/in_var.h>
125 #include <netinet/ip_var.h>
126 #include <netinet6/in6_var.h>
127 #include <netinet/ip6.h>
128 #include <netinet/kpi_ipfilter_var.h>
129
130 #include <netinet6/ip6protosw.h>
131 #include <netinet/icmp6.h>
132 #include <netinet6/ip6_var.h>
133 #include <netinet/in_pcb.h>
134 #include <netinet6/nd6.h>
135 #include <netinet6/scope6_var.h>
136 #if IPSEC
137 #include <netinet6/ipsec.h>
138 #include <netinet6/ipsec6.h>
139 #include <netkey/key.h>
140 extern int ipsec_bypass;
141 #endif /* IPSEC */
142
143 #if CONFIG_MACF_NET
144 #include <security/mac.h>
145 #endif /* CONFIG_MACF_NET */
146
147 #if DUMMYNET
148 #include <netinet6/ip6_fw.h>
149 #include <netinet/ip_fw.h>
150 #include <netinet/ip_dummynet.h>
151 #endif /* DUMMYNET */
152
153 #if PF
154 #include <net/pfvar.h>
155 #endif /* PF */
156
157 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
158 static void ip6_out_cksum_stats(int, u_int32_t);
159 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
160 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
161 struct ip6_frag **);
162 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
163 struct ifnet *, struct in6_addr *, u_int32_t *, boolean_t *);
164 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
165 struct sockopt *sopt);
166 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
167 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
168 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
169 static void im6o_trace(struct ip6_moptions *, int);
170 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
171 int, int);
172 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
173 static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
174 int, uint32_t, uint32_t);
175
176 #define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
177
178 /* For gdb */
179 __private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
180
181 struct ip6_moptions_dbg {
182 struct ip6_moptions im6o; /* ip6_moptions */
183 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
184 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
185 /*
186 * Alloc and free callers.
187 */
188 ctrace_t im6o_alloc;
189 ctrace_t im6o_free;
190 /*
191 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
192 */
193 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
194 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
195 };
196
197 #if DEBUG
198 static unsigned int im6o_debug = 1; /* debugging (enabled) */
199 #else
200 static unsigned int im6o_debug; /* debugging (disabled) */
201 #endif /* !DEBUG */
202
203 static unsigned int im6o_size; /* size of zone element */
204 static struct zone *im6o_zone; /* zone for ip6_moptions */
205
206 #define IM6O_ZONE_MAX 64 /* maximum elements in zone */
207 #define IM6O_ZONE_NAME "ip6_moptions" /* zone name */
208
209 SYSCTL_DECL(_net_inet6_ip6);
210
211 static int ip6_maxchainsent = 0;
212 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
213 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
214 "use dlil_output_list");
215
216 /*
217 * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
218 * walks through the packet chain and sends each mbuf separately.
219 */
220 int
221 ip6_output_list(struct mbuf *m0, int packetlist, struct ip6_pktopts *opt,
222 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
223 struct ifnet **ifpp, struct ip6_out_args *ip6oa)
224 {
225 #pragma unused(packetlist)
226 struct mbuf *m = m0, *nextpkt;
227 int error = 0;
228
229 while (m != NULL) {
230 /*
231 * Break the chain before calling ip6_output() and free the
232 * mbufs if there was an error.
233 */
234 nextpkt = m->m_nextpkt;
235 m->m_nextpkt = NULL;
236 error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oa);
237 if (error != 0) {
238 if (nextpkt != NULL)
239 m_freem_list(nextpkt);
240 return (error);
241 }
242 m = nextpkt;
243 }
244
245 return (error);
246 }
247
248 /*
249 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
250 * header (with pri, len, nxt, hlim, src, dst).
251 * This function may modify ver and hlim only.
252 * The mbuf chain containing the packet will be freed.
253 * The mbuf opt, if present, will not be freed.
254 *
255 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
256 * skipped and ro->ro_rt would be used. Otherwise the result of route
257 * lookup is stored in ro->ro_rt.
258 *
259 * Type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
260 * nd_ifinfo.linkmtu is u_int32_t, so we use u_int32_t to hold the largest
261 * one, which is rt_rmx.rmx_mtu.
262 */
263 int
264 ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro,
265 int flags, struct ip6_moptions *im6o, struct ifnet **ifpp,
266 struct ip6_out_args *ip6oa)
267 {
268 struct ip6_hdr *ip6;
269 u_char *nexthdrp;
270 struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
271 struct mbuf *m, *mprev;
272 int hlen, tlen, len, off, nxt0;
273 struct route_in6 *ro_pmtu = NULL;
274 struct rtentry *rt = NULL;
275 struct sockaddr_in6 *dst, src_sa, dst_sa;
276 int error = 0;
277 struct in6_ifaddr *ia = NULL, *src_ia = NULL;
278 u_int32_t mtu;
279 boolean_t alwaysfrag = FALSE;
280 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
281 struct ip6_rthdr *rh;
282 struct in6_addr finaldst;
283 ipfilter_t inject_filter_ref;
284 struct ipf_pktopts *ippo = NULL;
285 struct flowadv *adv = NULL;
286 #if DUMMYNET
287 struct m_tag *tag;
288 struct ip6_out_args saved_ip6oa;
289 struct sockaddr_in6 dst_buf;
290 #endif /* DUMMYNET */
291 #if IPSEC
292 struct socket *so = NULL;
293 struct secpolicy *sp = NULL;
294 struct route_in6 *ipsec_saved_route = NULL;
295 boolean_t needipsectun = FALSE;
296 #endif /* IPSEC */
297 struct {
298 struct ipf_pktopts ipf_pktopts;
299 struct ip6_exthdrs exthdrs;
300 struct route_in6 ip6route;
301 #if IPSEC
302 struct ipsec_output_state ipsec_state;
303 #endif /* IPSEC */
304 #if DUMMYNET
305 struct route_in6 saved_route;
306 struct route_in6 saved_ro_pmtu;
307 struct ip_fw_args args;
308 #endif /* DUMMYNET */
309 } ip6obz;
310 #define ipf_pktopts ip6obz.ipf_pktopts
311 #define exthdrs ip6obz.exthdrs
312 #define ip6route ip6obz.ip6route
313 #define ipsec_state ip6obz.ipsec_state
314 #define saved_route ip6obz.saved_route
315 #define saved_ro_pmtu ip6obz.saved_ro_pmtu
316 #define args ip6obz.args
317 union {
318 struct {
319 boolean_t select_srcif : 1;
320 boolean_t hdrsplit : 1;
321 boolean_t dontfrag : 1;
322 #if IPSEC
323 boolean_t needipsec : 1;
324 boolean_t noipsec : 1;
325 #endif /* IPSEC */
326 };
327 uint32_t raw;
328 } ip6obf = { .raw = 0 };
329
330 VERIFY(m0->m_flags & M_PKTHDR);
331
332 /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
333 bzero(&ip6obz, sizeof (ip6obz));
334
335 #if DUMMYNET
336 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
337 goto tags_done;
338
339 /* Grab info from mtags prepended to the chain */
340 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
341 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
342 struct dn_pkt_tag *dn_tag;
343
344 dn_tag = (struct dn_pkt_tag *)(tag+1);
345 args.fwa_pf_rule = dn_tag->dn_pf_rule;
346
347 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof (dst_buf));
348 dst = &dst_buf;
349 ifp = dn_tag->dn_ifp;
350 if (ifp != NULL)
351 ifnet_reference(ifp);
352 flags = dn_tag->dn_flags;
353 if (dn_tag->dn_flags & IPV6_OUTARGS) {
354 saved_ip6oa = dn_tag->dn_ip6oa;
355 ip6oa = &saved_ip6oa;
356 }
357
358 saved_route = dn_tag->dn_ro6;
359 ro = &saved_route;
360 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
361 ro_pmtu = &saved_ro_pmtu;
362 origifp = dn_tag->dn_origifp;
363 if (origifp != NULL)
364 ifnet_reference(origifp);
365 mtu = dn_tag->dn_mtu;
366 alwaysfrag = (dn_tag->dn_alwaysfrag != 0);
367 unfragpartlen = dn_tag->dn_unfragpartlen;
368
369 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof (exthdrs));
370
371 m_tag_delete(m0, tag);
372 }
373
374 tags_done:
375 #endif /* DUMMYNET */
376
377 m = m0;
378 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
379
380 #if IPSEC
381 /* for AH processing. stupid to have "socket" variable in IP layer... */
382 if (ipsec_bypass == 0) {
383 so = ipsec_getsocket(m);
384 (void) ipsec_setsocket(m, NULL);
385
386 /* If packet is bound to an interface, check bound policies */
387 if ((flags & IPV6_OUTARGS) &&
388 (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) &&
389 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
390 /* ip6obf.noipsec is a bitfield, use temp integer */
391 int noipsec = 0;
392
393 if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
394 flags, ip6oa, &noipsec, &sp) != 0)
395 goto bad;
396
397 ip6obf.noipsec = (noipsec != 0);
398 }
399 }
400 #endif /* IPSEC */
401
402 ip6 = mtod(m, struct ip6_hdr *);
403 nxt0 = ip6->ip6_nxt;
404 finaldst = ip6->ip6_dst;
405 inject_filter_ref = ipf_get_inject_filter(m);
406 ippo = &ipf_pktopts;
407
408 if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
409 /*
410 * In the forwarding case, only the ifscope value is used,
411 * as source interface selection doesn't take place.
412 */
413 if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
414 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
415 (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF))))
416 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
417
418 if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
419 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
420 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
421 (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
422 }
423
424 if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR)
425 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
426 } else {
427 ip6obf.select_srcif = FALSE;
428 if (flags & IPV6_OUTARGS) {
429 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
430 ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
431 IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
432 }
433 }
434
435 if ((flags & IPV6_OUTARGS) && (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR))
436 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
437
438 if (flags & IPV6_OUTARGS) {
439 adv = &ip6oa->ip6oa_flowadv;
440 adv->code = FADV_SUCCESS;
441 ip6oa->ip6oa_retflags = 0;
442 }
443
444 #if DUMMYNET
445 if (args.fwa_pf_rule) {
446 ip6 = mtod(m, struct ip6_hdr *);
447 VERIFY(ro != NULL); /* ro == saved_route */
448 goto check_with_pf;
449 }
450 #endif /* DUMMYNET */
451
452 #define MAKE_EXTHDR(hp, mp) do { \
453 if (hp != NULL) { \
454 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
455 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
456 ((eh)->ip6e_len + 1) << 3); \
457 if (error) \
458 goto freehdrs; \
459 } \
460 } while (0)
461
462 if (opt != NULL) {
463 /* Hop-by-Hop options header */
464 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
465 /* Destination options header(1st part) */
466 if (opt->ip6po_rthdr) {
467 /*
468 * Destination options header(1st part)
469 * This only makes sense with a routing header.
470 * See Section 9.2 of RFC 3542.
471 * Disabling this part just for MIP6 convenience is
472 * a bad idea. We need to think carefully about a
473 * way to make the advanced API coexist with MIP6
474 * options, which might automatically be inserted in
475 * the kernel.
476 */
477 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
478 }
479 /* Routing header */
480 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
481 /* Destination options header(2nd part) */
482 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
483 }
484
485 #undef MAKE_EXTHDR
486
487 #if IPSEC
488 if (ipsec_bypass != 0 || ip6obf.noipsec)
489 goto skip_ipsec;
490
491 /* May have been set above if packet was bound */
492 if (sp == NULL) {
493 /* get a security policy for this packet */
494 if (so == NULL) {
495 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
496 0, &error);
497 } else {
498 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
499 so, &error);
500 }
501 if (sp == NULL) {
502 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
503 goto freehdrs;
504 }
505 }
506
507 error = 0;
508
509 /* check policy */
510 switch (sp->policy) {
511 case IPSEC_POLICY_DISCARD:
512 case IPSEC_POLICY_GENERATE:
513 /*
514 * This packet is just discarded.
515 */
516 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
517 goto freehdrs;
518
519 case IPSEC_POLICY_BYPASS:
520 case IPSEC_POLICY_NONE:
521 /* no need to do IPsec. */
522 ip6obf.needipsec = FALSE;
523 break;
524
525 case IPSEC_POLICY_IPSEC:
526 if (sp->req == NULL) {
527 /* acquire a policy */
528 error = key_spdacquire(sp);
529 goto freehdrs;
530 }
531 if (sp->ipsec_if) {
532 /* Verify the redirect to ipsec interface */
533 if (sp->ipsec_if == ifp) {
534 /* Set policy for mbuf */
535 m->m_pkthdr.ipsec_policy = sp->id;
536 goto skip_ipsec;
537 }
538 goto bad;
539 } else {
540 ip6obf.needipsec = TRUE;
541 }
542 break;
543
544 case IPSEC_POLICY_ENTRUST:
545 default:
546 printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
547 break;
548 }
549 skip_ipsec:
550 #endif /* IPSEC */
551
552 /*
553 * Calculate the total length of the extension header chain.
554 * Keep the length of the unfragmentable part for fragmentation.
555 */
556 optlen = 0;
557 if (exthdrs.ip6e_hbh != NULL)
558 optlen += exthdrs.ip6e_hbh->m_len;
559 if (exthdrs.ip6e_dest1 != NULL)
560 optlen += exthdrs.ip6e_dest1->m_len;
561 if (exthdrs.ip6e_rthdr != NULL)
562 optlen += exthdrs.ip6e_rthdr->m_len;
563 unfragpartlen = optlen + sizeof (struct ip6_hdr);
564
565 /* NOTE: we don't add AH/ESP length here. do that later. */
566 if (exthdrs.ip6e_dest2 != NULL)
567 optlen += exthdrs.ip6e_dest2->m_len;
568
569 /*
570 * If we need IPsec, or there is at least one extension header,
571 * separate IP6 header from the payload.
572 */
573 if ((
574 #if IPSEC
575 ip6obf.needipsec ||
576 #endif /* IPSEC */
577 optlen) && !ip6obf.hdrsplit) {
578 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
579 m = NULL;
580 goto freehdrs;
581 }
582 m = exthdrs.ip6e_ip6;
583 ip6obf.hdrsplit = TRUE;
584 }
585
586 /* adjust pointer */
587 ip6 = mtod(m, struct ip6_hdr *);
588
589 /* adjust mbuf packet header length */
590 m->m_pkthdr.len += optlen;
591 plen = m->m_pkthdr.len - sizeof (*ip6);
592
593 /* If this is a jumbo payload, insert a jumbo payload option. */
594 if (plen > IPV6_MAXPACKET) {
595 if (!ip6obf.hdrsplit) {
596 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
597 m = NULL;
598 goto freehdrs;
599 }
600 m = exthdrs.ip6e_ip6;
601 ip6obf.hdrsplit = TRUE;
602 }
603 /* adjust pointer */
604 ip6 = mtod(m, struct ip6_hdr *);
605 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
606 goto freehdrs;
607 ip6->ip6_plen = 0;
608 } else {
609 ip6->ip6_plen = htons(plen);
610 }
611 /*
612 * Concatenate headers and fill in next header fields.
613 * Here we have, on "m"
614 * IPv6 payload
615 * and we insert headers accordingly. Finally, we should be getting:
616 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
617 *
618 * during the header composing process, "m" points to IPv6 header.
619 * "mprev" points to an extension header prior to esp.
620 */
621 nexthdrp = &ip6->ip6_nxt;
622 mprev = m;
623
624 /*
625 * we treat dest2 specially. this makes IPsec processing
626 * much easier. the goal here is to make mprev point the
627 * mbuf prior to dest2.
628 *
629 * result: IPv6 dest2 payload
630 * m and mprev will point to IPv6 header.
631 */
632 if (exthdrs.ip6e_dest2 != NULL) {
633 if (!ip6obf.hdrsplit) {
634 panic("assumption failed: hdr not split");
635 /* NOTREACHED */
636 }
637 exthdrs.ip6e_dest2->m_next = m->m_next;
638 m->m_next = exthdrs.ip6e_dest2;
639 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
640 ip6->ip6_nxt = IPPROTO_DSTOPTS;
641 }
642
643 #define MAKE_CHAIN(m, mp, p, i) do { \
644 if (m != NULL) { \
645 if (!ip6obf.hdrsplit) { \
646 panic("assumption failed: hdr not split"); \
647 /* NOTREACHED */ \
648 } \
649 *mtod((m), u_char *) = *(p); \
650 *(p) = (i); \
651 p = mtod((m), u_char *); \
652 (m)->m_next = (mp)->m_next; \
653 (mp)->m_next = (m); \
654 (mp) = (m); \
655 } \
656 } while (0)
657 /*
658 * result: IPv6 hbh dest1 rthdr dest2 payload
659 * m will point to IPv6 header. mprev will point to the
660 * extension header prior to dest2 (rthdr in the above case).
661 */
662 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
663 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
664 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
665
666 #undef MAKE_CHAIN
667
668 #if IPSEC
669 if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA))
670 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
671 #endif /* IPSEC */
672
673 if (!TAILQ_EMPTY(&ipv6_filters)) {
674 struct ipfilter *filter;
675 int seen = (inject_filter_ref == NULL);
676 int fixscope = 0;
677
678 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
679 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
680 IM6O_LOCK(im6o);
681 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
682 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
683 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
684 IM6O_UNLOCK(im6o);
685 }
686
687 /* Hack: embed the scope_id in the destination */
688 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
689 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
690 fixscope = 1;
691 ip6->ip6_dst.s6_addr16[1] =
692 htons(ro->ro_dst.sin6_scope_id);
693 }
694
695 ipf_ref();
696 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
697 /*
698 * Don't process packet twice if we've already seen it.
699 */
700 if (seen == 0) {
701 if ((struct ipfilter *)inject_filter_ref ==
702 filter)
703 seen = 1;
704 } else if (filter->ipf_filter.ipf_output != NULL) {
705 errno_t result;
706
707 result = filter->ipf_filter.ipf_output(
708 filter->ipf_filter.cookie,
709 (mbuf_t *)&m, ippo);
710 if (result == EJUSTRETURN) {
711 ipf_unref();
712 goto done;
713 }
714 if (result != 0) {
715 ipf_unref();
716 goto bad;
717 }
718 }
719 }
720 ipf_unref();
721
722 ip6 = mtod(m, struct ip6_hdr *);
723 /* Hack: cleanup embedded scope_id if we put it there */
724 if (fixscope)
725 ip6->ip6_dst.s6_addr16[1] = 0;
726 }
727
728 #if IPSEC
729 if (ip6obf.needipsec) {
730 int segleft_org;
731
732 /*
733 * pointers after IPsec headers are not valid any more.
734 * other pointers need a great care too.
735 * (IPsec routines should not mangle mbufs prior to AH/ESP)
736 */
737 exthdrs.ip6e_dest2 = NULL;
738
739 if (exthdrs.ip6e_rthdr != NULL) {
740 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
741 segleft_org = rh->ip6r_segleft;
742 rh->ip6r_segleft = 0;
743 } else {
744 rh = NULL;
745 segleft_org = 0;
746 }
747
748 ipsec_state.m = m;
749 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
750 sp, flags, &needipsectun);
751 m = ipsec_state.m;
752 if (error) {
753 /* mbuf is already reclaimed in ipsec6_output_trans. */
754 m = NULL;
755 switch (error) {
756 case EHOSTUNREACH:
757 case ENETUNREACH:
758 case EMSGSIZE:
759 case ENOBUFS:
760 case ENOMEM:
761 break;
762 default:
763 printf("ip6_output (ipsec): error code %d\n",
764 error);
765 /* FALLTHRU */
766 case ENOENT:
767 /* don't show these error codes to the user */
768 error = 0;
769 break;
770 }
771 goto bad;
772 }
773 if (exthdrs.ip6e_rthdr != NULL) {
774 /* ah6_output doesn't modify mbuf chain */
775 rh->ip6r_segleft = segleft_org;
776 }
777 }
778 #endif /* IPSEC */
779
780 /*
781 * If there is a routing header, replace the destination address field
782 * with the first hop of the routing header.
783 */
784 if (exthdrs.ip6e_rthdr != NULL) {
785 struct ip6_rthdr0 *rh0;
786 struct in6_addr *addr;
787 struct sockaddr_in6 sa;
788
789 rh = (struct ip6_rthdr *)
790 (mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *));
791 switch (rh->ip6r_type) {
792 case IPV6_RTHDR_TYPE_0:
793 rh0 = (struct ip6_rthdr0 *)rh;
794 addr = (struct in6_addr *)(void *)(rh0 + 1);
795
796 /*
797 * construct a sockaddr_in6 form of
798 * the first hop.
799 *
800 * XXX: we may not have enough
801 * information about its scope zone;
802 * there is no standard API to pass
803 * the information from the
804 * application.
805 */
806 bzero(&sa, sizeof (sa));
807 sa.sin6_family = AF_INET6;
808 sa.sin6_len = sizeof (sa);
809 sa.sin6_addr = addr[0];
810 if ((error = sa6_embedscope(&sa,
811 ip6_use_defzone)) != 0) {
812 goto bad;
813 }
814 ip6->ip6_dst = sa.sin6_addr;
815 bcopy(&addr[1], &addr[0], sizeof (struct in6_addr) *
816 (rh0->ip6r0_segleft - 1));
817 addr[rh0->ip6r0_segleft - 1] = finaldst;
818 /* XXX */
819 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
820 break;
821 default: /* is it possible? */
822 error = EINVAL;
823 goto bad;
824 }
825 }
826
827 /* Source address validation */
828 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
829 !(flags & IPV6_UNSPECSRC)) {
830 error = EOPNOTSUPP;
831 ip6stat.ip6s_badscope++;
832 goto bad;
833 }
834 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
835 error = EOPNOTSUPP;
836 ip6stat.ip6s_badscope++;
837 goto bad;
838 }
839
840 ip6stat.ip6s_localout++;
841
842 /*
843 * Route packet.
844 */
845 if (ro == NULL) {
846 ro = &ip6route;
847 bzero((caddr_t)ro, sizeof (*ro));
848 }
849 VERIFY(ro_pmtu == NULL); /* must not get here if dummynet */
850 ro_pmtu = ro;
851 if (opt != NULL && opt->ip6po_rthdr)
852 ro = &opt->ip6po_route;
853 dst = SIN6(&ro->ro_dst);
854
855 if (ro->ro_rt != NULL)
856 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
857 /*
858 * if specified, try to fill in the traffic class field.
859 * do not override if a non-zero value is already set.
860 * we check the diffserv field and the ecn field separately.
861 */
862 if (opt != NULL && opt->ip6po_tclass >= 0) {
863 int mask = 0;
864
865 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
866 mask |= 0xfc;
867 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
868 mask |= 0x03;
869 if (mask != 0) {
870 ip6->ip6_flow |=
871 htonl((opt->ip6po_tclass & mask) << 20);
872 }
873 }
874
875 /* fill in or override the hop limit field, if necessary. */
876 if (opt && opt->ip6po_hlim != -1) {
877 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
878 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
879 if (im6o != NULL) {
880 IM6O_LOCK(im6o);
881 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
882 IM6O_UNLOCK(im6o);
883 } else {
884 ip6->ip6_hlim = ip6_defmcasthlim;
885 }
886 }
887
888 /*
889 * If there is a cached route, check that it is to the same
890 * destination and is still up. If not, free it and try again.
891 * Test rt_flags without holding rt_lock for performance reasons;
892 * if the route is down it will hopefully be caught by the layer
893 * below (since it uses this route as a hint) or during the
894 * next transmit.
895 */
896 if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
897 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))
898 ROUTE_RELEASE(ro);
899
900 if (ro->ro_rt == NULL) {
901 bzero(dst, sizeof (*dst));
902 dst->sin6_family = AF_INET6;
903 dst->sin6_len = sizeof (struct sockaddr_in6);
904 dst->sin6_addr = ip6->ip6_dst;
905 }
906 #if IPSEC
907 if (ip6obf.needipsec && needipsectun) {
908 #if CONFIG_DTRACE
909 struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
910 #endif /* CONFIG_DTRACE */
911 /*
912 * All the extension headers will become inaccessible
913 * (since they can be encrypted).
914 * Don't panic, we need no more updates to extension headers
915 * on inner IPv6 packet (since they are now encapsulated).
916 *
917 * IPv6 [ESP|AH] IPv6 [extension headers] payload
918 */
919 bzero(&exthdrs, sizeof (exthdrs));
920 exthdrs.ip6e_ip6 = m;
921
922 ipsec_state.m = m;
923 route_copyout(&ipsec_state.ro, (struct route *)ro,
924 sizeof (ipsec_state.ro));
925 ipsec_state.dst = SA(dst);
926
927 /* So that we can see packets inside the tunnel */
928 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
929 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
930 struct ip *, NULL, struct ip6_hdr *, ip6);
931
932 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
933 /* tunneled in IPv4? packet is gone */
934 if (ipsec_state.tunneled == 4)
935 goto done;
936 m = ipsec_state.m;
937 ipsec_saved_route = ro;
938 ro = (struct route_in6 *)&ipsec_state.ro;
939 dst = SIN6(ipsec_state.dst);
940 if (error) {
941 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
942 m0 = m = NULL;
943 m = NULL;
944 switch (error) {
945 case EHOSTUNREACH:
946 case ENETUNREACH:
947 case EMSGSIZE:
948 case ENOBUFS:
949 case ENOMEM:
950 break;
951 default:
952 printf("ip6_output (ipsec): error code %d\n",
953 error);
954 /* FALLTHRU */
955 case ENOENT:
956 /* don't show these error codes to the user */
957 error = 0;
958 break;
959 }
960 goto bad;
961 }
962 /*
963 * The packet has been encapsulated so the ifscope
964 * is no longer valid since it does not apply to the
965 * outer address: ignore the ifscope.
966 */
967 if (flags & IPV6_OUTARGS) {
968 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
969 ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
970 }
971 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
972 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
973 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
974 }
975 exthdrs.ip6e_ip6 = m;
976 }
977 #endif /* IPSEC */
978
979 /* for safety */
980 if (ifp != NULL) {
981 ifnet_release(ifp);
982 ifp = NULL;
983 }
984
985 /* adjust pointer */
986 ip6 = mtod(m, struct ip6_hdr *);
987
988 if (ip6obf.select_srcif) {
989 bzero(&src_sa, sizeof (src_sa));
990 src_sa.sin6_family = AF_INET6;
991 src_sa.sin6_len = sizeof (src_sa);
992 src_sa.sin6_addr = ip6->ip6_src;
993 }
994 bzero(&dst_sa, sizeof (dst_sa));
995 dst_sa.sin6_family = AF_INET6;
996 dst_sa.sin6_len = sizeof (dst_sa);
997 dst_sa.sin6_addr = ip6->ip6_dst;
998
999 /*
1000 * in6_selectroute() might return an ifp with its reference held
1001 * even in the error case, so make sure to release its reference.
1002 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
1003 */
1004 if ((error = in6_selectroute(ip6obf.select_srcif ? &src_sa : NULL,
1005 &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa)) != 0) {
1006 switch (error) {
1007 case EHOSTUNREACH:
1008 ip6stat.ip6s_noroute++;
1009 break;
1010 case EADDRNOTAVAIL:
1011 default:
1012 break; /* XXX statistics? */
1013 }
1014 if (ifp != NULL)
1015 in6_ifstat_inc(ifp, ifs6_out_discard);
1016 /* ifp (if non-NULL) will be released at the end */
1017 goto bad;
1018 }
1019 if (rt == NULL) {
1020 /*
1021 * If in6_selectroute() does not return a route entry,
1022 * dst may not have been updated.
1023 */
1024 *dst = dst_sa; /* XXX */
1025 }
1026
1027 /*
1028 * then rt (for unicast) and ifp must be non-NULL valid values.
1029 */
1030 if (!(flags & IPV6_FORWARDING)) {
1031 /* XXX: the FORWARDING flag can be set for mrouting. */
1032 in6_ifstat_inc_na(ifp, ifs6_out_request);
1033 }
1034 if (rt != NULL) {
1035 RT_LOCK(rt);
1036 ia = (struct in6_ifaddr *)(rt->rt_ifa);
1037 if (ia != NULL)
1038 IFA_ADDREF(&ia->ia_ifa);
1039 rt->rt_use++;
1040 RT_UNLOCK(rt);
1041 }
1042
1043 /*
1044 * The outgoing interface must be in the zone of source and
1045 * destination addresses (except local/loopback). We should
1046 * use ia_ifp to support the case of sending packets to an
1047 * address of our own.
1048 */
1049 if (ia != NULL && ia->ia_ifp) {
1050 ifnet_reference(ia->ia_ifp); /* for origifp */
1051 if (origifp != NULL)
1052 ifnet_release(origifp);
1053 origifp = ia->ia_ifp;
1054 } else {
1055 if (ifp != NULL)
1056 ifnet_reference(ifp); /* for origifp */
1057 if (origifp != NULL)
1058 ifnet_release(origifp);
1059 origifp = ifp;
1060 }
1061
1062 /* skip scope enforcements for local/loopback route */
1063 if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1064 struct in6_addr src0, dst0;
1065 u_int32_t zone;
1066
1067 src0 = ip6->ip6_src;
1068 if (in6_setscope(&src0, origifp, &zone))
1069 goto badscope;
1070 bzero(&src_sa, sizeof (src_sa));
1071 src_sa.sin6_family = AF_INET6;
1072 src_sa.sin6_len = sizeof (src_sa);
1073 src_sa.sin6_addr = ip6->ip6_src;
1074 if ((sa6_recoverscope(&src_sa, TRUE) ||
1075 zone != src_sa.sin6_scope_id))
1076 goto badscope;
1077
1078 dst0 = ip6->ip6_dst;
1079 if ((in6_setscope(&dst0, origifp, &zone)))
1080 goto badscope;
1081 /* re-initialize to be sure */
1082 bzero(&dst_sa, sizeof (dst_sa));
1083 dst_sa.sin6_family = AF_INET6;
1084 dst_sa.sin6_len = sizeof (dst_sa);
1085 dst_sa.sin6_addr = ip6->ip6_dst;
1086 if ((sa6_recoverscope(&dst_sa, TRUE) ||
1087 zone != dst_sa.sin6_scope_id))
1088 goto badscope;
1089
1090 /* scope check is done. */
1091 goto routefound;
1092
1093 badscope:
1094 ip6stat.ip6s_badscope++;
1095 in6_ifstat_inc(origifp, ifs6_out_discard);
1096 if (error == 0)
1097 error = EHOSTUNREACH; /* XXX */
1098 goto bad;
1099 }
1100
1101 routefound:
1102 if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1103 if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1104 /*
1105 * The nexthop is explicitly specified by the
1106 * application. We assume the next hop is an IPv6
1107 * address.
1108 */
1109 dst = SIN6(opt->ip6po_nexthop);
1110 } else if ((rt->rt_flags & RTF_GATEWAY)) {
1111 dst = SIN6(rt->rt_gateway);
1112 }
1113 /*
1114 * For packets destined to local/loopback, record the
1115 * source interface (which owns the source
1116 * address), as well as the output interface. This is
1117 * needed to reconstruct the embedded zone for the
1118 * link-local address case in ip6_input().
1119 */
1120 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1121 uint32_t srcidx;
1122
1123 if (src_ia != NULL)
1124 srcidx = src_ia->ia_ifp->if_index;
1125 else if (ro->ro_srcia != NULL)
1126 srcidx = ro->ro_srcia->ifa_ifp->if_index;
1127 else
1128 srcidx = 0;
1129
1130 ip6_setsrcifaddr_info(m, srcidx, NULL);
1131 ip6_setdstifaddr_info(m, 0, ia);
1132 }
1133 }
1134
1135 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1136 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1137 } else {
1138 struct in6_multi *in6m;
1139
1140 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1141 in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1142
1143 /*
1144 * Confirm that the outgoing interface supports multicast.
1145 */
1146 if (!(ifp->if_flags & IFF_MULTICAST)) {
1147 ip6stat.ip6s_noroute++;
1148 in6_ifstat_inc(ifp, ifs6_out_discard);
1149 error = ENETUNREACH;
1150 goto bad;
1151 }
1152 in6_multihead_lock_shared();
1153 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1154 in6_multihead_lock_done();
1155 if (im6o != NULL)
1156 IM6O_LOCK(im6o);
1157 if (in6m != NULL &&
1158 (im6o == NULL || im6o->im6o_multicast_loop)) {
1159 if (im6o != NULL)
1160 IM6O_UNLOCK(im6o);
1161 /*
1162 * If we belong to the destination multicast group
1163 * on the outgoing interface, and the caller did not
1164 * forbid loopback, loop back a copy.
1165 */
1166 ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
1167 } else {
1168 if (im6o != NULL)
1169 IM6O_UNLOCK(im6o);
1170 /*
1171 * If we are acting as a multicast router, perform
1172 * multicast forwarding as if the packet had just
1173 * arrived on the interface to which we are about
1174 * to send. The multicast forwarding function
1175 * recursively calls this function, using the
1176 * IPV6_FORWARDING flag to prevent infinite recursion.
1177 *
1178 * Multicasts that are looped back by ip6_mloopback(),
1179 * above, will be forwarded by the ip6_input() routine,
1180 * if necessary.
1181 */
1182 #if MROUTING
1183 if (ip6_mrouter && !(flags & IPV6_FORWARDING)) {
1184 /*
1185 * XXX: ip6_mforward expects that rcvif is NULL
1186 * when it is called from the originating path.
1187 * However, it is not always the case, since
1188 * some versions of MGETHDR() does not
1189 * initialize the field.
1190 */
1191 m->m_pkthdr.rcvif = NULL;
1192 if (ip6_mforward(ip6, ifp, m) != 0) {
1193 m_freem(m);
1194 if (in6m != NULL)
1195 IN6M_REMREF(in6m);
1196 goto done;
1197 }
1198 }
1199 #endif /* MROUTING */
1200 }
1201 if (in6m != NULL)
1202 IN6M_REMREF(in6m);
1203 /*
1204 * Multicasts with a hoplimit of zero may be looped back,
1205 * above, but must not be transmitted on a network.
1206 * Also, multicasts addressed to the loopback interface
1207 * are not sent -- the above call to ip6_mloopback() will
1208 * loop back a copy if this host actually belongs to the
1209 * destination group on the loopback interface.
1210 */
1211 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1212 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1213 m_freem(m);
1214 goto done;
1215 }
1216 }
1217
1218 /*
1219 * Fill the outgoing interface to tell the upper layer
1220 * to increment per-interface statistics.
1221 */
1222 if (ifpp != NULL) {
1223 ifnet_reference(ifp); /* for caller */
1224 if (*ifpp != NULL)
1225 ifnet_release(*ifpp);
1226 *ifpp = ifp;
1227 }
1228
1229 /* Determine path MTU. */
1230 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1231 &alwaysfrag)) != 0)
1232 goto bad;
1233
1234 /*
1235 * The caller of this function may specify to use the minimum MTU
1236 * in some cases.
1237 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1238 * setting. The logic is a bit complicated; by default, unicast
1239 * packets will follow path MTU while multicast packets will be sent at
1240 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1241 * including unicast ones will be sent at the minimum MTU. Multicast
1242 * packets will always be sent at the minimum MTU unless
1243 * IP6PO_MINMTU_DISABLE is explicitly specified.
1244 * See RFC 3542 for more details.
1245 */
1246 if (mtu > IPV6_MMTU) {
1247 if ((flags & IPV6_MINMTU)) {
1248 mtu = IPV6_MMTU;
1249 } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
1250 mtu = IPV6_MMTU;
1251 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1252 (opt == NULL ||
1253 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1254 mtu = IPV6_MMTU;
1255 }
1256 }
1257
1258 /*
1259 * clear embedded scope identifiers if necessary.
1260 * in6_clearscope will touch the addresses only when necessary.
1261 */
1262 in6_clearscope(&ip6->ip6_src);
1263 in6_clearscope(&ip6->ip6_dst);
1264
1265 #if IPFW2
1266 /*
1267 * Check with the firewall...
1268 */
1269 if (ip6_fw_enable && ip6_fw_chk_ptr) {
1270 u_short port = 0;
1271 m->m_pkthdr.rcvif = NULL; /* XXX */
1272 /* If ipfw says divert, we have to just drop packet */
1273 if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
1274 m_freem(m);
1275 goto done;
1276 }
1277 if (m == NULL) {
1278 error = EACCES;
1279 goto done;
1280 }
1281 }
1282 #endif /* IPFW2 */
1283
1284 /*
1285 * If the outgoing packet contains a hop-by-hop options header,
1286 * it must be examined and processed even by the source node.
1287 * (RFC 2460, section 4.)
1288 */
1289 if (exthdrs.ip6e_hbh != NULL) {
1290 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1291 u_int32_t dummy; /* XXX unused */
1292 uint32_t oplen = 0; /* for ip6_process_hopopts() */
1293 #if DIAGNOSTIC
1294 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1295 panic("ip6e_hbh is not continuous");
1296 #endif
1297 /*
1298 * XXX: If we have to send an ICMPv6 error to the sender,
1299 * we need the M_LOOP flag since icmp6_error() expects
1300 * the IPv6 and the hop-by-hop options header are
1301 * continuous unless the flag is set.
1302 */
1303 m->m_flags |= M_LOOP;
1304 m->m_pkthdr.rcvif = ifp;
1305 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1306 ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh),
1307 &dummy, &oplen) < 0) {
1308 /* m was already freed at this point */
1309 error = EINVAL; /* better error? */
1310 goto done;
1311 }
1312 m->m_flags &= ~M_LOOP; /* XXX */
1313 m->m_pkthdr.rcvif = NULL;
1314 }
1315
1316 #if DUMMYNET
1317 check_with_pf:
1318 #endif /* DUMMYNET */
1319 #if PF
1320 if (PF_IS_ENABLED) {
1321 #if DUMMYNET
1322 /*
1323 * TODO: Need to save opt->ip6po_flags for reinjection
1324 * rdar://10434993
1325 */
1326 args.fwa_m = m;
1327 args.fwa_oif = ifp;
1328 args.fwa_oflags = flags;
1329 if (flags & IPV6_OUTARGS)
1330 args.fwa_ip6oa = ip6oa;
1331 args.fwa_ro6 = ro;
1332 args.fwa_dst6 = dst;
1333 args.fwa_ro6_pmtu = ro_pmtu;
1334 args.fwa_origifp = origifp;
1335 args.fwa_mtu = mtu;
1336 args.fwa_alwaysfrag = alwaysfrag;
1337 args.fwa_unfragpartlen = unfragpartlen;
1338 args.fwa_exthdrs = &exthdrs;
1339 /* Invoke outbound packet filter */
1340 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1341 #else /* !DUMMYNET */
1342 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1343 #endif /* !DUMMYNET */
1344
1345 if (error != 0 || m == NULL) {
1346 /*
1347 * Note that if we ever handle packet chain, we will
1348 * have to restore the linkage from the previous
1349 * packet to the next like in ip_output_list()
1350 */
1351 if (m != NULL) {
1352 panic("%s: unexpected packet %p\n",
1353 __func__, m);
1354 /* NOTREACHED */
1355 }
1356 /* Already freed by callee */
1357 goto done;
1358 }
1359 ip6 = mtod(m, struct ip6_hdr *);
1360 }
1361 #endif /* PF */
1362
1363 /*
1364 * Send the packet to the outgoing interface.
1365 * If necessary, do IPv6 fragmentation before sending.
1366 *
1367 * the logic here is rather complex:
1368 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1369 * 1-a: send as is if tlen <= path mtu
1370 * 1-b: fragment if tlen > path mtu
1371 *
1372 * 2: if user asks us not to fragment (dontfrag == 1)
1373 * 2-a: send as is if tlen <= interface mtu
1374 * 2-b: error if tlen > interface mtu
1375 *
1376 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1377 * always fragment
1378 *
1379 * 4: if dontfrag == 1 && alwaysfrag == 1
1380 * error, as we cannot handle this conflicting request
1381 */
1382 tlen = m->m_pkthdr.len;
1383
1384 if (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG))
1385 ip6obf.dontfrag = TRUE;
1386 else
1387 ip6obf.dontfrag = FALSE;
1388 if (ip6obf.dontfrag && alwaysfrag) { /* case 4 */
1389 /* conflicting request - can't transmit */
1390 error = EMSGSIZE;
1391 goto bad;
1392 }
1393
1394 lck_rw_lock_shared(nd_if_rwlock);
1395 /* Access without acquiring nd_ifinfo lock for performance */
1396 if (ip6obf.dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
1397 lck_rw_done(nd_if_rwlock);
1398 /*
1399 * Even if the DONTFRAG option is specified, we cannot send the
1400 * packet when the data length is larger than the MTU of the
1401 * outgoing interface.
1402 * Notify the error by sending IPV6_PATHMTU ancillary data as
1403 * well as returning an error code (the latter is not described
1404 * in the API spec.)
1405 */
1406 u_int32_t mtu32;
1407 struct ip6ctlparam ip6cp;
1408
1409 mtu32 = (u_int32_t)mtu;
1410 bzero(&ip6cp, sizeof (ip6cp));
1411 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1412 pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
1413 error = EMSGSIZE;
1414 goto bad;
1415 } else {
1416 lck_rw_done(nd_if_rwlock);
1417 }
1418
1419 /*
1420 * transmit packet without fragmentation
1421 */
1422 if (ip6obf.dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
1423 (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
1424 (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1425 #ifdef IPSEC
1426 /* clean ipsec history once it goes out of the node */
1427 ipsec_delaux(m);
1428 #endif /* IPSEC */
1429
1430 ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
1431
1432 if (ro->ro_rt)
1433 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1434 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
1435 goto done;
1436 }
1437
1438 /*
1439 * try to fragment the packet. case 1-b and 3
1440 */
1441 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
1442 /* TSO and fragment aren't compatible */
1443 error = EMSGSIZE;
1444 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1445 goto bad;
1446 } else if (mtu < IPV6_MMTU) {
1447 /* path MTU cannot be less than IPV6_MMTU */
1448 error = EMSGSIZE;
1449 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1450 goto bad;
1451 } else if (ip6->ip6_plen == 0) {
1452 /* jumbo payload cannot be fragmented */
1453 error = EMSGSIZE;
1454 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1455 goto bad;
1456 } else {
1457 struct mbuf **mnext, *m_frgpart;
1458 struct ip6_frag *ip6f;
1459 u_int32_t id = htonl(ip6_randomid());
1460 u_char nextproto;
1461
1462 /*
1463 * Too large for the destination or interface;
1464 * fragment if possible.
1465 * Must be able to put at least 8 bytes per fragment.
1466 */
1467 hlen = unfragpartlen;
1468 if (mtu > IPV6_MAXPACKET)
1469 mtu = IPV6_MAXPACKET;
1470
1471 len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7;
1472 if (len < 8) {
1473 error = EMSGSIZE;
1474 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1475 goto bad;
1476 }
1477
1478 mnext = &m->m_nextpkt;
1479
1480 /*
1481 * Change the next header field of the last header in the
1482 * unfragmentable part.
1483 */
1484 if (exthdrs.ip6e_rthdr != NULL) {
1485 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1486 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1487 } else if (exthdrs.ip6e_dest1 != NULL) {
1488 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1489 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1490 } else if (exthdrs.ip6e_hbh != NULL) {
1491 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1492 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1493 } else {
1494 nextproto = ip6->ip6_nxt;
1495 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1496 }
1497
1498 if (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
1499 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
1500
1501 /*
1502 * Loop through length of segment after first fragment,
1503 * make new header and copy data of each part and link onto
1504 * chain.
1505 */
1506 m0 = m;
1507 for (off = hlen; off < tlen; off += len) {
1508 struct ip6_hdr *mhip6;
1509
1510 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1511 if (m == NULL) {
1512 error = ENOBUFS;
1513 ip6stat.ip6s_odropped++;
1514 goto sendorfree;
1515 }
1516 m->m_pkthdr.rcvif = NULL;
1517 m->m_flags = m0->m_flags & M_COPYFLAGS;
1518 *mnext = m;
1519 mnext = &m->m_nextpkt;
1520 m->m_data += max_linkhdr;
1521 mhip6 = mtod(m, struct ip6_hdr *);
1522 *mhip6 = *ip6;
1523 m->m_len = sizeof (*mhip6);
1524 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1525 if (error) {
1526 ip6stat.ip6s_odropped++;
1527 goto sendorfree;
1528 }
1529 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1530 if (off + len >= tlen)
1531 len = tlen - off;
1532 else
1533 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1534 mhip6->ip6_plen = htons((u_short)(len + hlen +
1535 sizeof (*ip6f) - sizeof (struct ip6_hdr)));
1536 if ((m_frgpart = m_copy(m0, off, len)) == NULL) {
1537 error = ENOBUFS;
1538 ip6stat.ip6s_odropped++;
1539 goto sendorfree;
1540 }
1541 m_cat(m, m_frgpart);
1542 m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
1543 m->m_pkthdr.rcvif = NULL;
1544
1545 M_COPY_CLASSIFIER(m, m0);
1546 M_COPY_PFTAG(m, m0);
1547
1548 #ifdef notyet
1549 #if CONFIG_MACF_NET
1550 mac_create_fragment(m0, m);
1551 #endif /* CONFIG_MACF_NET */
1552 #endif /* notyet */
1553
1554 ip6f->ip6f_reserved = 0;
1555 ip6f->ip6f_ident = id;
1556 ip6f->ip6f_nxt = nextproto;
1557 ip6stat.ip6s_ofragments++;
1558 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1559 }
1560
1561 in6_ifstat_inc(ifp, ifs6_out_fragok);
1562 }
1563
1564 /*
1565 * Remove leading garbages.
1566 */
1567 sendorfree:
1568 m = m0->m_nextpkt;
1569 m0->m_nextpkt = NULL;
1570 m_freem(m0);
1571 for (m0 = m; m != NULL; m = m0) {
1572 m0 = m->m_nextpkt;
1573 m->m_nextpkt = NULL;
1574 if (error == 0) {
1575 #if IPSEC
1576 /* clean ipsec history once it goes out of the node */
1577 ipsec_delaux(m);
1578 #endif /* IPSEC */
1579 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
1580 adv);
1581 } else {
1582 m_freem(m);
1583 }
1584 }
1585
1586 if (error == 0)
1587 ip6stat.ip6s_fragmented++;
1588
1589 done:
1590 ROUTE_RELEASE(&ip6route);
1591 #if IPSEC
1592 ROUTE_RELEASE(&ipsec_state.ro);
1593 if (sp != NULL)
1594 key_freesp(sp, KEY_SADB_UNLOCKED);
1595 #endif /* IPSEC */
1596 #if DUMMYNET
1597 ROUTE_RELEASE(&saved_route);
1598 ROUTE_RELEASE(&saved_ro_pmtu);
1599 #endif /* DUMMYNET */
1600
1601 if (ia != NULL)
1602 IFA_REMREF(&ia->ia_ifa);
1603 if (src_ia != NULL)
1604 IFA_REMREF(&src_ia->ia_ifa);
1605 if (ifp != NULL)
1606 ifnet_release(ifp);
1607 if (origifp != NULL)
1608 ifnet_release(origifp);
1609 return (error);
1610
1611 freehdrs:
1612 if (exthdrs.ip6e_hbh != NULL)
1613 m_freem(exthdrs.ip6e_hbh);
1614 if (exthdrs.ip6e_dest1 != NULL)
1615 m_freem(exthdrs.ip6e_dest1);
1616 if (exthdrs.ip6e_rthdr != NULL)
1617 m_freem(exthdrs.ip6e_rthdr);
1618 if (exthdrs.ip6e_dest2 != NULL)
1619 m_freem(exthdrs.ip6e_dest2);
1620 /* FALLTHRU */
1621 bad:
1622 if (m != NULL)
1623 m_freem(m);
1624 goto done;
1625
1626 #undef ipf_pktopts
1627 #undef exthdrs
1628 #undef ip6route
1629 #undef ipsec_state
1630 #undef saved_route
1631 #undef saved_ro_pmtu
1632 #undef args
1633 }
1634
1635 static int
1636 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1637 {
1638 struct mbuf *m;
1639
1640 if (hlen > MCLBYTES)
1641 return (ENOBUFS); /* XXX */
1642
1643 MGET(m, M_DONTWAIT, MT_DATA);
1644 if (m == NULL)
1645 return (ENOBUFS);
1646
1647 if (hlen > MLEN) {
1648 MCLGET(m, M_DONTWAIT);
1649 if (!(m->m_flags & M_EXT)) {
1650 m_free(m);
1651 return (ENOBUFS);
1652 }
1653 }
1654 m->m_len = hlen;
1655 if (hdr != NULL)
1656 bcopy(hdr, mtod(m, caddr_t), hlen);
1657
1658 *mp = m;
1659 return (0);
1660 }
1661
/*
 * Hand len to the outbound checksum statistics routine of the given
 * protocol.  Only TCP and UDP are tracked for now; any other protocol
 * number is silently ignored.
 */
static void
ip6_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP)
		tcp_out6_cksum_stats(len);
	else if (proto == IPPROTO_UDP)
		udp_out6_cksum_stats(len);
	/* else: keep only TCP or UDP stats for now */
}
1677
1678 /*
1679 * Process a delayed payload checksum calculation (outbound path.)
1680 *
1681 * hoff is the number of bytes beyond the mbuf data pointer which
1682 * points to the IPv6 header. optlen is the number of bytes, if any,
1683 * between the end of IPv6 header and the beginning of the ULP payload
1684 * header, which represents the extension headers. If optlen is less
1685 * than zero, this routine will bail when it detects extension headers.
1686 *
1687 * Returns a bitmask representing all the work done in software.
1688 */
1689 uint32_t
1690 in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
1691 int32_t nxt0, uint32_t csum_flags)
1692 {
1693 unsigned char buf[sizeof (struct ip6_hdr)] __attribute__((aligned(8)));
1694 struct ip6_hdr *ip6;
1695 uint32_t offset, mlen, hlen, olen, sw_csum;
1696 uint16_t csum, ulpoff, plen;
1697 uint8_t nxt;
1698
1699 _CASSERT(sizeof (csum) == sizeof (uint16_t));
1700 VERIFY(m->m_flags & M_PKTHDR);
1701
1702 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
1703
1704 if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0)
1705 goto done;
1706
1707 mlen = m->m_pkthdr.len; /* total mbuf len */
1708 hlen = sizeof (*ip6); /* IPv6 header len */
1709
1710 /* sanity check (need at least IPv6 header) */
1711 if (mlen < (hoff + hlen)) {
1712 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
1713 "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
1714 /* NOTREACHED */
1715 }
1716
1717 /*
1718 * In case the IPv6 header is not contiguous, or not 32-bit
1719 * aligned, copy it to a local buffer.
1720 */
1721 if ((hoff + hlen) > m->m_len ||
1722 !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
1723 m_copydata(m, hoff, hlen, (caddr_t)buf);
1724 ip6 = (struct ip6_hdr *)(void *)buf;
1725 } else {
1726 ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
1727 }
1728
1729 nxt = ip6->ip6_nxt;
1730 plen = ntohs(ip6->ip6_plen);
1731 if (plen != (mlen - (hoff + hlen))) {
1732 plen = OSSwapInt16(plen);
1733 if (plen != (mlen - (hoff + hlen))) {
1734 /* Don't complain for jumbograms */
1735 if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
1736 printf("%s: mbuf 0x%llx proto %d IPv6 "
1737 "plen %d (%x) [swapped %d (%x)] doesn't "
1738 "match actual packet length; %d is used "
1739 "instead\n", __func__,
1740 (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
1741 ip6->ip6_plen, ip6->ip6_plen, plen, plen,
1742 (mlen - (hoff + hlen)));
1743 }
1744 plen = mlen - (hoff + hlen);
1745 }
1746 }
1747
1748 if (optlen < 0) {
1749 /* next header isn't TCP/UDP and we don't know optlen, bail */
1750 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
1751 sw_csum = 0;
1752 goto done;
1753 }
1754 olen = 0;
1755 } else {
1756 /* caller supplied the original transport number; use it */
1757 if (nxt0 >= 0)
1758 nxt = nxt0;
1759 olen = optlen;
1760 }
1761
1762 offset = hoff + hlen + olen; /* ULP header */
1763
1764 /* sanity check */
1765 if (mlen < offset) {
1766 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
1767 "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
1768 /* NOTREACHED */
1769 }
1770
1771 /*
1772 * offset is added to the lower 16-bit value of csum_data,
1773 * which is expected to contain the ULP offset; therefore
1774 * CSUM_PARTIAL offset adjustment must be undone.
1775 */
1776 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL|CSUM_DATA_VALID)) ==
1777 (CSUM_PARTIAL|CSUM_DATA_VALID)) {
1778 /*
1779 * Get back the original ULP offset (this will
1780 * undo the CSUM_PARTIAL logic in ip6_output.)
1781 */
1782 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
1783 m->m_pkthdr.csum_tx_start);
1784 }
1785
1786 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
1787
1788 if (mlen < (ulpoff + sizeof (csum))) {
1789 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
1790 "cksum offset (%u) cksum flags 0x%x\n", __func__,
1791 m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
1792 /* NOTREACHED */
1793 }
1794
1795 csum = inet6_cksum(m, 0, offset, plen - olen);
1796
1797 /* Update stats */
1798 ip6_out_cksum_stats(nxt, plen - olen);
1799
1800 /* RFC1122 4.1.3.4 */
1801 if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6))
1802 csum = 0xffff;
1803
1804 /* Insert the checksum in the ULP csum field */
1805 offset += ulpoff;
1806 if ((offset + sizeof (csum)) > m->m_len) {
1807 m_copyback(m, offset, sizeof (csum), &csum);
1808 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
1809 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
1810 } else {
1811 bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
1812 }
1813 m->m_pkthdr.csum_flags &=
1814 ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
1815
1816 done:
1817 return (sw_csum);
1818 }
1819
1820 /*
1821 * Insert jumbo payload option.
1822 */
1823 static int
1824 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1825 {
1826 struct mbuf *mopt;
1827 u_char *optbuf;
1828 u_int32_t v;
1829
1830 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1831
1832 /*
1833 * If there is no hop-by-hop options header, allocate new one.
1834 * If there is one but it doesn't have enough space to store the
1835 * jumbo payload option, allocate a cluster to store the whole options.
1836 * Otherwise, use it to store the options.
1837 */
1838 if (exthdrs->ip6e_hbh == NULL) {
1839 MGET(mopt, M_DONTWAIT, MT_DATA);
1840 if (mopt == NULL)
1841 return (ENOBUFS);
1842 mopt->m_len = JUMBOOPTLEN;
1843 optbuf = mtod(mopt, u_char *);
1844 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1845 exthdrs->ip6e_hbh = mopt;
1846 } else {
1847 struct ip6_hbh *hbh;
1848
1849 mopt = exthdrs->ip6e_hbh;
1850 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1851 /*
1852 * XXX assumption:
1853 * - exthdrs->ip6e_hbh is not referenced from places
1854 * other than exthdrs.
1855 * - exthdrs->ip6e_hbh is not an mbuf chain.
1856 */
1857 u_int32_t oldoptlen = mopt->m_len;
1858 struct mbuf *n;
1859
1860 /*
1861 * XXX: give up if the whole (new) hbh header does
1862 * not fit even in an mbuf cluster.
1863 */
1864 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1865 return (ENOBUFS);
1866
1867 /*
1868 * As a consequence, we must always prepare a cluster
1869 * at this point.
1870 */
1871 MGET(n, M_DONTWAIT, MT_DATA);
1872 if (n != NULL) {
1873 MCLGET(n, M_DONTWAIT);
1874 if (!(n->m_flags & M_EXT)) {
1875 m_freem(n);
1876 n = NULL;
1877 }
1878 }
1879 if (n == NULL)
1880 return (ENOBUFS);
1881 n->m_len = oldoptlen + JUMBOOPTLEN;
1882 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1883 oldoptlen);
1884 optbuf = mtod(n, u_char *) + oldoptlen;
1885 m_freem(mopt);
1886 mopt = exthdrs->ip6e_hbh = n;
1887 } else {
1888 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1889 mopt->m_len += JUMBOOPTLEN;
1890 }
1891 optbuf[0] = IP6OPT_PADN;
1892 optbuf[1] = 1;
1893
1894 /*
1895 * Adjust the header length according to the pad and
1896 * the jumbo payload option.
1897 */
1898 hbh = mtod(mopt, struct ip6_hbh *);
1899 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1900 }
1901
1902 /* fill in the option. */
1903 optbuf[2] = IP6OPT_JUMBO;
1904 optbuf[3] = 4;
1905 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1906 bcopy(&v, &optbuf[4], sizeof (u_int32_t));
1907
1908 /* finally, adjust the packet header length */
1909 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1910
1911 return (0);
1912 #undef JUMBOOPTLEN
1913 }
1914
1915 /*
1916 * Insert fragment header and copy unfragmentable header portions.
1917 */
1918 static int
1919 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
1920 struct ip6_frag **frghdrp)
1921 {
1922 struct mbuf *n, *mlast;
1923
1924 if (hlen > sizeof (struct ip6_hdr)) {
1925 n = m_copym(m0, sizeof (struct ip6_hdr),
1926 hlen - sizeof (struct ip6_hdr), M_DONTWAIT);
1927 if (n == NULL)
1928 return (ENOBUFS);
1929 m->m_next = n;
1930 } else
1931 n = m;
1932
1933 /* Search for the last mbuf of unfragmentable part. */
1934 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1935 ;
1936
1937 if (!(mlast->m_flags & M_EXT) &&
1938 M_TRAILINGSPACE(mlast) >= sizeof (struct ip6_frag)) {
1939 /* use the trailing space of the last mbuf for the frag hdr */
1940 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1941 mlast->m_len);
1942 mlast->m_len += sizeof (struct ip6_frag);
1943 m->m_pkthdr.len += sizeof (struct ip6_frag);
1944 } else {
1945 /* allocate a new mbuf for the fragment header */
1946 struct mbuf *mfrg;
1947
1948 MGET(mfrg, M_DONTWAIT, MT_DATA);
1949 if (mfrg == NULL)
1950 return (ENOBUFS);
1951 mfrg->m_len = sizeof (struct ip6_frag);
1952 *frghdrp = mtod(mfrg, struct ip6_frag *);
1953 mlast->m_next = mfrg;
1954 }
1955
1956 return (0);
1957 }
1958
1959 static int
1960 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1961 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
1962 boolean_t *alwaysfragp)
1963 {
1964 u_int32_t mtu = 0;
1965 boolean_t alwaysfrag = FALSE;
1966 int error = 0;
1967
1968 if (ro_pmtu != ro) {
1969 /* The first hop and the final destination may differ. */
1970 struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
1971 if (ROUTE_UNUSABLE(ro_pmtu) ||
1972 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
1973 ROUTE_RELEASE(ro_pmtu);
1974
1975 if (ro_pmtu->ro_rt == NULL) {
1976 bzero(sa6_dst, sizeof (*sa6_dst));
1977 sa6_dst->sin6_family = AF_INET6;
1978 sa6_dst->sin6_len = sizeof (struct sockaddr_in6);
1979 sa6_dst->sin6_addr = *dst;
1980
1981 rtalloc_scoped((struct route *)ro_pmtu,
1982 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
1983 }
1984 }
1985
1986 if (ro_pmtu->ro_rt != NULL) {
1987 u_int32_t ifmtu;
1988
1989 lck_rw_lock_shared(nd_if_rwlock);
1990 /* Access without acquiring nd_ifinfo lock for performance */
1991 ifmtu = IN6_LINKMTU(ifp);
1992 lck_rw_done(nd_if_rwlock);
1993
1994 /*
1995 * Access rmx_mtu without holding the route entry lock,
1996 * for performance; this isn't something that changes
1997 * often, so optimize.
1998 */
1999 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2000 if (mtu > ifmtu || mtu == 0) {
2001 /*
2002 * The MTU on the route is larger than the MTU on
2003 * the interface! This shouldn't happen, unless the
2004 * MTU of the interface has been changed after the
2005 * interface was brought up. Change the MTU in the
2006 * route to match the interface MTU (as long as the
2007 * field isn't locked).
2008 *
2009 * if MTU on the route is 0, we need to fix the MTU.
2010 * this case happens with path MTU discovery timeouts.
2011 */
2012 mtu = ifmtu;
2013 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
2014 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
2015 } else if (mtu < IPV6_MMTU) {
2016 /*
2017 * RFC2460 section 5, last paragraph:
2018 * if we record ICMPv6 too big message with
2019 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
2020 * or smaller, with framgent header attached.
2021 * (fragment header is needed regardless from the
2022 * packet size, for translators to identify packets)
2023 */
2024 alwaysfrag = TRUE;
2025 mtu = IPV6_MMTU;
2026 }
2027 } else {
2028 if (ifp) {
2029 lck_rw_lock_shared(nd_if_rwlock);
2030 /* Don't hold nd_ifinfo lock for performance */
2031 mtu = IN6_LINKMTU(ifp);
2032 lck_rw_done(nd_if_rwlock);
2033 } else {
2034 error = EHOSTUNREACH; /* XXX */
2035 }
2036 }
2037
2038 *mtup = mtu;
2039 if (alwaysfragp != NULL)
2040 *alwaysfragp = alwaysfrag;
2041 return (error);
2042 }
2043
2044 /*
2045 * IP6 socket option processing.
2046 */
/*
 * ip6_ctloutput: get or set an IPPROTO_IPV6-level socket option.
 *
 * so    socket whose inpcb (sotoinpcb(so)) carries the option state
 * sopt  the request; sopt_level, sopt_dir, sopt_name, sopt_valsize and
 *       sopt_p (requesting process) are read below
 *
 * Returns 0 on success or an errno value.  A level other than
 * IPPROTO_IPV6 yields EINVAL; an option name that is not listed for the
 * requested direction yields ENOPROTOOPT.  A direction that is neither
 * SOPT_SET nor SOPT_GET matches no case and returns 0.
 */
2047 int
2048 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
2049 {
2050 int optdatalen, uproto;
2051 void *optdata;
2052 int privileged;
2053 struct inpcb *in6p = sotoinpcb(so);
2054 int error = 0, optval = 0;
2055 int level, op = -1, optname = 0;
2056 int optlen = 0;
2057 struct proc *p;
2058
2059 VERIFY(sopt != NULL);
2060
2061 level = sopt->sopt_level;
2062 op = sopt->sopt_dir;
2063 optname = sopt->sopt_name;
2064 optlen = sopt->sopt_valsize;
2065 p = sopt->sopt_p;
2066 uproto = (int)SOCK_PROTO(so);
2067
/* non-zero when proc_suser() returns 0 for the requesting process */
2068 privileged = (proc_suser(p) == 0);
2069
2070 if (level == IPPROTO_IPV6) {
2071 switch (op) {
2072 case SOPT_SET:
2073 switch (optname) {
/*
 * RFC 2292 style bulk option buffer: copy the user data into an
 * mbuf and hand it to ip6_pcbopts(), which replaces the PCB's
 * sticky output options.
 */
2074 case IPV6_2292PKTOPTIONS: {
2075 struct mbuf *m;
2076
2077 error = soopt_getm(sopt, &m);
2078 if (error != 0)
2079 break;
2080 error = soopt_mcopyin(sopt, m);
2081 if (error != 0)
2082 break;
2083 error = ip6_pcbopts(&in6p->in6p_outputopts,
2084 m, so, sopt);
2085 m_freem(m);
2086 break;
2087 }
2088
2089 /*
2090 * Use of some Hop-by-Hop options or some
2091 * Destination options, might require special
2092 * privilege. That is, normal applications
2093 * (without special privilege) might be forbidden
2094 * from setting certain options in outgoing packets,
2095 * and might never see certain options in received
2096 * packets. [RFC 2292 Section 6]
2097 * KAME specific note:
2098 * KAME prevents non-privileged users from sending or
2099 * receiving ANY hbh/dst options in order to avoid
2100 * overhead of parsing options in the kernel.
2101 */
2102 case IPV6_RECVHOPOPTS:
2103 case IPV6_RECVDSTOPTS:
2104 case IPV6_RECVRTHDRDSTOPTS:
/*
 * NOTE: error is still 0 at this point, so an unprivileged
 * request returns success without changing any flag.
 */
2105 if (!privileged)
2106 break;
2107 /* FALLTHROUGH */
2108 case IPV6_UNICAST_HOPS:
2109 case IPV6_HOPLIMIT:
2110 case IPV6_RECVPKTINFO:
2111 case IPV6_RECVHOPLIMIT:
2112 case IPV6_RECVRTHDR:
2113 case IPV6_RECVPATHMTU:
2114 case IPV6_RECVTCLASS:
2115 case IPV6_V6ONLY:
2116 case IPV6_AUTOFLOWLABEL:
/* all options in this group take exactly one int */
2117 if (optlen != sizeof (int)) {
2118 error = EINVAL;
2119 break;
2120 }
2121 error = sooptcopyin(sopt, &optval,
2122 sizeof (optval), sizeof (optval));
2123 if (error)
2124 break;
2125
2126 switch (optname) {
2127 case IPV6_UNICAST_HOPS:
2128 if (optval < -1 || optval >= 256) {
2129 error = EINVAL;
2130 } else {
2131 /* -1 = kernel default */
2132 in6p->in6p_hops = optval;
2133 if (in6p->inp_vflag &
2134 INP_IPV4) {
2135 in6p->inp_ip_ttl =
2136 optval;
2137 }
2138 }
2139 break;
/*
 * OPTSET(bit):     set `bit' in inp_flags when optval is non-zero,
 *                  clear it otherwise.
 * OPTSET2292(bit): same, and additionally sets IN6P_RFC2292.
 * OPTBIT(bit):     1 if any bit of `bit' is set in inp_flags, else 0.
 */
2140 #define OPTSET(bit) do { \
2141 if (optval) \
2142 in6p->inp_flags |= (bit); \
2143 else \
2144 in6p->inp_flags &= ~(bit); \
2145 } while (0)
2146
2147 #define OPTSET2292(bit) do { \
2148 in6p->inp_flags |= IN6P_RFC2292; \
2149 if (optval) \
2150 in6p->inp_flags |= (bit); \
2151 else \
2152 in6p->inp_flags &= ~(bit); \
2153 } while (0)
2154
2155 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
2156
2157 case IPV6_RECVPKTINFO:
2158 /* cannot mix with RFC2292 */
2159 if (OPTBIT(IN6P_RFC2292)) {
2160 error = EINVAL;
2161 break;
2162 }
2163 OPTSET(IN6P_PKTINFO);
2164 break;
2165
2166 case IPV6_HOPLIMIT: {
2167 struct ip6_pktopts **optp;
2168
2169 /* cannot mix with RFC2292 */
2170 if (OPTBIT(IN6P_RFC2292)) {
2171 error = EINVAL;
2172 break;
2173 }
2174 optp = &in6p->in6p_outputopts;
2175 error = ip6_pcbopt(IPV6_HOPLIMIT,
2176 (u_char *)&optval, sizeof (optval),
2177 optp, uproto);
2178 break;
2179 }
2180
2181 case IPV6_RECVHOPLIMIT:
2182 /* cannot mix with RFC2292 */
2183 if (OPTBIT(IN6P_RFC2292)) {
2184 error = EINVAL;
2185 break;
2186 }
2187 OPTSET(IN6P_HOPLIMIT);
2188 break;
2189
2190 case IPV6_RECVHOPOPTS:
2191 /* cannot mix with RFC2292 */
2192 if (OPTBIT(IN6P_RFC2292)) {
2193 error = EINVAL;
2194 break;
2195 }
2196 OPTSET(IN6P_HOPOPTS);
2197 break;
2198
2199 case IPV6_RECVDSTOPTS:
2200 /* cannot mix with RFC2292 */
2201 if (OPTBIT(IN6P_RFC2292)) {
2202 error = EINVAL;
2203 break;
2204 }
2205 OPTSET(IN6P_DSTOPTS);
2206 break;
2207
2208 case IPV6_RECVRTHDRDSTOPTS:
2209 /* cannot mix with RFC2292 */
2210 if (OPTBIT(IN6P_RFC2292)) {
2211 error = EINVAL;
2212 break;
2213 }
2214 OPTSET(IN6P_RTHDRDSTOPTS);
2215 break;
2216
2217 case IPV6_RECVRTHDR:
2218 /* cannot mix with RFC2292 */
2219 if (OPTBIT(IN6P_RFC2292)) {
2220 error = EINVAL;
2221 break;
2222 }
2223 OPTSET(IN6P_RTHDR);
2224 break;
2225
2226 case IPV6_RECVPATHMTU:
2227 /*
2228 * We ignore this option for TCP
2229 * sockets.
2230 * (RFC3542 leaves this case
2231 * unspecified.)
2232 */
2233 if (uproto != IPPROTO_TCP)
2234 OPTSET(IN6P_MTU);
2235 break;
2236
2237 case IPV6_V6ONLY:
2238 /*
2239 * make setsockopt(IPV6_V6ONLY)
2240 * available only prior to bind(2).
2241 * see ipng mailing list, Jun 22 2001.
2242 */
2243 if (in6p->inp_lport ||
2244 !IN6_IS_ADDR_UNSPECIFIED(
2245 &in6p->in6p_laddr)) {
2246 error = EINVAL;
2247 break;
2248 }
2249 OPTSET(IN6P_IPV6_V6ONLY);
/* keep the INP_IPV4 bit of inp_vflag the inverse of V6ONLY */
2250 if (optval)
2251 in6p->inp_vflag &= ~INP_IPV4;
2252 else
2253 in6p->inp_vflag |= INP_IPV4;
2254 break;
2255
2256 case IPV6_RECVTCLASS:
2257 /* we can mix with RFC2292 */
2258 OPTSET(IN6P_TCLASS);
2259 break;
2260
2261 case IPV6_AUTOFLOWLABEL:
2262 OPTSET(IN6P_AUTOFLOWLABEL);
2263 break;
2264
2265 }
2266 break;
2267
/*
 * Integer-valued sticky output options: the value is validated and
 * stored by ip6_pcbopt() in in6p_outputopts.
 */
2268 case IPV6_TCLASS:
2269 case IPV6_DONTFRAG:
2270 case IPV6_USE_MIN_MTU:
2271 case IPV6_PREFER_TEMPADDR: {
2272 struct ip6_pktopts **optp;
2273
2274 if (optlen != sizeof (optval)) {
2275 error = EINVAL;
2276 break;
2277 }
2278 error = sooptcopyin(sopt, &optval,
2279 sizeof (optval), sizeof (optval));
2280 if (error)
2281 break;
2282
2283 optp = &in6p->in6p_outputopts;
2284 error = ip6_pcbopt(optname, (u_char *)&optval,
2285 sizeof (optval), optp, uproto);
2286 break;
2287 }
2288
2289 case IPV6_2292PKTINFO:
2290 case IPV6_2292HOPLIMIT:
2291 case IPV6_2292HOPOPTS:
2292 case IPV6_2292DSTOPTS:
2293 case IPV6_2292RTHDR:
2294 /* RFC 2292 */
2295 if (optlen != sizeof (int)) {
2296 error = EINVAL;
2297 break;
2298 }
2299 error = sooptcopyin(sopt, &optval,
2300 sizeof (optval), sizeof (optval));
2301 if (error)
2302 break;
2303 switch (optname) {
2304 case IPV6_2292PKTINFO:
2305 OPTSET2292(IN6P_PKTINFO);
2306 break;
2307 case IPV6_2292HOPLIMIT:
2308 OPTSET2292(IN6P_HOPLIMIT);
2309 break;
2310 case IPV6_2292HOPOPTS:
2311 /*
2312 * Check super-user privilege.
2313 * See comments for IPV6_RECVHOPOPTS.
2314 */
/*
 * Unlike the IPV6_RECV* cases above, an unprivileged
 * caller gets EPERM here rather than a silent success.
 */
2315 if (!privileged)
2316 return (EPERM);
2317 OPTSET2292(IN6P_HOPOPTS);
2318 break;
2319 case IPV6_2292DSTOPTS:
2320 if (!privileged)
2321 return (EPERM);
2322 OPTSET2292(IN6P_DSTOPTS|
2323 IN6P_RTHDRDSTOPTS); /* XXX */
2324 break;
2325 case IPV6_2292RTHDR:
2326 OPTSET2292(IN6P_RTHDR);
2327 break;
2328 }
2329 break;
2330
2331 case IPV6_3542PKTINFO:
2332 case IPV6_3542HOPOPTS:
2333 case IPV6_3542RTHDR:
2334 case IPV6_3542DSTOPTS:
2335 case IPV6_RTHDRDSTOPTS:
2336 case IPV6_3542NEXTHOP: {
2337 struct ip6_pktopts **optp;
2338 /* new advanced API (RFC3542) */
2339 struct mbuf *m;
2340
2341 /* cannot mix with RFC2292 */
2342 if (OPTBIT(IN6P_RFC2292)) {
2343 error = EINVAL;
2344 break;
2345 }
2346 error = soopt_getm(sopt, &m);
2347 if (error != 0)
2348 break;
2349 error = soopt_mcopyin(sopt, m);
2350 if (error != 0)
2351 break;
2352
/* only the first mbuf's data (m_len bytes) is passed on */
2353 optp = &in6p->in6p_outputopts;
2354 error = ip6_pcbopt(optname, mtod(m, u_char *),
2355 m->m_len, optp, uproto);
2356 m_freem(m);
2357 break;
2358 }
/*
 * Only OPTSET is undefined here; OPTSET2292 and OPTBIT remain defined
 * for the rest of this function (OPTBIT is used by the SOPT_GET half).
 */
2359 #undef OPTSET
2360 case IPV6_MULTICAST_IF:
2361 case IPV6_MULTICAST_HOPS:
2362 case IPV6_MULTICAST_LOOP:
2363 case IPV6_JOIN_GROUP:
2364 case IPV6_LEAVE_GROUP:
2365 case IPV6_MSFILTER:
2366 case MCAST_BLOCK_SOURCE:
2367 case MCAST_UNBLOCK_SOURCE:
2368 case MCAST_JOIN_GROUP:
2369 case MCAST_LEAVE_GROUP:
2370 case MCAST_JOIN_SOURCE_GROUP:
2371 case MCAST_LEAVE_SOURCE_GROUP:
2372 error = ip6_setmoptions(in6p, sopt);
2373 break;
2374
2375 case IPV6_PORTRANGE:
2376 error = sooptcopyin(sopt, &optval,
2377 sizeof (optval), sizeof (optval));
2378 if (error)
2379 break;
2380
/* INP_HIGHPORT and INP_LOWPORT are never left set together */
2381 switch (optval) {
2382 case IPV6_PORTRANGE_DEFAULT:
2383 in6p->inp_flags &= ~(INP_LOWPORT);
2384 in6p->inp_flags &= ~(INP_HIGHPORT);
2385 break;
2386
2387 case IPV6_PORTRANGE_HIGH:
2388 in6p->inp_flags &= ~(INP_LOWPORT);
2389 in6p->inp_flags |= INP_HIGHPORT;
2390 break;
2391
2392 case IPV6_PORTRANGE_LOW:
2393 in6p->inp_flags &= ~(INP_HIGHPORT);
2394 in6p->inp_flags |= INP_LOWPORT;
2395 break;
2396
2397 default:
2398 error = EINVAL;
2399 break;
2400 }
2401 break;
2402 #if IPSEC
2403 case IPV6_IPSEC_POLICY: {
2404 caddr_t req = NULL;
2405 size_t len = 0;
2406 struct mbuf *m;
2407
2408 if ((error = soopt_getm(sopt, &m)) != 0)
2409 break;
2410 if ((error = soopt_mcopyin(sopt, m)) != 0)
2411 break;
2412
2413 req = mtod(m, caddr_t);
2414 len = m->m_len;
2415 error = ipsec6_set_policy(in6p, optname, req,
2416 len, privileged);
2417 m_freem(m);
2418 break;
2419 }
2420 #endif /* IPSEC */
2421 #if IPFIREWALL
/* firewall requests are forwarded to the hook, loading it on demand */
2422 case IPV6_FW_ADD:
2423 case IPV6_FW_DEL:
2424 case IPV6_FW_FLUSH:
2425 case IPV6_FW_ZERO: {
2426 if (ip6_fw_ctl_ptr == NULL)
2427 load_ip6fw();
2428 if (ip6_fw_ctl_ptr != NULL)
2429 error = (*ip6_fw_ctl_ptr)(sopt);
2430 else
2431 error = ENOPROTOOPT;
2432 break;
2433 }
2434 #endif /* IPFIREWALL */
2435 /*
2436 * IPv6 variant of IP_BOUND_IF; for details see
2437 * comments on IP_BOUND_IF in ip_ctloutput().
2438 */
2439 case IPV6_BOUND_IF:
2440 /* This option is settable only on IPv6 */
2441 if (!(in6p->inp_vflag & INP_IPV6)) {
2442 error = EINVAL;
2443 break;
2444 }
2445
2446 error = sooptcopyin(sopt, &optval,
2447 sizeof (optval), sizeof (optval));
2448
2449 if (error)
2450 break;
2451
2452 error = inp_bindif(in6p, optval, NULL);
2453 break;
2454
2455 case IPV6_NO_IFT_CELLULAR:
2456 /* This option is settable only for IPv6 */
2457 if (!(in6p->inp_vflag & INP_IPV6)) {
2458 error = EINVAL;
2459 break;
2460 }
2461
2462 error = sooptcopyin(sopt, &optval,
2463 sizeof (optval), sizeof (optval));
2464
2465 if (error)
2466 break;
2467
2468 /* once set, it cannot be unset */
2469 if (!optval &&
2470 (in6p->inp_flags & INP_NO_IFT_CELLULAR)) {
2471 error = EINVAL;
2472 break;
2473 }
2474
/*
 * NOTE(review): optval is consulted only by the check above, so
 * so_set_restrictions() is also reached with optval == 0 when the
 * flag is not yet set -- confirm that this is intended.
 */
2475 error = so_set_restrictions(so,
2476 SO_RESTRICT_DENY_CELLULAR);
2477 break;
2478
2479 case IPV6_OUT_IF:
2480 /* This option is not settable */
2481 error = EINVAL;
2482 break;
2483
2484 default:
2485 error = ENOPROTOOPT;
2486 break;
2487 }
2488 break;
2489
2490 case SOPT_GET:
2491 switch (optname) {
2492
2493 case IPV6_2292PKTOPTIONS:
2494 /*
2495 * RFC3542 (effectively) deprecated the
2496 * semantics of the 2292-style pktoptions.
2497 * Since it was not reliable in nature (i.e.,
2498 * applications had to expect the lack of some
2499 * information after all), it would make sense
2500 * to simplify this part by always returning
2501 * empty data.
2502 */
2503 sopt->sopt_valsize = 0;
2504 break;
2505
/* options reported as a single int derived from PCB state */
2506 case IPV6_RECVHOPOPTS:
2507 case IPV6_RECVDSTOPTS:
2508 case IPV6_RECVRTHDRDSTOPTS:
2509 case IPV6_UNICAST_HOPS:
2510 case IPV6_RECVPKTINFO:
2511 case IPV6_RECVHOPLIMIT:
2512 case IPV6_RECVRTHDR:
2513 case IPV6_RECVPATHMTU:
2514 case IPV6_V6ONLY:
2515 case IPV6_PORTRANGE:
2516 case IPV6_RECVTCLASS:
2517 case IPV6_AUTOFLOWLABEL:
2518 switch (optname) {
2519
2520 case IPV6_RECVHOPOPTS:
2521 optval = OPTBIT(IN6P_HOPOPTS);
2522 break;
2523
2524 case IPV6_RECVDSTOPTS:
2525 optval = OPTBIT(IN6P_DSTOPTS);
2526 break;
2527
2528 case IPV6_RECVRTHDRDSTOPTS:
2529 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2530 break;
2531
2532 case IPV6_UNICAST_HOPS:
2533 optval = in6p->in6p_hops;
2534 break;
2535
2536 case IPV6_RECVPKTINFO:
2537 optval = OPTBIT(IN6P_PKTINFO);
2538 break;
2539
2540 case IPV6_RECVHOPLIMIT:
2541 optval = OPTBIT(IN6P_HOPLIMIT);
2542 break;
2543
2544 case IPV6_RECVRTHDR:
2545 optval = OPTBIT(IN6P_RTHDR);
2546 break;
2547
2548 case IPV6_RECVPATHMTU:
2549 optval = OPTBIT(IN6P_MTU);
2550 break;
2551
2552 case IPV6_V6ONLY:
2553 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2554 break;
2555
2556 case IPV6_PORTRANGE: {
2557 int flags;
2558 flags = in6p->inp_flags;
2559 if (flags & INP_HIGHPORT)
2560 optval = IPV6_PORTRANGE_HIGH;
2561 else if (flags & INP_LOWPORT)
2562 optval = IPV6_PORTRANGE_LOW;
2563 else
2564 optval = 0;
2565 break;
2566 }
2567 case IPV6_RECVTCLASS:
2568 optval = OPTBIT(IN6P_TCLASS);
2569 break;
2570
2571 case IPV6_AUTOFLOWLABEL:
2572 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2573 break;
2574 }
/* nothing in the inner switch above assigns error */
2575 if (error)
2576 break;
2577 error = sooptcopyout(sopt, &optval,
2578 sizeof (optval));
2579 break;
2580
/*
 * Report the path MTU towards the connected peer (in6p_faddr) as a
 * struct ip6_mtuinfo; requires a connected socket.
 */
2581 case IPV6_PATHMTU: {
2582 u_int32_t pmtu = 0;
2583 struct ip6_mtuinfo mtuinfo;
2584 struct route_in6 sro;
2585
2586 bzero(&sro, sizeof (sro));
2587
2588 if (!(so->so_state & SS_ISCONNECTED))
2589 return (ENOTCONN);
2590 /*
2591 * XXX: we dot not consider the case of source
2592 * routing, or optional information to specify
2593 * the outgoing interface.
2594 */
/* sro is a scratch route used only for this lookup */
2595 error = ip6_getpmtu(&sro, NULL, NULL,
2596 &in6p->in6p_faddr, &pmtu, NULL);
2597 ROUTE_RELEASE(&sro);
2598 if (error)
2599 break;
2600 if (pmtu > IPV6_MAXPACKET)
2601 pmtu = IPV6_MAXPACKET;
2602
2603 bzero(&mtuinfo, sizeof (mtuinfo));
2604 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2605 optdata = (void *)&mtuinfo;
2606 optdatalen = sizeof (mtuinfo);
2607 error = sooptcopyout(sopt, optdata,
2608 optdatalen);
2609 break;
2610 }
2611
2612 case IPV6_2292PKTINFO:
2613 case IPV6_2292HOPLIMIT:
2614 case IPV6_2292HOPOPTS:
2615 case IPV6_2292RTHDR:
2616 case IPV6_2292DSTOPTS:
2617 switch (optname) {
2618 case IPV6_2292PKTINFO:
2619 optval = OPTBIT(IN6P_PKTINFO);
2620 break;
2621 case IPV6_2292HOPLIMIT:
2622 optval = OPTBIT(IN6P_HOPLIMIT);
2623 break;
2624 case IPV6_2292HOPOPTS:
2625 optval = OPTBIT(IN6P_HOPOPTS);
2626 break;
2627 case IPV6_2292RTHDR:
2628 optval = OPTBIT(IN6P_RTHDR);
2629 break;
2630 case IPV6_2292DSTOPTS:
/* 1 if either of the two flags is set */
2631 optval = OPTBIT(IN6P_DSTOPTS|
2632 IN6P_RTHDRDSTOPTS);
2633 break;
2634 }
2635 error = sooptcopyout(sopt, &optval,
2636 sizeof (optval));
2637 break;
2638
/* sticky output options are read back through ip6_getpcbopt() */
2639 case IPV6_PKTINFO:
2640 case IPV6_HOPOPTS:
2641 case IPV6_RTHDR:
2642 case IPV6_DSTOPTS:
2643 case IPV6_RTHDRDSTOPTS:
2644 case IPV6_NEXTHOP:
2645 case IPV6_TCLASS:
2646 case IPV6_DONTFRAG:
2647 case IPV6_USE_MIN_MTU:
2648 case IPV6_PREFER_TEMPADDR:
2649 error = ip6_getpcbopt(in6p->in6p_outputopts,
2650 optname, sopt);
2651 break;
2652
2653 case IPV6_MULTICAST_IF:
2654 case IPV6_MULTICAST_HOPS:
2655 case IPV6_MULTICAST_LOOP:
2656 case IPV6_MSFILTER:
2657 error = ip6_getmoptions(in6p, sopt);
2658 break;
2659 #if IPSEC
2660 case IPV6_IPSEC_POLICY: {
2661 caddr_t req = NULL;
2662 size_t len = 0;
2663 struct mbuf *m = NULL;
2664 struct mbuf *mp = NULL;
2665
2666 error = soopt_getm(sopt, &m);
2667 if (error != 0)
2668 break;
2669 error = soopt_mcopyin(sopt, m);
2670 if (error != 0)
2671 break;
2672
/* m carries the request, mp receives the policy to copy out */
2673 req = mtod(m, caddr_t);
2674 len = m->m_len;
2675 error = ipsec6_get_policy(in6p, req, len, &mp);
2676 if (error == 0)
2677 error = soopt_mcopyout(sopt, mp);
2678 if (mp != NULL)
2679 m_freem(mp);
2680 m_freem(m);
2681 break;
2682 }
2683 #endif /* IPSEC */
2684 #if IPFIREWALL
2685 case IPV6_FW_GET: {
2686 if (ip6_fw_ctl_ptr == NULL)
2687 load_ip6fw();
2688 if (ip6_fw_ctl_ptr != NULL)
2689 error = (*ip6_fw_ctl_ptr)(sopt);
2690 else
2691 error = ENOPROTOOPT;
2692 break;
2693 }
2694 #endif /* IPFIREWALL */
2695 case IPV6_BOUND_IF:
/* optval keeps its initial value 0 when INP_BOUND_IF is clear */
2696 if (in6p->inp_flags & INP_BOUND_IF)
2697 optval = in6p->inp_boundifp->if_index;
2698 error = sooptcopyout(sopt, &optval,
2699 sizeof (optval));
2700 break;
2701
2702 case IPV6_NO_IFT_CELLULAR:
2703 optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
2704 ? 1 : 0;
2705 error = sooptcopyout(sopt, &optval,
2706 sizeof (optval));
2707 break;
2708
2709 case IPV6_OUT_IF:
/* index of in6p_last_outifp, or 0 when that pointer is NULL */
2710 optval = (in6p->in6p_last_outifp != NULL) ?
2711 in6p->in6p_last_outifp->if_index : 0;
2712 error = sooptcopyout(sopt, &optval,
2713 sizeof (optval));
2714 break;
2715
2716 default:
2717 error = ENOPROTOOPT;
2718 break;
2719 }
2720 break;
2721 }
2722 } else {
2723 error = EINVAL;
2724 }
2725 return (error);
2726 }
2727
2728 int
2729 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2730 {
2731 int error = 0, optval, optlen;
2732 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2733 struct inpcb *in6p = sotoinpcb(so);
2734 int level, op, optname;
2735
2736 level = sopt->sopt_level;
2737 op = sopt->sopt_dir;
2738 optname = sopt->sopt_name;
2739 optlen = sopt->sopt_valsize;
2740
2741 if (level != IPPROTO_IPV6)
2742 return (EINVAL);
2743
2744 switch (optname) {
2745 case IPV6_CHECKSUM:
2746 /*
2747 * For ICMPv6 sockets, no modification allowed for checksum
2748 * offset, permit "no change" values to help existing apps.
2749 *
2750 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2751 * for an ICMPv6 socket will fail."
2752 * The current behavior does not meet RFC3542.
2753 */
2754 switch (op) {
2755 case SOPT_SET:
2756 if (optlen != sizeof (int)) {
2757 error = EINVAL;
2758 break;
2759 }
2760 error = sooptcopyin(sopt, &optval, sizeof (optval),
2761 sizeof (optval));
2762 if (error)
2763 break;
2764 if ((optval % 2) != 0) {
2765 /* the API assumes even offset values */
2766 error = EINVAL;
2767 } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
2768 if (optval != icmp6off)
2769 error = EINVAL;
2770 } else {
2771 in6p->in6p_cksum = optval;
2772 }
2773 break;
2774
2775 case SOPT_GET:
2776 if (SOCK_PROTO(so) == IPPROTO_ICMPV6)
2777 optval = icmp6off;
2778 else
2779 optval = in6p->in6p_cksum;
2780
2781 error = sooptcopyout(sopt, &optval, sizeof (optval));
2782 break;
2783
2784 default:
2785 error = EINVAL;
2786 break;
2787 }
2788 break;
2789
2790 default:
2791 error = ENOPROTOOPT;
2792 break;
2793 }
2794
2795 return (error);
2796 }
2797
2798 /*
2799 * Set up IP6 options in pcb for insertion in output packets or
2800 * specifying behavior of outgoing packets.
2801 */
2802 static int
2803 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
2804 struct sockopt *sopt)
2805 {
2806 #pragma unused(sopt)
2807 struct ip6_pktopts *opt = *pktopt;
2808 int error = 0;
2809
2810 /* turn off any old options. */
2811 if (opt != NULL) {
2812 #if DIAGNOSTIC
2813 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2814 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2815 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2816 printf("%s: all specified options are cleared.\n",
2817 __func__);
2818 #endif
2819 ip6_clearpktopts(opt, -1);
2820 } else {
2821 opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK);
2822 if (opt == NULL)
2823 return (ENOBUFS);
2824 }
2825 *pktopt = NULL;
2826
2827 if (m == NULL || m->m_len == 0) {
2828 /*
2829 * Only turning off any previous options, regardless of
2830 * whether the opt is just created or given.
2831 */
2832 if (opt != NULL)
2833 FREE(opt, M_IP6OPT);
2834 return (0);
2835 }
2836
2837 /* set options specified by user. */
2838 if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) {
2839 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2840 FREE(opt, M_IP6OPT);
2841 return (error);
2842 }
2843 *pktopt = opt;
2844 return (0);
2845 }
2846
2847 /*
2848 * initialize ip6_pktopts. beware that there are non-zero default values in
2849 * the struct.
2850 */
2851 void
2852 ip6_initpktopts(struct ip6_pktopts *opt)
2853 {
2854
2855 bzero(opt, sizeof (*opt));
2856 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2857 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2858 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2859 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2860 }
2861
2862 static int
2863 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2864 int uproto)
2865 {
2866 struct ip6_pktopts *opt;
2867
2868 opt = *pktopt;
2869 if (opt == NULL) {
2870 opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK);
2871 if (opt == NULL)
2872 return (ENOBUFS);
2873 ip6_initpktopts(opt);
2874 *pktopt = opt;
2875 }
2876
2877 return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
2878 }
2879
2880 static int
2881 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2882 {
2883 void *optdata = NULL;
2884 int optdatalen = 0;
2885 struct ip6_ext *ip6e;
2886 struct in6_pktinfo null_pktinfo;
2887 int deftclass = 0, on;
2888 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2889 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2890
2891
2892 switch (optname) {
2893 case IPV6_PKTINFO:
2894 if (pktopt && pktopt->ip6po_pktinfo)
2895 optdata = (void *)pktopt->ip6po_pktinfo;
2896 else {
2897 /* XXX: we don't have to do this every time... */
2898 bzero(&null_pktinfo, sizeof (null_pktinfo));
2899 optdata = (void *)&null_pktinfo;
2900 }
2901 optdatalen = sizeof (struct in6_pktinfo);
2902 break;
2903
2904 case IPV6_TCLASS:
2905 if (pktopt && pktopt->ip6po_tclass >= 0)
2906 optdata = (void *)&pktopt->ip6po_tclass;
2907 else
2908 optdata = (void *)&deftclass;
2909 optdatalen = sizeof (int);
2910 break;
2911
2912 case IPV6_HOPOPTS:
2913 if (pktopt && pktopt->ip6po_hbh) {
2914 optdata = (void *)pktopt->ip6po_hbh;
2915 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2916 optdatalen = (ip6e->ip6e_len + 1) << 3;
2917 }
2918 break;
2919
2920 case IPV6_RTHDR:
2921 if (pktopt && pktopt->ip6po_rthdr) {
2922 optdata = (void *)pktopt->ip6po_rthdr;
2923 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2924 optdatalen = (ip6e->ip6e_len + 1) << 3;
2925 }
2926 break;
2927
2928 case IPV6_RTHDRDSTOPTS:
2929 if (pktopt && pktopt->ip6po_dest1) {
2930 optdata = (void *)pktopt->ip6po_dest1;
2931 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2932 optdatalen = (ip6e->ip6e_len + 1) << 3;
2933 }
2934 break;
2935
2936 case IPV6_DSTOPTS:
2937 if (pktopt && pktopt->ip6po_dest2) {
2938 optdata = (void *)pktopt->ip6po_dest2;
2939 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2940 optdatalen = (ip6e->ip6e_len + 1) << 3;
2941 }
2942 break;
2943
2944 case IPV6_NEXTHOP:
2945 if (pktopt && pktopt->ip6po_nexthop) {
2946 optdata = (void *)pktopt->ip6po_nexthop;
2947 optdatalen = pktopt->ip6po_nexthop->sa_len;
2948 }
2949 break;
2950
2951 case IPV6_USE_MIN_MTU:
2952 if (pktopt)
2953 optdata = (void *)&pktopt->ip6po_minmtu;
2954 else
2955 optdata = (void *)&defminmtu;
2956 optdatalen = sizeof (int);
2957 break;
2958
2959 case IPV6_DONTFRAG:
2960 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2961 on = 1;
2962 else
2963 on = 0;
2964 optdata = (void *)&on;
2965 optdatalen = sizeof (on);
2966 break;
2967
2968 case IPV6_PREFER_TEMPADDR:
2969 if (pktopt)
2970 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2971 else
2972 optdata = (void *)&defpreftemp;
2973 optdatalen = sizeof (int);
2974 break;
2975
2976 default: /* should not happen */
2977 #ifdef DIAGNOSTIC
2978 panic("ip6_getpcbopt: unexpected option\n");
2979 #endif
2980 return (ENOPROTOOPT);
2981 }
2982
2983 return (sooptcopyout(sopt, optdata, optdatalen));
2984 }
2985
2986 void
2987 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2988 {
2989 if (pktopt == NULL)
2990 return;
2991
2992 if (optname == -1 || optname == IPV6_PKTINFO) {
2993 if (pktopt->ip6po_pktinfo)
2994 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
2995 pktopt->ip6po_pktinfo = NULL;
2996 }
2997 if (optname == -1 || optname == IPV6_HOPLIMIT)
2998 pktopt->ip6po_hlim = -1;
2999 if (optname == -1 || optname == IPV6_TCLASS)
3000 pktopt->ip6po_tclass = -1;
3001 if (optname == -1 || optname == IPV6_NEXTHOP) {
3002 ROUTE_RELEASE(&pktopt->ip6po_nextroute);
3003 if (pktopt->ip6po_nexthop)
3004 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
3005 pktopt->ip6po_nexthop = NULL;
3006 }
3007 if (optname == -1 || optname == IPV6_HOPOPTS) {
3008 if (pktopt->ip6po_hbh)
3009 FREE(pktopt->ip6po_hbh, M_IP6OPT);
3010 pktopt->ip6po_hbh = NULL;
3011 }
3012 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
3013 if (pktopt->ip6po_dest1)
3014 FREE(pktopt->ip6po_dest1, M_IP6OPT);
3015 pktopt->ip6po_dest1 = NULL;
3016 }
3017 if (optname == -1 || optname == IPV6_RTHDR) {
3018 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
3019 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
3020 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
3021 ROUTE_RELEASE(&pktopt->ip6po_route);
3022 }
3023 if (optname == -1 || optname == IPV6_DSTOPTS) {
3024 if (pktopt->ip6po_dest2)
3025 FREE(pktopt->ip6po_dest2, M_IP6OPT);
3026 pktopt->ip6po_dest2 = NULL;
3027 }
3028 }
3029
3030 #define PKTOPT_EXTHDRCPY(type) do { \
3031 if (src->type) { \
3032 int hlen = \
3033 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \
3034 dst->type = _MALLOC(hlen, M_IP6OPT, canwait); \
3035 if (dst->type == NULL && canwait == M_NOWAIT) \
3036 goto bad; \
3037 bcopy(src->type, dst->type, hlen); \
3038 } \
3039 } while (0)
3040
3041 static int
3042 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
3043 {
3044 if (dst == NULL || src == NULL) {
3045 printf("copypktopts: invalid argument\n");
3046 return (EINVAL);
3047 }
3048
3049 dst->ip6po_hlim = src->ip6po_hlim;
3050 dst->ip6po_tclass = src->ip6po_tclass;
3051 dst->ip6po_flags = src->ip6po_flags;
3052 if (src->ip6po_pktinfo) {
3053 dst->ip6po_pktinfo = _MALLOC(sizeof (*dst->ip6po_pktinfo),
3054 M_IP6OPT, canwait);
3055 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
3056 goto bad;
3057 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3058 }
3059 if (src->ip6po_nexthop) {
3060 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
3061 M_IP6OPT, canwait);
3062 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
3063 goto bad;
3064 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
3065 src->ip6po_nexthop->sa_len);
3066 }
3067 PKTOPT_EXTHDRCPY(ip6po_hbh);
3068 PKTOPT_EXTHDRCPY(ip6po_dest1);
3069 PKTOPT_EXTHDRCPY(ip6po_dest2);
3070 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
3071 return (0);
3072
3073 bad:
3074 ip6_clearpktopts(dst, -1);
3075 return (ENOBUFS);
3076 }
3077 #undef PKTOPT_EXTHDRCPY
3078
3079 struct ip6_pktopts *
3080 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
3081 {
3082 int error;
3083 struct ip6_pktopts *dst;
3084
3085 dst = _MALLOC(sizeof (*dst), M_IP6OPT, canwait);
3086 if (dst == NULL)
3087 return (NULL);
3088 ip6_initpktopts(dst);
3089
3090 if ((error = copypktopts(dst, src, canwait)) != 0) {
3091 FREE(dst, M_IP6OPT);
3092 return (NULL);
3093 }
3094
3095 return (dst);
3096 }
3097
3098 void
3099 ip6_freepcbopts(struct ip6_pktopts *pktopt)
3100 {
3101 if (pktopt == NULL)
3102 return;
3103
3104 ip6_clearpktopts(pktopt, -1);
3105
3106 FREE(pktopt, M_IP6OPT);
3107 }
3108
3109 void
3110 ip6_moptions_init(void)
3111 {
3112 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
3113
3114 im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
3115 sizeof (struct ip6_moptions_dbg);
3116
3117 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
3118 IM6O_ZONE_NAME);
3119 if (im6o_zone == NULL) {
3120 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
3121 /* NOTREACHED */
3122 }
3123 zone_change(im6o_zone, Z_EXPAND, TRUE);
3124 }
3125
3126 void
3127 im6o_addref(struct ip6_moptions *im6o, int locked)
3128 {
3129 if (!locked)
3130 IM6O_LOCK(im6o);
3131 else
3132 IM6O_LOCK_ASSERT_HELD(im6o);
3133
3134 if (++im6o->im6o_refcnt == 0) {
3135 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
3136 /* NOTREACHED */
3137 } else if (im6o->im6o_trace != NULL) {
3138 (*im6o->im6o_trace)(im6o, TRUE);
3139 }
3140
3141 if (!locked)
3142 IM6O_UNLOCK(im6o);
3143 }
3144
3145 void
3146 im6o_remref(struct ip6_moptions *im6o)
3147 {
3148 int i;
3149
3150 IM6O_LOCK(im6o);
3151 if (im6o->im6o_refcnt == 0) {
3152 panic("%s: im6o %p negative refcnt", __func__, im6o);
3153 /* NOTREACHED */
3154 } else if (im6o->im6o_trace != NULL) {
3155 (*im6o->im6o_trace)(im6o, FALSE);
3156 }
3157
3158 --im6o->im6o_refcnt;
3159 if (im6o->im6o_refcnt > 0) {
3160 IM6O_UNLOCK(im6o);
3161 return;
3162 }
3163
3164 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3165 struct in6_mfilter *imf;
3166
3167 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
3168 if (imf != NULL)
3169 im6f_leave(imf);
3170
3171 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
3172
3173 if (imf != NULL)
3174 im6f_purge(imf);
3175
3176 IN6M_REMREF(im6o->im6o_membership[i]);
3177 im6o->im6o_membership[i] = NULL;
3178 }
3179 im6o->im6o_num_memberships = 0;
3180 if (im6o->im6o_mfilters != NULL) {
3181 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
3182 im6o->im6o_mfilters = NULL;
3183 }
3184 if (im6o->im6o_membership != NULL) {
3185 FREE(im6o->im6o_membership, M_IP6MOPTS);
3186 im6o->im6o_membership = NULL;
3187 }
3188 IM6O_UNLOCK(im6o);
3189
3190 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
3191
3192 if (!(im6o->im6o_debug & IFD_ALLOC)) {
3193 panic("%s: im6o %p cannot be freed", __func__, im6o);
3194 /* NOTREACHED */
3195 }
3196 zfree(im6o_zone, im6o);
3197 }
3198
3199 static void
3200 im6o_trace(struct ip6_moptions *im6o, int refhold)
3201 {
3202 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3203 ctrace_t *tr;
3204 u_int32_t idx;
3205 u_int16_t *cnt;
3206
3207 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3208 panic("%s: im6o %p has no debug structure", __func__, im6o);
3209 /* NOTREACHED */
3210 }
3211 if (refhold) {
3212 cnt = &im6o_dbg->im6o_refhold_cnt;
3213 tr = im6o_dbg->im6o_refhold;
3214 } else {
3215 cnt = &im6o_dbg->im6o_refrele_cnt;
3216 tr = im6o_dbg->im6o_refrele;
3217 }
3218
3219 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3220 ctrace_record(&tr[idx]);
3221 }
3222
3223 struct ip6_moptions *
3224 ip6_allocmoptions(int how)
3225 {
3226 struct ip6_moptions *im6o;
3227
3228 im6o = (how == M_WAITOK) ?
3229 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3230 if (im6o != NULL) {
3231 bzero(im6o, im6o_size);
3232 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3233 im6o->im6o_debug |= IFD_ALLOC;
3234 if (im6o_debug != 0) {
3235 im6o->im6o_debug |= IFD_DEBUG;
3236 im6o->im6o_trace = im6o_trace;
3237 }
3238 IM6O_ADDREF(im6o);
3239 }
3240
3241 return (im6o);
3242 }
3243
3244 /*
3245 * Set IPv6 outgoing packet options based on advanced API.
3246 */
3247 int
3248 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3249 struct ip6_pktopts *stickyopt, int uproto)
3250 {
3251 struct cmsghdr *cm = NULL;
3252
3253 if (control == NULL || opt == NULL)
3254 return (EINVAL);
3255
3256 ip6_initpktopts(opt);
3257 if (stickyopt) {
3258 int error;
3259
3260 /*
3261 * If stickyopt is provided, make a local copy of the options
3262 * for this particular packet, then override them by ancillary
3263 * objects.
3264 * XXX: copypktopts() does not copy the cached route to a next
3265 * hop (if any). This is not very good in terms of efficiency,
3266 * but we can allow this since this option should be rarely
3267 * used.
3268 */
3269 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3270 return (error);
3271 }
3272
3273 /*
3274 * XXX: Currently, we assume all the optional information is stored
3275 * in a single mbuf.
3276 */
3277 if (control->m_next)
3278 return (EINVAL);
3279
3280 if (control->m_len < CMSG_LEN(0))
3281 return (EINVAL);
3282
3283 for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
3284 cm = M_NXT_CMSGHDR(control, cm)) {
3285 int error;
3286
3287 if (cm->cmsg_len < sizeof (struct cmsghdr) ||
3288 cm->cmsg_len > control->m_len)
3289 return (EINVAL);
3290 if (cm->cmsg_level != IPPROTO_IPV6)
3291 continue;
3292
3293 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3294 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3295 if (error)
3296 return (error);
3297 }
3298
3299 return (0);
3300 }
3301 /*
3302 * Set a particular packet option, as a sticky option or an ancillary data
3303 * item. "len" can be 0 only when it's a sticky option.
3304 * We have 4 cases of combination of "sticky" and "cmsg":
3305 * "sticky=0, cmsg=0": impossible
3306 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3307 * "sticky=1, cmsg=0": RFC3542 socket option
3308 * "sticky=1, cmsg=1": RFC2292 socket option
3309 */
3310 static int
3311 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3312 int sticky, int cmsg, int uproto)
3313 {
3314 int minmtupolicy, preftemp;
3315 int error;
3316
3317 if (!sticky && !cmsg) {
3318 #ifdef DIAGNOSTIC
3319 printf("ip6_setpktopt: impossible case\n");
3320 #endif
3321 return (EINVAL);
3322 }
3323
3324 /*
3325 * Caller must have ensured that the buffer is at least
3326 * aligned on 32-bit boundary.
3327 */
3328 VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t)));
3329
3330 /*
3331 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3332 * not be specified in the context of RFC3542. Conversely,
3333 * RFC3542 types should not be specified in the context of RFC2292.
3334 */
3335 if (!cmsg) {
3336 switch (optname) {
3337 case IPV6_2292PKTINFO:
3338 case IPV6_2292HOPLIMIT:
3339 case IPV6_2292NEXTHOP:
3340 case IPV6_2292HOPOPTS:
3341 case IPV6_2292DSTOPTS:
3342 case IPV6_2292RTHDR:
3343 case IPV6_2292PKTOPTIONS:
3344 return (ENOPROTOOPT);
3345 }
3346 }
3347 if (sticky && cmsg) {
3348 switch (optname) {
3349 case IPV6_PKTINFO:
3350 case IPV6_HOPLIMIT:
3351 case IPV6_NEXTHOP:
3352 case IPV6_HOPOPTS:
3353 case IPV6_DSTOPTS:
3354 case IPV6_RTHDRDSTOPTS:
3355 case IPV6_RTHDR:
3356 case IPV6_USE_MIN_MTU:
3357 case IPV6_DONTFRAG:
3358 case IPV6_TCLASS:
3359 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3360 return (ENOPROTOOPT);
3361 }
3362 }
3363
3364 switch (optname) {
3365 case IPV6_2292PKTINFO:
3366 case IPV6_PKTINFO: {
3367 struct ifnet *ifp = NULL;
3368 struct in6_pktinfo *pktinfo;
3369
3370 if (len != sizeof (struct in6_pktinfo))
3371 return (EINVAL);
3372
3373 pktinfo = (struct in6_pktinfo *)(void *)buf;
3374
3375 /*
3376 * An application can clear any sticky IPV6_PKTINFO option by
3377 * doing a "regular" setsockopt with ipi6_addr being
3378 * in6addr_any and ipi6_ifindex being zero.
3379 * [RFC 3542, Section 6]
3380 */
3381 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3382 pktinfo->ipi6_ifindex == 0 &&
3383 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3384 ip6_clearpktopts(opt, optname);
3385 break;
3386 }
3387
3388 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3389 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3390 return (EINVAL);
3391 }
3392
3393 /* validate the interface index if specified. */
3394 ifnet_head_lock_shared();
3395
3396 if (pktinfo->ipi6_ifindex > if_index) {
3397 ifnet_head_done();
3398 return (ENXIO);
3399 }
3400
3401 if (pktinfo->ipi6_ifindex) {
3402 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3403 if (ifp == NULL) {
3404 ifnet_head_done();
3405 return (ENXIO);
3406 }
3407 }
3408
3409 ifnet_head_done();
3410
3411 /*
3412 * We store the address anyway, and let in6_selectsrc()
3413 * validate the specified address. This is because ipi6_addr
3414 * may not have enough information about its scope zone, and
3415 * we may need additional information (such as outgoing
3416 * interface or the scope zone of a destination address) to
3417 * disambiguate the scope.
3418 * XXX: the delay of the validation may confuse the
3419 * application when it is used as a sticky option.
3420 */
3421 if (opt->ip6po_pktinfo == NULL) {
3422 opt->ip6po_pktinfo = _MALLOC(sizeof (*pktinfo),
3423 M_IP6OPT, M_NOWAIT);
3424 if (opt->ip6po_pktinfo == NULL)
3425 return (ENOBUFS);
3426 }
3427 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof (*pktinfo));
3428 break;
3429 }
3430
3431 case IPV6_2292HOPLIMIT:
3432 case IPV6_HOPLIMIT: {
3433 int *hlimp;
3434
3435 /*
3436 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3437 * to simplify the ordering among hoplimit options.
3438 */
3439 if (optname == IPV6_HOPLIMIT && sticky)
3440 return (ENOPROTOOPT);
3441
3442 if (len != sizeof (int))
3443 return (EINVAL);
3444 hlimp = (int *)(void *)buf;
3445 if (*hlimp < -1 || *hlimp > 255)
3446 return (EINVAL);
3447
3448 opt->ip6po_hlim = *hlimp;
3449 break;
3450 }
3451
3452 case IPV6_TCLASS: {
3453 int tclass;
3454
3455 if (len != sizeof (int))
3456 return (EINVAL);
3457 tclass = *(int *)(void *)buf;
3458 if (tclass < -1 || tclass > 255)
3459 return (EINVAL);
3460
3461 opt->ip6po_tclass = tclass;
3462 break;
3463 }
3464
3465 case IPV6_2292NEXTHOP:
3466 case IPV6_NEXTHOP:
3467 error = suser(kauth_cred_get(), 0);
3468 if (error)
3469 return (EACCES);
3470
3471 if (len == 0) { /* just remove the option */
3472 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3473 break;
3474 }
3475
3476 /* check if cmsg_len is large enough for sa_len */
3477 if (len < sizeof (struct sockaddr) || len < *buf)
3478 return (EINVAL);
3479
3480 switch (SA(buf)->sa_family) {
3481 case AF_INET6: {
3482 struct sockaddr_in6 *sa6 = SIN6(buf);
3483
3484 if (sa6->sin6_len != sizeof (struct sockaddr_in6))
3485 return (EINVAL);
3486
3487 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3488 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3489 return (EINVAL);
3490 }
3491 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3492 != 0) {
3493 return (error);
3494 }
3495 break;
3496 }
3497 case AF_LINK: /* should eventually be supported */
3498 default:
3499 return (EAFNOSUPPORT);
3500 }
3501
3502 /* turn off the previous option, then set the new option. */
3503 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3504 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3505 if (opt->ip6po_nexthop == NULL)
3506 return (ENOBUFS);
3507 bcopy(buf, opt->ip6po_nexthop, *buf);
3508 break;
3509
3510 case IPV6_2292HOPOPTS:
3511 case IPV6_HOPOPTS: {
3512 struct ip6_hbh *hbh;
3513 int hbhlen;
3514
3515 /*
3516 * XXX: We don't allow a non-privileged user to set ANY HbH
3517 * options, since per-option restriction has too much
3518 * overhead.
3519 */
3520 error = suser(kauth_cred_get(), 0);
3521 if (error)
3522 return (EACCES);
3523
3524 if (len == 0) {
3525 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3526 break; /* just remove the option */
3527 }
3528
3529 /* message length validation */
3530 if (len < sizeof (struct ip6_hbh))
3531 return (EINVAL);
3532 hbh = (struct ip6_hbh *)(void *)buf;
3533 hbhlen = (hbh->ip6h_len + 1) << 3;
3534 if (len != hbhlen)
3535 return (EINVAL);
3536
3537 /* turn off the previous option, then set the new option. */
3538 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3539 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3540 if (opt->ip6po_hbh == NULL)
3541 return (ENOBUFS);
3542 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3543
3544 break;
3545 }
3546
3547 case IPV6_2292DSTOPTS:
3548 case IPV6_DSTOPTS:
3549 case IPV6_RTHDRDSTOPTS: {
3550 struct ip6_dest *dest, **newdest = NULL;
3551 int destlen;
3552
3553 error = suser(kauth_cred_get(), 0);
3554 if (error)
3555 return (EACCES);
3556
3557 if (len == 0) {
3558 ip6_clearpktopts(opt, optname);
3559 break; /* just remove the option */
3560 }
3561
3562 /* message length validation */
3563 if (len < sizeof (struct ip6_dest))
3564 return (EINVAL);
3565 dest = (struct ip6_dest *)(void *)buf;
3566 destlen = (dest->ip6d_len + 1) << 3;
3567 if (len != destlen)
3568 return (EINVAL);
3569
3570 /*
3571 * Determine the position that the destination options header
3572 * should be inserted; before or after the routing header.
3573 */
3574 switch (optname) {
3575 case IPV6_2292DSTOPTS:
3576 /*
3577 * The old advanced API is ambiguous on this point.
3578 * Our approach is to determine the position based
3579 * according to the existence of a routing header.
3580 * Note, however, that this depends on the order of the
3581 * extension headers in the ancillary data; the 1st
3582 * part of the destination options header must appear
3583 * before the routing header in the ancillary data,
3584 * too.
3585 * RFC3542 solved the ambiguity by introducing
3586 * separate ancillary data or option types.
3587 */
3588 if (opt->ip6po_rthdr == NULL)
3589 newdest = &opt->ip6po_dest1;
3590 else
3591 newdest = &opt->ip6po_dest2;
3592 break;
3593 case IPV6_RTHDRDSTOPTS:
3594 newdest = &opt->ip6po_dest1;
3595 break;
3596 case IPV6_DSTOPTS:
3597 newdest = &opt->ip6po_dest2;
3598 break;
3599 }
3600
3601 /* turn off the previous option, then set the new option. */
3602 ip6_clearpktopts(opt, optname);
3603 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3604 if (*newdest == NULL)
3605 return (ENOBUFS);
3606 bcopy(dest, *newdest, destlen);
3607 break;
3608 }
3609
3610 case IPV6_2292RTHDR:
3611 case IPV6_RTHDR: {
3612 struct ip6_rthdr *rth;
3613 int rthlen;
3614
3615 if (len == 0) {
3616 ip6_clearpktopts(opt, IPV6_RTHDR);
3617 break; /* just remove the option */
3618 }
3619
3620 /* message length validation */
3621 if (len < sizeof (struct ip6_rthdr))
3622 return (EINVAL);
3623 rth = (struct ip6_rthdr *)(void *)buf;
3624 rthlen = (rth->ip6r_len + 1) << 3;
3625 if (len != rthlen)
3626 return (EINVAL);
3627
3628 switch (rth->ip6r_type) {
3629 case IPV6_RTHDR_TYPE_0:
3630 if (rth->ip6r_len == 0) /* must contain one addr */
3631 return (EINVAL);
3632 if (rth->ip6r_len % 2) /* length must be even */
3633 return (EINVAL);
3634 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3635 return (EINVAL);
3636 break;
3637 default:
3638 return (EINVAL); /* not supported */
3639 }
3640
3641 /* turn off the previous option */
3642 ip6_clearpktopts(opt, IPV6_RTHDR);
3643 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3644 if (opt->ip6po_rthdr == NULL)
3645 return (ENOBUFS);
3646 bcopy(rth, opt->ip6po_rthdr, rthlen);
3647 break;
3648 }
3649
3650 case IPV6_USE_MIN_MTU:
3651 if (len != sizeof (int))
3652 return (EINVAL);
3653 minmtupolicy = *(int *)(void *)buf;
3654 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3655 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3656 minmtupolicy != IP6PO_MINMTU_ALL) {
3657 return (EINVAL);
3658 }
3659 opt->ip6po_minmtu = minmtupolicy;
3660 break;
3661
3662 case IPV6_DONTFRAG:
3663 if (len != sizeof (int))
3664 return (EINVAL);
3665
3666 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
3667 /*
3668 * we ignore this option for TCP sockets.
3669 * (RFC3542 leaves this case unspecified.)
3670 */
3671 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3672 } else {
3673 opt->ip6po_flags |= IP6PO_DONTFRAG;
3674 }
3675 break;
3676
3677 case IPV6_PREFER_TEMPADDR:
3678 if (len != sizeof (int))
3679 return (EINVAL);
3680 preftemp = *(int *)(void *)buf;
3681 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3682 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3683 preftemp != IP6PO_TEMPADDR_PREFER) {
3684 return (EINVAL);
3685 }
3686 opt->ip6po_prefer_tempaddr = preftemp;
3687 break;
3688
3689 default:
3690 return (ENOPROTOOPT);
3691 } /* end of switch */
3692
3693 return (0);
3694 }
3695
3696 /*
3697 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3698 * packet to the input queue of a specified interface. Note that this
3699 * calls the output routine of the loopback "driver", but with an interface
3700 * pointer that might NOT be &loif -- easier than replicating that code here.
3701 */
3702 void
3703 ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
3704 struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
3705 {
3706 struct mbuf *copym;
3707 struct ip6_hdr *ip6;
3708 struct in6_addr src;
3709
3710 if (lo_ifp == NULL)
3711 return;
3712
3713 /*
3714 * Copy the packet header as it's needed for the checksum.
3715 * Make sure to deep-copy IPv6 header portion in case the data
3716 * is in an mbuf cluster, so that we can safely override the IPv6
3717 * header portion later.
3718 */
3719 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
3720 if (copym != NULL && ((copym->m_flags & M_EXT) ||
3721 copym->m_len < sizeof (struct ip6_hdr)))
3722 copym = m_pullup(copym, sizeof (struct ip6_hdr));
3723
3724 if (copym == NULL)
3725 return;
3726
3727 ip6 = mtod(copym, struct ip6_hdr *);
3728 src = ip6->ip6_src;
3729 /*
3730 * clear embedded scope identifiers if necessary.
3731 * in6_clearscope will touch the addresses only when necessary.
3732 */
3733 in6_clearscope(&ip6->ip6_src);
3734 in6_clearscope(&ip6->ip6_dst);
3735
3736 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
3737 in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
3738
3739 /*
3740 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3741 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
3742 * to make the loopback driver compliant with the data link
3743 * requirements.
3744 */
3745 copym->m_pkthdr.rcvif = origifp;
3746
3747 /*
3748 * Also record the source interface (which owns the source address).
3749 * This is basically a stripped down version of ifa_foraddr6().
3750 */
3751 if (srcifp == NULL) {
3752 struct in6_ifaddr *ia;
3753
3754 lck_rw_lock_shared(&in6_ifaddr_rwlock);
3755 for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) {
3756 IFA_LOCK_SPIN(&ia->ia_ifa);
3757 /* compare against src addr with embedded scope */
3758 if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) {
3759 srcifp = ia->ia_ifp;
3760 IFA_UNLOCK(&ia->ia_ifa);
3761 break;
3762 }
3763 IFA_UNLOCK(&ia->ia_ifa);
3764 }
3765 lck_rw_done(&in6_ifaddr_rwlock);
3766 }
3767 if (srcifp != NULL)
3768 ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
3769 ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
3770
3771 dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
3772 }
3773
3774 /*
3775 * Chop IPv6 header off from the payload.
3776 */
3777 static int
3778 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
3779 {
3780 struct mbuf *mh;
3781 struct ip6_hdr *ip6;
3782
3783 ip6 = mtod(m, struct ip6_hdr *);
3784 if (m->m_len > sizeof (*ip6)) {
3785 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
3786 if (mh == NULL) {
3787 m_freem(m);
3788 return (ENOBUFS);
3789 }
3790 M_COPY_PKTHDR(mh, m);
3791 MH_ALIGN(mh, sizeof (*ip6));
3792 m->m_flags &= ~M_PKTHDR;
3793 m->m_len -= sizeof (*ip6);
3794 m->m_data += sizeof (*ip6);
3795 mh->m_next = m;
3796 m = mh;
3797 m->m_len = sizeof (*ip6);
3798 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof (*ip6));
3799 }
3800 exthdrs->ip6e_ip6 = m;
3801 return (0);
3802 }
3803
3804 static void
3805 ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
3806 int nxt0, uint32_t tlen, uint32_t optlen)
3807 {
3808 uint32_t sw_csum, hwcap = ifp->if_hwassist;
3809 int tso = TSO_IPV6_OK(ifp, m);
3810
3811 if (!hwcksum_tx) {
3812 /* do all in software; checksum offload is disabled */
3813 sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
3814 } else {
3815 /* do in software what the hardware cannot */
3816 sw_csum = m->m_pkthdr.csum_flags &
3817 ~IF_HWASSIST_CSUM_FLAGS(hwcap);
3818 }
3819
3820 if (optlen != 0) {
3821 sw_csum |= (CSUM_DELAY_IPV6_DATA &
3822 m->m_pkthdr.csum_flags);
3823 } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
3824 (hwcap & CSUM_PARTIAL)) {
3825 /*
3826 * Partial checksum offload, ere), if no extension
3827 * headers, and TCP only (no UDP support, as the
3828 * hardware may not be able to convert +0 to
3829 * -0 (0xffff) per RFC1122 4.1.3.4.)
3830 */
3831 if (hwcksum_tx && !tso &&
3832 (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) &&
3833 tlen <= mtu) {
3834 uint16_t start = sizeof (struct ip6_hdr);
3835 uint16_t ulpoff =
3836 m->m_pkthdr.csum_data & 0xffff;
3837 m->m_pkthdr.csum_flags |=
3838 (CSUM_DATA_VALID | CSUM_PARTIAL);
3839 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
3840 m->m_pkthdr.csum_tx_start = start;
3841 sw_csum = 0;
3842 } else {
3843 sw_csum |= (CSUM_DELAY_IPV6_DATA &
3844 m->m_pkthdr.csum_flags);
3845 }
3846 }
3847
3848 if (sw_csum & CSUM_DELAY_IPV6_DATA) {
3849 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
3850 sw_csum &= ~CSUM_DELAY_IPV6_DATA;
3851 }
3852
3853 if (hwcksum_tx) {
3854 /*
3855 * Drop off bits that aren't supported by hardware;
3856 * also make sure to preserve non-checksum related bits.
3857 */
3858 m->m_pkthdr.csum_flags =
3859 ((m->m_pkthdr.csum_flags &
3860 (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
3861 (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
3862 } else {
3863 /* drop all bits; checksum offload is disabled */
3864 m->m_pkthdr.csum_flags = 0;
3865 }
3866 }
3867
3868 /*
3869 * Compute IPv6 extension header length.
3870 */
3871 int
3872 ip6_optlen(struct in6pcb *in6p)
3873 {
3874 int len;
3875
3876 if (!in6p->in6p_outputopts)
3877 return (0);
3878
3879 len = 0;
3880 #define elen(x) \
3881 (((struct ip6_ext *)(x)) ? \
3882 (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3883
3884 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3885 if (in6p->in6p_outputopts->ip6po_rthdr) {
3886 /* dest1 is valid with rthdr only */
3887 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3888 }
3889 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3890 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3891 return (len);
3892 #undef elen
3893 }