]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/ip6_output.c
xnu-6153.81.5.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58 /*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
92 /*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections. This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
98
99 #include <sys/param.h>
100 #include <sys/malloc.h>
101 #include <sys/mbuf.h>
102 #include <sys/errno.h>
103 #include <sys/protosw.h>
104 #include <sys/socket.h>
105 #include <sys/socketvar.h>
106 #include <sys/systm.h>
107 #include <sys/kernel.h>
108 #include <sys/proc.h>
109 #include <sys/kauth.h>
110 #include <sys/mcache.h>
111 #include <sys/sysctl.h>
112 #include <kern/zalloc.h>
113 #include <libkern/OSByteOrder.h>
114
115 #include <pexpert/pexpert.h>
116 #include <mach/sdt.h>
117
118 #include <net/if.h>
119 #include <net/route.h>
120 #include <net/dlil.h>
121 #include <net/net_api_stats.h>
122 #include <net/net_osdep.h>
123 #include <net/net_perf.h>
124
125 #include <netinet/ip.h>
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet/ip_var.h>
129 #include <netinet6/in6_var.h>
130 #include <netinet/ip6.h>
131 #include <netinet/kpi_ipfilter_var.h>
132 #include <netinet/in_tclass.h>
133
134 #include <netinet6/ip6protosw.h>
135 #include <netinet/icmp6.h>
136 #include <netinet6/ip6_var.h>
137 #include <netinet/in_pcb.h>
138 #include <netinet6/nd6.h>
139 #include <netinet6/scope6_var.h>
140 #if IPSEC
141 #include <netinet6/ipsec.h>
142 #include <netinet6/ipsec6.h>
143 #include <netkey/key.h>
144 extern int ipsec_bypass;
145 #endif /* IPSEC */
146
147 #if NECP
148 #include <net/necp.h>
149 #endif /* NECP */
150
151 #if CONFIG_MACF_NET
152 #include <security/mac.h>
153 #endif /* CONFIG_MACF_NET */
154
155 #if DUMMYNET
156 #include <netinet/ip_fw.h>
157 #include <netinet/ip_dummynet.h>
158 #endif /* DUMMYNET */
159
160 #if PF
161 #include <net/pfvar.h>
162 #endif /* PF */
163
/*
 * Forward declarations.  Every routine below is file-local (static)
 * except udp_ctloutput(), which is declared extern.
 */

/* Handlers referenced by the SYSCTL_PROC() declarations in this file. */
static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;

/* Called from the MAKE_EXTHDR() macro inside ip6_output_list(). */
static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
static void ip6_out_cksum_stats(int, u_int32_t);
/* Called by ip6_output_list() when the payload exceeds IPV6_MAXPACKET. */
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
    struct ip6_frag **);
static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
    struct ifnet *, struct in6_addr *, u_int32_t *);

/* Packet-option (struct ip6_pktopts) helpers. */
static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
    struct sockopt *sopt);
static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
static void im6o_trace(struct ip6_moptions *, int);
static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
    int, int);

/* Called by ip6_output_list() before extension headers are chained in. */
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
    int, uint32_t, uint32_t);
extern int udp_ctloutput(struct socket *, struct sockopt *);
static int ip6_fragment_packet(struct mbuf **m,
    struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
    uint32_t mtu, uint32_t unfragpartlen,
    struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
190
/* The OIDs below are registered under the _net_inet6_ip6 sysctl node. */
SYSCTL_DECL(_net_inet6_ip6);

/*
 * output_perf: read/write integer switch.  When it is non-zero,
 * ip6_output_list() calls net_perf_start_time() on entry.  Accesses go
 * through sysctl_reset_ip6_output_stats().
 */
static int ip6_output_measure = 0;
SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");

/*
 * output_perf_bins: read/write 64-bit value handled by
 * sysctl_ip6_output_measure_bins().
 *
 * NOTE(review): the OID is CTLTYPE_QUAD and is backed by a uint64_t, yet
 * the format string is "I" rather than a quad format -- confirm whether
 * that is intentional before relying on how tools display this value.
 */
static uint64_t ip6_output_measure_bins = 0;
SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
    CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
    sysctl_ip6_output_measure_bins, "I",
    "bins for chaining performance data histogram");

/*
 * output_perf_data: read-only structure OID served by
 * sysctl_ip6_output_getperf().  net_perf is the state handed to
 * net_perf_start_time() by ip6_output_list(); no data pointer is passed
 * to SYSCTL_PROC here, so the handler presumably exports net_perf itself
 * (handler body not visible here -- confirm).
 */
static net_perf_t net_perf;
SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
    CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_ip6_output_getperf, "S,net_perf",
    "IP6 output performance data (struct net_perf, net/net_perf.h)");
209
/*
 * Number of entries in each of the im6o_refhold[] and im6o_refrele[]
 * caller-trace arrays of struct ip6_moptions_dbg below.
 */
#define IM6O_TRACE_HIST_SIZE    32      /* size of trace history */

/* For gdb: the same value as IM6O_TRACE_HIST_SIZE, held in a variable. */
__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;

/*
 * Debugging wrapper around struct ip6_moptions.  The plain structure is
 * the first member; it is followed by reference hold/release counters
 * and caller traces.  Presumably used in place of a bare ip6_moptions
 * when im6o_debug is set (allocation code is not visible here -- confirm).
 */
struct ip6_moptions_dbg {
	struct ip6_moptions     im6o;                   /* ip6_moptions */
	u_int16_t               im6o_refhold_cnt;       /* # of IM6O_ADDREF */
	u_int16_t               im6o_refrele_cnt;       /* # of IM6O_REMREF */
	/*
	 * Alloc and free callers.
	 */
	ctrace_t                im6o_alloc;
	ctrace_t                im6o_free;
	/*
	 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
	 */
	ctrace_t                im6o_refhold[IM6O_TRACE_HIST_SIZE];
	ctrace_t                im6o_refrele[IM6O_TRACE_HIST_SIZE];
};
230
/*
 * im6o_debug starts out as 1 on DEBUG builds and as 0 otherwise (the
 * non-DEBUG definition relies on static zero-initialization).
 */
#if DEBUG
static unsigned int im6o_debug = 1;     /* debugging (enabled) */
#else
static unsigned int im6o_debug;         /* debugging (disabled) */
#endif /* !DEBUG */

/*
 * Zone bookkeeping for ip6_moptions.  Neither variable is given a value
 * here; both are expected to be set up by initialization code elsewhere
 * in this file (not visible in this excerpt).
 */
static unsigned int im6o_size;          /* size of zone element */
static struct zone *im6o_zone;          /* zone for ip6_moptions */

#define IM6O_ZONE_MAX   64              /* maximum elements in zone */
#define IM6O_ZONE_NAME  "ip6_moptions"  /* zone name */
242
/*
 * ip6_output: send one IPv6 packet.
 *
 * Thin wrapper around ip6_output_list().  All arguments are passed
 * through untouched and the packetchain argument is fixed at 0.  The
 * value returned is whatever ip6_output_list() returns.
 */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
	const int packetchain = 0;      /* value passed as packetchain */
	int error;

	error = ip6_output_list(m0, packetchain, opt, ro, flags, im6o, ifpp,
	    ip6oa);

	return error;
}
253
254 /*
255 * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
256 * header (with pri, len, nxt, hlim, src, dst).
257 * This function may modify ver and hlim only.
258 * The mbuf chain containing the packet will be freed.
259 * The mbuf opt, if present, will not be freed.
260 *
261 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
262 * skipped and ro->ro_rt would be used. Otherwise the result of route
263 * lookup is stored in ro->ro_rt.
264 *
265 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
266 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
267 * which is rt_rmx.rmx_mtu.
268 */
269 int
270 ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
271 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
272 struct ifnet **ifpp, struct ip6_out_args *ip6oa)
273 {
274 struct ip6_hdr *ip6;
275 u_char *nexthdrp;
276 struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
277 struct ifnet **ifpp_save = ifpp;
278 struct mbuf *m, *mprev;
279 struct mbuf *sendchain = NULL, *sendchain_last = NULL;
280 struct mbuf *inputchain = NULL;
281 int nxt0 = 0;
282 struct route_in6 *ro_pmtu = NULL;
283 struct rtentry *rt = NULL;
284 struct sockaddr_in6 *dst = NULL, src_sa, dst_sa;
285 int error = 0;
286 struct in6_ifaddr *ia = NULL, *src_ia = NULL;
287 u_int32_t mtu = 0;
288 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
289 struct ip6_rthdr *rh;
290 struct in6_addr finaldst;
291 ipfilter_t inject_filter_ref;
292 struct ipf_pktopts *ippo = NULL;
293 struct flowadv *adv = NULL;
294 uint32_t pktcnt = 0;
295 uint32_t packets_processed = 0;
296 struct timeval start_tv;
297 #if PF
298 boolean_t skip_pf = (ip6oa != NULL) &&
299 (ip6oa->ip6oa_flags & IP6OAF_SKIP_PF);
300 #endif
301
302 #if DUMMYNET
303 struct m_tag *tag;
304 struct ip6_out_args saved_ip6oa;
305 struct sockaddr_in6 dst_buf;
306 #endif /* DUMMYNET */
307 #if IPSEC
308 struct socket *so = NULL;
309 struct secpolicy *sp = NULL;
310 struct route_in6 *ipsec_saved_route = NULL;
311 boolean_t needipsectun = FALSE;
312 #endif /* IPSEC */
313 #if NECP
314 necp_kernel_policy_result necp_result = 0;
315 necp_kernel_policy_result_parameter necp_result_parameter;
316 necp_kernel_policy_id necp_matched_policy_id = 0;
317 #endif /* NECP */
318 struct {
319 struct ipf_pktopts ipf_pktopts;
320 struct ip6_exthdrs exthdrs;
321 struct route_in6 ip6route;
322 #if IPSEC
323 struct ipsec_output_state ipsec_state;
324 #endif /* IPSEC */
325 #if NECP
326 struct route_in6 necp_route;
327 #endif /* NECP */
328 #if DUMMYNET
329 struct route_in6 saved_route;
330 struct route_in6 saved_ro_pmtu;
331 struct ip_fw_args args;
332 #endif /* DUMMYNET */
333 } ip6obz;
334 #define ipf_pktopts ip6obz.ipf_pktopts
335 #define exthdrs ip6obz.exthdrs
336 #define ip6route ip6obz.ip6route
337 #define ipsec_state ip6obz.ipsec_state
338 #define necp_route ip6obz.necp_route
339 #define saved_route ip6obz.saved_route
340 #define saved_ro_pmtu ip6obz.saved_ro_pmtu
341 #define args ip6obz.args
342 union {
343 struct {
344 boolean_t select_srcif : 1;
345 boolean_t hdrsplit : 1;
346 boolean_t route_selected : 1;
347 boolean_t dontfrag : 1;
348 #if IPSEC
349 boolean_t needipsec : 1;
350 boolean_t noipsec : 1;
351 #endif /* IPSEC */
352 };
353 uint32_t raw;
354 } ip6obf = { .raw = 0 };
355
356 if (ip6_output_measure) {
357 net_perf_start_time(&net_perf, &start_tv);
358 }
359
360 VERIFY(m0->m_flags & M_PKTHDR);
361
362 /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
363 bzero(&ip6obz, sizeof(ip6obz));
364
365 #if DUMMYNET
366 if (SLIST_EMPTY(&m0->m_pkthdr.tags)) {
367 goto tags_done;
368 }
369
370 /* Grab info from mtags prepended to the chain */
371 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
372 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
373 struct dn_pkt_tag *dn_tag;
374
375 /*
376 * ip6_output_list() cannot handle chains of packets reinjected
377 * by dummynet. The same restriction applies to
378 * ip_output_list().
379 */
380 VERIFY(0 == packetchain);
381
382 dn_tag = (struct dn_pkt_tag *)(tag + 1);
383 args.fwa_pf_rule = dn_tag->dn_pf_rule;
384
385 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
386 dst = &dst_buf;
387 ifp = dn_tag->dn_ifp;
388 if (ifp != NULL) {
389 ifnet_reference(ifp);
390 }
391 flags = dn_tag->dn_flags;
392 if (dn_tag->dn_flags & IPV6_OUTARGS) {
393 saved_ip6oa = dn_tag->dn_ip6oa;
394 ip6oa = &saved_ip6oa;
395 }
396
397 saved_route = dn_tag->dn_ro6;
398 ro = &saved_route;
399 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
400 ro_pmtu = &saved_ro_pmtu;
401 origifp = dn_tag->dn_origifp;
402 if (origifp != NULL) {
403 ifnet_reference(origifp);
404 }
405 mtu = dn_tag->dn_mtu;
406 unfragpartlen = dn_tag->dn_unfragpartlen;
407
408 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs));
409
410 m_tag_delete(m0, tag);
411 }
412
413 tags_done:
414 #endif /* DUMMYNET */
415
416 m = m0;
417
418 #if IPSEC
419 if (ipsec_bypass == 0) {
420 so = ipsec_getsocket(m);
421 if (so != NULL) {
422 (void) ipsec_setsocket(m, NULL);
423 }
424 /* If packet is bound to an interface, check bound policies */
425 if ((flags & IPV6_OUTARGS) &&
426 (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
427 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
428 /* ip6obf.noipsec is a bitfield, use temp integer */
429 int noipsec = 0;
430
431 if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
432 flags, ip6oa, &noipsec, &sp) != 0) {
433 goto bad;
434 }
435
436 ip6obf.noipsec = (noipsec != 0);
437 }
438 }
439 #endif /* IPSEC */
440
441 ippo = &ipf_pktopts;
442
443 if (flags & IPV6_OUTARGS) {
444 /*
445 * In the forwarding case, only the ifscope value is used,
446 * as source interface selection doesn't take place.
447 */
448 if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
449 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
450 (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF)))) {
451 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
452 }
453
454 if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
455 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
456 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
457 (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
458 }
459
460 if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR) {
461 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
462 }
463 } else {
464 ip6obf.select_srcif = FALSE;
465 if (flags & IPV6_OUTARGS) {
466 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
467 ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
468 IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
469 }
470 }
471
472 if (flags & IPV6_OUTARGS) {
473 if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) {
474 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
475 }
476 if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE) {
477 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
478 }
479 if (ip6oa->ip6oa_flags & IP6OAF_NO_CONSTRAINED) {
480 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_CONSTRAINED;
481 }
482 adv = &ip6oa->ip6oa_flowadv;
483 adv->code = FADV_SUCCESS;
484 ip6oa->ip6oa_retflags = 0;
485 }
486
487 /*
488 * Clear out ifpp to be filled in after determining route. ifpp_save is
489 * used to keep old value to release reference properly and dtrace
490 * ipsec tunnel traffic properly.
491 */
492 if (ifpp != NULL && *ifpp != NULL) {
493 *ifpp = NULL;
494 }
495
496 #if DUMMYNET
497 if (args.fwa_pf_rule) {
498 ip6 = mtod(m, struct ip6_hdr *);
499 VERIFY(ro != NULL); /* ro == saved_route */
500 goto check_with_pf;
501 }
502 #endif /* DUMMYNET */
503
504 #if NECP
505 /*
506 * Since all packets are assumed to come from same socket, necp lookup
507 * only needs to happen once per function entry.
508 */
509 necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
510 (flags & IPV6_OUTARGS) ? ip6oa : NULL, ro ? ro->ro_rt : NULL, &necp_result,
511 &necp_result_parameter);
512 #endif /* NECP */
513
514 /*
515 * If a chain was passed in, prepare for the first iteration. For all
516 * other iterations, this work will be done at evaluateloop: label.
517 */
518 if (packetchain) {
519 /*
520 * Remove m from the chain during processing to avoid
521 * accidental frees on entire list.
522 */
523 inputchain = m->m_nextpkt;
524 m->m_nextpkt = NULL;
525 }
526
527 loopit:
528 packets_processed++;
529 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP | PKTF_IFAINFO);
530 ip6 = mtod(m, struct ip6_hdr *);
531 nxt0 = ip6->ip6_nxt;
532 finaldst = ip6->ip6_dst;
533 ip6obf.hdrsplit = FALSE;
534 ro_pmtu = NULL;
535
536 if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
537 inject_filter_ref = ipf_get_inject_filter(m);
538 } else {
539 inject_filter_ref = NULL;
540 }
541
542 #define MAKE_EXTHDR(hp, mp) do { \
543 if (hp != NULL) { \
544 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
545 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
546 ((eh)->ip6e_len + 1) << 3); \
547 if (error) \
548 goto freehdrs; \
549 } \
550 } while (0)
551
552 if (opt != NULL) {
553 /* Hop-by-Hop options header */
554 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
555 /* Destination options header(1st part) */
556 if (opt->ip6po_rthdr) {
557 /*
558 * Destination options header(1st part)
559 * This only makes sense with a routing header.
560 * See Section 9.2 of RFC 3542.
561 * Disabling this part just for MIP6 convenience is
562 * a bad idea. We need to think carefully about a
563 * way to make the advanced API coexist with MIP6
564 * options, which might automatically be inserted in
565 * the kernel.
566 */
567 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
568 }
569 /* Routing header */
570 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
571 /* Destination options header(2nd part) */
572 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
573 }
574
575 #undef MAKE_EXTHDR
576
577 #if NECP
578 if (necp_matched_policy_id) {
579 necp_mark_packet_from_ip(m, necp_matched_policy_id);
580
581 switch (necp_result) {
582 case NECP_KERNEL_POLICY_RESULT_PASS:
583 goto skip_ipsec;
584 case NECP_KERNEL_POLICY_RESULT_DROP:
585 error = EHOSTUNREACH;
586 ip6stat.ip6s_necp_policy_drop++;
587 goto freehdrs;
588 case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
589 /*
590 * Flow divert packets should be blocked at the IP
591 * layer.
592 */
593 error = EHOSTUNREACH;
594 ip6stat.ip6s_necp_policy_drop++;
595 goto freehdrs;
596 case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
597 /*
598 * Verify that the packet is being routed to the tunnel
599 */
600 struct ifnet *policy_ifp =
601 necp_get_ifnet_from_result_parameter(
602 &necp_result_parameter);
603
604 if (policy_ifp == ifp) {
605 goto skip_ipsec;
606 } else {
607 if (necp_packet_can_rebind_to_ifnet(m,
608 policy_ifp, (struct route *)&necp_route,
609 AF_INET6)) {
610 /*
611 * Set scoped index to the tunnel
612 * interface, since it is compatible
613 * with the packet. This will only work
614 * for callers who pass IPV6_OUTARGS,
615 * but that covers all of the clients
616 * we care about today.
617 */
618 if (flags & IPV6_OUTARGS) {
619 ip6oa->ip6oa_boundif =
620 policy_ifp->if_index;
621 ip6oa->ip6oa_flags |=
622 IP6OAF_BOUND_IF;
623 }
624 if (opt != NULL
625 && opt->ip6po_pktinfo != NULL) {
626 opt->ip6po_pktinfo->
627 ipi6_ifindex =
628 policy_ifp->if_index;
629 }
630 ro = &necp_route;
631 goto skip_ipsec;
632 } else {
633 error = ENETUNREACH;
634 ip6stat.ip6s_necp_policy_drop++;
635 goto freehdrs;
636 }
637 }
638 }
639 default:
640 break;
641 }
642 }
643 #endif /* NECP */
644
645 #if IPSEC
646 if (ipsec_bypass != 0 || ip6obf.noipsec) {
647 goto skip_ipsec;
648 }
649
650 if (sp == NULL) {
651 /* get a security policy for this packet */
652 if (so != NULL) {
653 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
654 so, &error);
655 } else {
656 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
657 0, &error);
658 }
659 if (sp == NULL) {
660 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
661 goto freehdrs;
662 }
663 }
664
665 error = 0;
666
667 /* check policy */
668 switch (sp->policy) {
669 case IPSEC_POLICY_DISCARD:
670 case IPSEC_POLICY_GENERATE:
671 /*
672 * This packet is just discarded.
673 */
674 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
675 goto freehdrs;
676
677 case IPSEC_POLICY_BYPASS:
678 case IPSEC_POLICY_NONE:
679 /* no need to do IPsec. */
680 ip6obf.needipsec = FALSE;
681 break;
682
683 case IPSEC_POLICY_IPSEC:
684 if (sp->req == NULL) {
685 /* acquire a policy */
686 error = key_spdacquire(sp);
687 goto freehdrs;
688 }
689 if (sp->ipsec_if) {
690 goto skip_ipsec;
691 } else {
692 ip6obf.needipsec = TRUE;
693 }
694 break;
695
696 case IPSEC_POLICY_ENTRUST:
697 default:
698 printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
699 break;
700 }
701 skip_ipsec:
702 #endif /* IPSEC */
703
704 /*
705 * Calculate the total length of the extension header chain.
706 * Keep the length of the unfragmentable part for fragmentation.
707 */
708 optlen = 0;
709 if (exthdrs.ip6e_hbh != NULL) {
710 optlen += exthdrs.ip6e_hbh->m_len;
711 }
712 if (exthdrs.ip6e_dest1 != NULL) {
713 optlen += exthdrs.ip6e_dest1->m_len;
714 }
715 if (exthdrs.ip6e_rthdr != NULL) {
716 optlen += exthdrs.ip6e_rthdr->m_len;
717 }
718 unfragpartlen = optlen + sizeof(struct ip6_hdr);
719
720 /* NOTE: we don't add AH/ESP length here. do that later. */
721 if (exthdrs.ip6e_dest2 != NULL) {
722 optlen += exthdrs.ip6e_dest2->m_len;
723 }
724
725 /*
726 * If we need IPsec, or there is at least one extension header,
727 * separate IP6 header from the payload.
728 */
729 if ((
730 #if IPSEC
731 ip6obf.needipsec ||
732 #endif /* IPSEC */
733 optlen) && !ip6obf.hdrsplit) {
734 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
735 m = NULL;
736 goto freehdrs;
737 }
738 m = exthdrs.ip6e_ip6;
739 ip6obf.hdrsplit = TRUE;
740 }
741
742 /* adjust pointer */
743 ip6 = mtod(m, struct ip6_hdr *);
744
745 /* adjust mbuf packet header length */
746 m->m_pkthdr.len += optlen;
747 plen = m->m_pkthdr.len - sizeof(*ip6);
748
749 /* If this is a jumbo payload, insert a jumbo payload option. */
750 if (plen > IPV6_MAXPACKET) {
751 if (!ip6obf.hdrsplit) {
752 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
753 m = NULL;
754 goto freehdrs;
755 }
756 m = exthdrs.ip6e_ip6;
757 ip6obf.hdrsplit = TRUE;
758 }
759 /* adjust pointer */
760 ip6 = mtod(m, struct ip6_hdr *);
761 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) {
762 goto freehdrs;
763 }
764 ip6->ip6_plen = 0;
765 } else {
766 ip6->ip6_plen = htons(plen);
767 }
768 /*
769 * Concatenate headers and fill in next header fields.
770 * Here we have, on "m"
771 * IPv6 payload
772 * and we insert headers accordingly. Finally, we should be getting:
773 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
774 *
775 * during the header composing process, "m" points to IPv6 header.
776 * "mprev" points to an extension header prior to esp.
777 */
778 nexthdrp = &ip6->ip6_nxt;
779 mprev = m;
780
781 /*
782 * we treat dest2 specially. this makes IPsec processing
783 * much easier. the goal here is to make mprev point to the
784 * mbuf prior to dest2.
785 *
786 * result: IPv6 dest2 payload
787 * m and mprev will point to IPv6 header.
788 */
789 if (exthdrs.ip6e_dest2 != NULL) {
790 if (!ip6obf.hdrsplit) {
791 panic("assumption failed: hdr not split");
792 /* NOTREACHED */
793 }
794 exthdrs.ip6e_dest2->m_next = m->m_next;
795 m->m_next = exthdrs.ip6e_dest2;
796 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
797 ip6->ip6_nxt = IPPROTO_DSTOPTS;
798 }
799
800 #define MAKE_CHAIN(m, mp, p, i) do { \
801 if (m != NULL) { \
802 if (!ip6obf.hdrsplit) { \
803 panic("assumption failed: hdr not split"); \
804 /* NOTREACHED */ \
805 } \
806 *mtod((m), u_char *) = *(p); \
807 *(p) = (i); \
808 p = mtod((m), u_char *); \
809 (m)->m_next = (mp)->m_next; \
810 (mp)->m_next = (m); \
811 (mp) = (m); \
812 } \
813 } while (0)
814 /*
815 * result: IPv6 hbh dest1 rthdr dest2 payload
816 * m will point to IPv6 header. mprev will point to the
817 * extension header prior to dest2 (rthdr in the above case).
818 */
819 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
820 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
821 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
822
823 /* It is no longer safe to free the pointers in exthdrs. */
824 exthdrs.merged = TRUE;
825
826 #undef MAKE_CHAIN
827
828 #if IPSEC
829 if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
830 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
831 }
832 #endif /* IPSEC */
833
834 if (!TAILQ_EMPTY(&ipv6_filters) &&
835 !((flags & IPV6_OUTARGS) &&
836 (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED)
837 #if NECP
838 && !necp_packet_should_skip_filters(m)
839 #endif // NECP
840 )) {
841 struct ipfilter *filter;
842 int seen = (inject_filter_ref == NULL);
843 int fixscope = 0;
844
845 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
846 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
847 IM6O_LOCK(im6o);
848 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
849 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
850 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
851 IM6O_UNLOCK(im6o);
852 }
853
854 /* Hack: embed the scope_id in the destination */
855 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
856 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
857 fixscope = 1;
858 ip6->ip6_dst.s6_addr16[1] =
859 htons(ro->ro_dst.sin6_scope_id);
860 }
861
862 ipf_ref();
863 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
864 /*
865 * Don't process packet twice if we've already seen it.
866 */
867 if (seen == 0) {
868 if ((struct ipfilter *)inject_filter_ref ==
869 filter) {
870 seen = 1;
871 }
872 } else if (filter->ipf_filter.ipf_output != NULL) {
873 errno_t result;
874
875 result = filter->ipf_filter.ipf_output(
876 filter->ipf_filter.cookie,
877 (mbuf_t *)&m, ippo);
878 if (result == EJUSTRETURN) {
879 ipf_unref();
880 m = NULL;
881 goto evaluateloop;
882 }
883 if (result != 0) {
884 ipf_unref();
885 goto bad;
886 }
887 }
888 }
889 ipf_unref();
890
891 ip6 = mtod(m, struct ip6_hdr *);
892 /* Hack: cleanup embedded scope_id if we put it there */
893 if (fixscope) {
894 ip6->ip6_dst.s6_addr16[1] = 0;
895 }
896 }
897
898 #if IPSEC
899 if (ip6obf.needipsec) {
900 int segleft_org;
901
902 /*
903 * pointers after IPsec headers are not valid any more.
904 * other pointers need a great care too.
905 * (IPsec routines should not mangle mbufs prior to AH/ESP)
906 */
907 exthdrs.ip6e_dest2 = NULL;
908
909 if (exthdrs.ip6e_rthdr != NULL) {
910 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
911 segleft_org = rh->ip6r_segleft;
912 rh->ip6r_segleft = 0;
913 } else {
914 rh = NULL;
915 segleft_org = 0;
916 }
917
918 ipsec_state.m = m;
919 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
920 sp, flags, &needipsectun);
921 m = ipsec_state.m;
922 if (error) {
923 /* mbuf is already reclaimed in ipsec6_output_trans. */
924 m = NULL;
925 switch (error) {
926 case EHOSTUNREACH:
927 case ENETUNREACH:
928 case EMSGSIZE:
929 case ENOBUFS:
930 case ENOMEM:
931 break;
932 default:
933 printf("ip6_output (ipsec): error code %d\n",
934 error);
935 /* FALLTHRU */
936 case ENOENT:
937 /* don't show these error codes to the user */
938 error = 0;
939 break;
940 }
941 goto bad;
942 }
943 if (exthdrs.ip6e_rthdr != NULL) {
944 /* ah6_output doesn't modify mbuf chain */
945 rh->ip6r_segleft = segleft_org;
946 }
947 }
948 #endif /* IPSEC */
949
950 /* If there is a routing header, discard the packet. */
951 if (exthdrs.ip6e_rthdr != NULL) {
952 error = EINVAL;
953 goto bad;
954 }
955
956 /* Source address validation */
957 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
958 !(flags & IPV6_UNSPECSRC)) {
959 error = EOPNOTSUPP;
960 ip6stat.ip6s_badscope++;
961 goto bad;
962 }
963 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
964 error = EOPNOTSUPP;
965 ip6stat.ip6s_badscope++;
966 goto bad;
967 }
968
969 ip6stat.ip6s_localout++;
970
971 /*
972 * Route packet.
973 */
974 if (ro == NULL) {
975 ro = &ip6route;
976 bzero((caddr_t)ro, sizeof(*ro));
977 }
978 ro_pmtu = ro;
979 if (opt != NULL && opt->ip6po_rthdr) {
980 ro = &opt->ip6po_route;
981 }
982 dst = SIN6(&ro->ro_dst);
983
984 if (ro->ro_rt != NULL) {
985 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
986 }
987 /*
988 * if specified, try to fill in the traffic class field.
989 * do not override if a non-zero value is already set.
990 * we check the diffserv field and the ecn field separately.
991 */
992 if (opt != NULL && opt->ip6po_tclass >= 0) {
993 int mask = 0;
994
995 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) {
996 mask |= 0xfc;
997 }
998 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) {
999 mask |= 0x03;
1000 }
1001 if (mask != 0) {
1002 ip6->ip6_flow |=
1003 htonl((opt->ip6po_tclass & mask) << 20);
1004 }
1005 }
1006
1007 /* fill in or override the hop limit field, if necessary. */
1008 if (opt && opt->ip6po_hlim != -1) {
1009 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
1010 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1011 if (im6o != NULL) {
1012 IM6O_LOCK(im6o);
1013 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
1014 IM6O_UNLOCK(im6o);
1015 } else {
1016 ip6->ip6_hlim = ip6_defmcasthlim;
1017 }
1018 }
1019
1020 /*
1021 * If there is a cached route, check that it is to the same
1022 * destination and is still up. If not, free it and try again.
1023 * Test rt_flags without holding rt_lock for performance reasons;
1024 * if the route is down it will hopefully be caught by the layer
1025 * below (since it uses this route as a hint) or during the
1026 * next transmit.
1027 */
1028 if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
1029 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst)) {
1030 ROUTE_RELEASE(ro);
1031 }
1032
1033 if (ro->ro_rt == NULL) {
1034 bzero(dst, sizeof(*dst));
1035 dst->sin6_family = AF_INET6;
1036 dst->sin6_len = sizeof(struct sockaddr_in6);
1037 dst->sin6_addr = ip6->ip6_dst;
1038 }
1039 #if IPSEC
1040 if (ip6obf.needipsec && needipsectun) {
1041 #if CONFIG_DTRACE
1042 struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
1043 #endif /* CONFIG_DTRACE */
1044 /*
1045 * All the extension headers will become inaccessible
1046 * (since they can be encrypted).
1047 * Don't panic, we need no more updates to extension headers
1048 * on inner IPv6 packet (since they are now encapsulated).
1049 *
1050 * IPv6 [ESP|AH] IPv6 [extension headers] payload
1051 */
1052 bzero(&exthdrs, sizeof(exthdrs));
1053 exthdrs.ip6e_ip6 = m;
1054
1055 ipsec_state.m = m;
1056 route_copyout((struct route *)&ipsec_state.ro, (struct route *)ro,
1057 sizeof(struct route_in6));
1058 ipsec_state.dst = SA(dst);
1059
1060 /* So that we can see packets inside the tunnel */
1061 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
1062 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
1063 struct ip *, NULL, struct ip6_hdr *, ip6);
1064
1065 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
1066 /* tunneled in IPv4? packet is gone */
1067 if (ipsec_state.tunneled == 4) {
1068 m = NULL;
1069 goto evaluateloop;
1070 }
1071 m = ipsec_state.m;
1072 ipsec_saved_route = ro;
1073 ro = (struct route_in6 *)&ipsec_state.ro;
1074 dst = SIN6(ipsec_state.dst);
1075 if (error) {
1076 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
1077 m = NULL;
1078 switch (error) {
1079 case EHOSTUNREACH:
1080 case ENETUNREACH:
1081 case EMSGSIZE:
1082 case ENOBUFS:
1083 case ENOMEM:
1084 break;
1085 default:
1086 printf("ip6_output (ipsec): error code %d\n",
1087 error);
1088 /* FALLTHRU */
1089 case ENOENT:
1090 /* don't show these error codes to the user */
1091 error = 0;
1092 break;
1093 }
1094 goto bad;
1095 }
1096 /*
1097 * The packet has been encapsulated so the ifscope
1098 * is no longer valid since it does not apply to the
1099 * outer address: ignore the ifscope.
1100 */
1101 if (flags & IPV6_OUTARGS) {
1102 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
1103 ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
1104 }
1105 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
1106 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE) {
1107 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
1108 }
1109 }
1110 exthdrs.ip6e_ip6 = m;
1111 }
1112 #endif /* IPSEC */
1113
1114 /*
1115 * ifp should only be filled in for dummy net packets which will jump
1116 * to check_with_pf label.
1117 */
1118 if (ifp != NULL) {
1119 VERIFY(ip6obf.route_selected);
1120 }
1121
1122 /* adjust pointer */
1123 ip6 = mtod(m, struct ip6_hdr *);
1124
1125 if (ip6obf.select_srcif) {
1126 bzero(&src_sa, sizeof(src_sa));
1127 src_sa.sin6_family = AF_INET6;
1128 src_sa.sin6_len = sizeof(src_sa);
1129 src_sa.sin6_addr = ip6->ip6_src;
1130 }
1131 bzero(&dst_sa, sizeof(dst_sa));
1132 dst_sa.sin6_family = AF_INET6;
1133 dst_sa.sin6_len = sizeof(dst_sa);
1134 dst_sa.sin6_addr = ip6->ip6_dst;
1135
1136 /*
1137 * Only call in6_selectroute() on first iteration to avoid taking
1138 * multiple references on ifp and rt.
1139 *
1140 * in6_selectroute() might return an ifp with its reference held
1141 * even in the error case, so make sure to release its reference.
1142 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
1143 */
1144 if (!ip6obf.route_selected) {
1145 error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
1146 &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
1147
1148 if (error != 0) {
1149 switch (error) {
1150 case EHOSTUNREACH:
1151 ip6stat.ip6s_noroute++;
1152 break;
1153 case EADDRNOTAVAIL:
1154 default:
1155 break; /* XXX statistics? */
1156 }
1157 if (ifp != NULL) {
1158 in6_ifstat_inc(ifp, ifs6_out_discard);
1159 }
1160 /* ifp (if non-NULL) will be released at the end */
1161 goto bad;
1162 }
1163 ip6obf.route_selected = TRUE;
1164 }
1165 if (rt == NULL) {
1166 /*
1167 * If in6_selectroute() does not return a route entry,
1168 * dst may not have been updated.
1169 */
1170 *dst = dst_sa; /* XXX */
1171 }
1172
1173 #if NECP
1174 /* Catch-all to check if the interface is allowed */
1175 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1176 error = EHOSTUNREACH;
1177 ip6stat.ip6s_necp_policy_drop++;
1178 goto bad;
1179 }
1180 #endif /* NECP */
1181
1182 /*
1183 * then rt (for unicast) and ifp must be non-NULL valid values.
1184 */
1185 if (!(flags & IPV6_FORWARDING)) {
1186 in6_ifstat_inc_na(ifp, ifs6_out_request);
1187 }
1188 if (rt != NULL) {
1189 RT_LOCK(rt);
1190 if (ia == NULL) {
1191 ia = (struct in6_ifaddr *)(rt->rt_ifa);
1192 if (ia != NULL) {
1193 IFA_ADDREF(&ia->ia_ifa);
1194 }
1195 }
1196 rt->rt_use++;
1197 RT_UNLOCK(rt);
1198 }
1199
1200 /*
1201 * The outgoing interface must be in the zone of source and
1202 * destination addresses (except local/loopback). We should
1203 * use ia_ifp to support the case of sending packets to an
1204 * address of our own.
1205 */
1206 if (ia != NULL && ia->ia_ifp) {
1207 ifnet_reference(ia->ia_ifp); /* for origifp */
1208 if (origifp != NULL) {
1209 ifnet_release(origifp);
1210 }
1211 origifp = ia->ia_ifp;
1212 } else {
1213 if (ifp != NULL) {
1214 ifnet_reference(ifp); /* for origifp */
1215 }
1216 if (origifp != NULL) {
1217 ifnet_release(origifp);
1218 }
1219 origifp = ifp;
1220 }
1221
1222 /* skip scope enforcements for local/loopback route */
1223 if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1224 struct in6_addr src0, dst0;
1225 u_int32_t zone;
1226
1227 src0 = ip6->ip6_src;
1228 if (in6_setscope(&src0, origifp, &zone)) {
1229 goto badscope;
1230 }
1231 bzero(&src_sa, sizeof(src_sa));
1232 src_sa.sin6_family = AF_INET6;
1233 src_sa.sin6_len = sizeof(src_sa);
1234 src_sa.sin6_addr = ip6->ip6_src;
1235 if ((sa6_recoverscope(&src_sa, TRUE) ||
1236 zone != src_sa.sin6_scope_id)) {
1237 goto badscope;
1238 }
1239
1240 dst0 = ip6->ip6_dst;
1241 if ((in6_setscope(&dst0, origifp, &zone))) {
1242 goto badscope;
1243 }
1244 /* re-initialize to be sure */
1245 bzero(&dst_sa, sizeof(dst_sa));
1246 dst_sa.sin6_family = AF_INET6;
1247 dst_sa.sin6_len = sizeof(dst_sa);
1248 dst_sa.sin6_addr = ip6->ip6_dst;
1249 if ((sa6_recoverscope(&dst_sa, TRUE) ||
1250 zone != dst_sa.sin6_scope_id)) {
1251 goto badscope;
1252 }
1253
1254 /* scope check is done. */
1255 goto routefound;
1256
1257 badscope:
1258 ip6stat.ip6s_badscope++;
1259 in6_ifstat_inc(origifp, ifs6_out_discard);
1260 if (error == 0) {
1261 error = EHOSTUNREACH; /* XXX */
1262 }
1263 goto bad;
1264 }
1265
1266 routefound:
1267 if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1268 if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1269 /*
1270 * The nexthop is explicitly specified by the
1271 * application. We assume the next hop is an IPv6
1272 * address.
1273 */
1274 dst = SIN6(opt->ip6po_nexthop);
1275 } else if ((rt->rt_flags & RTF_GATEWAY)) {
1276 dst = SIN6(rt->rt_gateway);
1277 }
1278 /*
1279 * For packets destined to local/loopback, record the
1280 * source interface (which owns the source
1281 * address), as well as the output interface. This is
1282 * needed to reconstruct the embedded zone for the
1283 * link-local address case in ip6_input().
1284 */
1285 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1286 uint32_t srcidx;
1287
1288 if (src_ia != NULL) {
1289 srcidx = src_ia->ia_ifp->if_index;
1290 } else if (ro->ro_srcia != NULL) {
1291 srcidx = ro->ro_srcia->ifa_ifp->if_index;
1292 } else {
1293 srcidx = 0;
1294 }
1295
1296 ip6_setsrcifaddr_info(m, srcidx, NULL);
1297 ip6_setdstifaddr_info(m, 0, ia);
1298 }
1299 }
1300
1301 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1302 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1303 } else {
1304 struct in6_multi *in6m;
1305
1306 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1307 in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1308
1309 /*
1310 * Confirm that the outgoing interface supports multicast.
1311 */
1312 if (!(ifp->if_flags & IFF_MULTICAST)) {
1313 ip6stat.ip6s_noroute++;
1314 in6_ifstat_inc(ifp, ifs6_out_discard);
1315 error = ENETUNREACH;
1316 goto bad;
1317 }
1318 in6_multihead_lock_shared();
1319 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1320 in6_multihead_lock_done();
1321 if (im6o != NULL) {
1322 IM6O_LOCK(im6o);
1323 }
1324 if (in6m != NULL &&
1325 (im6o == NULL || im6o->im6o_multicast_loop)) {
1326 if (im6o != NULL) {
1327 IM6O_UNLOCK(im6o);
1328 }
1329 /*
1330 * If we belong to the destination multicast group
1331 * on the outgoing interface, and the caller did not
1332 * forbid loopback, loop back a copy.
1333 */
1334 ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
1335 } else if (im6o != NULL) {
1336 IM6O_UNLOCK(im6o);
1337 }
1338 if (in6m != NULL) {
1339 IN6M_REMREF(in6m);
1340 }
1341 /*
1342 * Multicasts with a hoplimit of zero may be looped back,
1343 * above, but must not be transmitted on a network.
1344 * Also, multicasts addressed to the loopback interface
1345 * are not sent -- the above call to ip6_mloopback() will
1346 * loop back a copy if this host actually belongs to the
1347 * destination group on the loopback interface.
1348 */
1349 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1350 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1351 /* remove m from the packetchain and continue looping */
1352 if (m != NULL) {
1353 m_freem(m);
1354 }
1355 m = NULL;
1356 goto evaluateloop;
1357 }
1358 }
1359
1360 /*
1361 * Fill the outgoing interface to tell the upper layer
1362 * to increment per-interface statistics.
1363 */
1364 if (ifpp != NULL && *ifpp == NULL) {
1365 ifnet_reference(ifp); /* for caller */
1366 *ifpp = ifp;
1367 }
1368
1369 /* Determine path MTU. */
1370 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu)) != 0) {
1371 goto bad;
1372 }
1373
1374 /*
1375 * The caller of this function may specify to use the minimum MTU
1376 * in some cases.
1377 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1378 * setting. The logic is a bit complicated; by default, unicast
1379 * packets will follow path MTU while multicast packets will be sent at
1380 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1381 * including unicast ones will be sent at the minimum MTU. Multicast
1382 * packets will always be sent at the minimum MTU unless
1383 * IP6PO_MINMTU_DISABLE is explicitly specified.
1384 * See RFC 3542 for more details.
1385 */
1386 if (mtu > IPV6_MMTU) {
1387 if ((flags & IPV6_MINMTU)) {
1388 mtu = IPV6_MMTU;
1389 } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
1390 mtu = IPV6_MMTU;
1391 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1392 (opt == NULL ||
1393 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1394 mtu = IPV6_MMTU;
1395 }
1396 }
1397
1398 /*
1399 * clear embedded scope identifiers if necessary.
1400 * in6_clearscope will touch the addresses only when necessary.
1401 */
1402 in6_clearscope(&ip6->ip6_src);
1403 in6_clearscope(&ip6->ip6_dst);
1404 /*
1405 * If the outgoing packet contains a hop-by-hop options header,
1406 * it must be examined and processed even by the source node.
1407 * (RFC 2460, section 4.)
1408 */
1409 if (exthdrs.ip6e_hbh != NULL) {
1410 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1411 u_int32_t dummy; /* XXX unused */
1412 uint32_t oplen = 0; /* for ip6_process_hopopts() */
1413 #if DIAGNOSTIC
1414 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) {
1415 panic("ip6e_hbh is not continuous");
1416 }
1417 #endif
1418 /*
1419 * XXX: If we have to send an ICMPv6 error to the sender,
1420 * we need the M_LOOP flag since icmp6_error() expects
1421 * the IPv6 and the hop-by-hop options header are
1422 * continuous unless the flag is set.
1423 */
1424 m->m_flags |= M_LOOP;
1425 m->m_pkthdr.rcvif = ifp;
1426 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1427 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1428 &dummy, &oplen) < 0) {
1429 /*
1430 * m was already freed at this point. Set to NULL so it
1431 * is not re-freed at end of ip6_output_list.
1432 */
1433 m = NULL;
1434 error = EINVAL; /* better error? */
1435 goto bad;
1436 }
1437 m->m_flags &= ~M_LOOP; /* XXX */
1438 m->m_pkthdr.rcvif = NULL;
1439 }
1440
1441 #if DUMMYNET
1442 check_with_pf:
1443 #endif /* DUMMYNET */
1444 #if PF
1445 if (PF_IS_ENABLED && !skip_pf) {
1446 #if DUMMYNET
1447
1448 /*
1449 * TODO: Need to save opt->ip6po_flags for reinjection
1450 * rdar://10434993
1451 */
1452 args.fwa_m = m;
1453 args.fwa_oif = ifp;
1454 args.fwa_oflags = flags;
1455 if (flags & IPV6_OUTARGS) {
1456 args.fwa_ip6oa = ip6oa;
1457 }
1458 args.fwa_ro6 = ro;
1459 args.fwa_dst6 = dst;
1460 args.fwa_ro6_pmtu = ro_pmtu;
1461 args.fwa_origifp = origifp;
1462 args.fwa_mtu = mtu;
1463 args.fwa_unfragpartlen = unfragpartlen;
1464 args.fwa_exthdrs = &exthdrs;
1465 /* Invoke outbound packet filter */
1466 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1467 #else /* !DUMMYNET */
1468 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1469 #endif /* !DUMMYNET */
1470
1471 if (error != 0 || m == NULL) {
1472 if (m != NULL) {
1473 panic("%s: unexpected packet %p\n",
1474 __func__, m);
1475 /* NOTREACHED */
1476 }
1477 /* m was already freed by callee and is now NULL. */
1478 goto evaluateloop;
1479 }
1480 ip6 = mtod(m, struct ip6_hdr *);
1481 }
1482 #endif /* PF */
1483
1484 #ifdef IPSEC
1485 /* clean ipsec history before fragmentation */
1486 ipsec_delaux(m);
1487 #endif /* IPSEC */
1488
1489 if (ip6oa != NULL) {
1490 u_int8_t dscp;
1491
1492 dscp = (ntohl(ip6->ip6_flow) & IP6FLOW_DSCP_MASK) >> IP6FLOW_DSCP_SHIFT;
1493
1494 error = set_packet_qos(m, ifp,
1495 ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
1496 ip6oa->ip6oa_sotc, ip6oa->ip6oa_netsvctype, &dscp);
1497 if (error == 0) {
1498 ip6->ip6_flow &= ~htonl(IP6FLOW_DSCP_MASK);
1499 ip6->ip6_flow |= htonl((u_int32_t)dscp << IP6FLOW_DSCP_SHIFT);
1500 } else {
1501 printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
1502 error = 0;
1503 }
1504 }
1505 /*
1506 * Determine whether fragmentation is necessary. If so, m is passed
1507 * back as a chain of packets and original mbuf is freed. Otherwise, m
1508 * is unchanged.
1509 */
1510 error = ip6_fragment_packet(&m, opt,
1511 &exthdrs, ifp, mtu, unfragpartlen, ro_pmtu, nxt0,
1512 optlen);
1513
1514 if (error) {
1515 goto bad;
1516 }
1517
1518 /*
1519 * The evaluateloop label is where we decide whether to continue looping over
1520 * packets or call into nd code to send.
1521 */
1522 evaluateloop:
1523
1524 /*
1525 * m may be NULL when we jump to the evaluateloop label from PF or
1526 * other code that can drop packets.
1527 */
1528 if (m != NULL) {
1529 /*
1530 * If we already have a chain to send, tack m onto the end.
1531 * Otherwise make m the start and end of the to-be-sent chain.
1532 */
1533 if (sendchain != NULL) {
1534 sendchain_last->m_nextpkt = m;
1535 } else {
1536 sendchain = m;
1537 }
1538
1539 /* Fragmentation may mean m is a chain. Find the last packet. */
1540 while (m->m_nextpkt) {
1541 m = m->m_nextpkt;
1542 }
1543 sendchain_last = m;
1544 pktcnt++;
1545 }
1546
1547 /* Fill in next m from inputchain as appropriate. */
1548 m = inputchain;
1549 if (m != NULL) {
1550 /* Isolate m from rest of input chain. */
1551 inputchain = m->m_nextpkt;
1552 m->m_nextpkt = NULL;
1553
1554 /*
1555 * Clear exthdrs and ipsec_state so stale contents are not
1556 * reused. Note this also clears the exthdrs.merged flag.
1557 */
1558 bzero(&exthdrs, sizeof(exthdrs));
1559 bzero(&ipsec_state, sizeof(ipsec_state));
1560
1561 /* Continue looping. */
1562 goto loopit;
1563 }
1564
1565 /*
1566 * If we get here, there's no more mbufs in inputchain, so send the
1567 * sendchain if there is one.
1568 */
1569 if (pktcnt > 0) {
1570 error = nd6_output_list(ifp, origifp, sendchain, dst,
1571 ro->ro_rt, adv);
1572 /*
1573 * Fall through to done label even in error case because
1574 * nd6_output_list frees packetchain in both success and
1575 * failure cases.
1576 */
1577 }
1578
1579 done:
1580 if (ifpp_save != NULL && *ifpp_save != NULL) {
1581 ifnet_release(*ifpp_save);
1582 *ifpp_save = NULL;
1583 }
1584 ROUTE_RELEASE(&ip6route);
1585 #if IPSEC
1586 ROUTE_RELEASE(&ipsec_state.ro);
1587 if (sp != NULL) {
1588 key_freesp(sp, KEY_SADB_UNLOCKED);
1589 }
1590 #endif /* IPSEC */
1591 #if NECP
1592 ROUTE_RELEASE(&necp_route);
1593 #endif /* NECP */
1594 #if DUMMYNET
1595 ROUTE_RELEASE(&saved_route);
1596 ROUTE_RELEASE(&saved_ro_pmtu);
1597 #endif /* DUMMYNET */
1598
1599 if (ia != NULL) {
1600 IFA_REMREF(&ia->ia_ifa);
1601 }
1602 if (src_ia != NULL) {
1603 IFA_REMREF(&src_ia->ia_ifa);
1604 }
1605 if (ifp != NULL) {
1606 ifnet_release(ifp);
1607 }
1608 if (origifp != NULL) {
1609 ifnet_release(origifp);
1610 }
1611 if (ip6_output_measure) {
1612 net_perf_measure_time(&net_perf, &start_tv, packets_processed);
1613 net_perf_histogram(&net_perf, packets_processed);
1614 }
1615 return error;
1616
1617 freehdrs:
1618 if (exthdrs.ip6e_hbh != NULL) {
1619 if (exthdrs.merged) {
1620 panic("Double free of ip6e_hbh");
1621 }
1622 m_freem(exthdrs.ip6e_hbh);
1623 }
1624 if (exthdrs.ip6e_dest1 != NULL) {
1625 if (exthdrs.merged) {
1626 panic("Double free of ip6e_dest1");
1627 }
1628 m_freem(exthdrs.ip6e_dest1);
1629 }
1630 if (exthdrs.ip6e_rthdr != NULL) {
1631 if (exthdrs.merged) {
1632 panic("Double free of ip6e_rthdr");
1633 }
1634 m_freem(exthdrs.ip6e_rthdr);
1635 }
1636 if (exthdrs.ip6e_dest2 != NULL) {
1637 if (exthdrs.merged) {
1638 panic("Double free of ip6e_dest2");
1639 }
1640 m_freem(exthdrs.ip6e_dest2);
1641 }
1642 /* FALLTHRU */
1643 bad:
1644 if (inputchain != NULL) {
1645 m_freem_list(inputchain);
1646 }
1647 if (sendchain != NULL) {
1648 m_freem_list(sendchain);
1649 }
1650 if (m != NULL) {
1651 m_freem(m);
1652 }
1653
1654 goto done;
1655
1656 #undef ipf_pktopts
1657 #undef exthdrs
1658 #undef ip6route
1659 #undef ipsec_state
1660 #undef saved_route
1661 #undef saved_ro_pmtu
1662 #undef args
1663 }
1664
1665 /* ip6_fragment_packet
1666 *
1667 * The fragmentation logic is rather complex:
1668 * 1: normal case (dontfrag == 0)
1669 * 1-a: send as is if tlen <= path mtu
1670 * 1-b: fragment if tlen > path mtu
1671 *
1672 * 2: if user asks us not to fragment (dontfrag == 1)
1673 * 2-a: send as is if tlen <= interface mtu
1674 * 2-b: error if tlen > interface mtu
1675 */
1676
1677 static int
1678 ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
1679 struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
1680 uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
1681 int nxt0, uint32_t optlen)
1682 {
1683 VERIFY(NULL != mptr);
1684 struct mbuf *m = *mptr;
1685 int error = 0;
1686 size_t tlen = m->m_pkthdr.len;
1687 boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
1688
1689 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
1690 dontfrag = TRUE;
1691 /*
1692 * Discard partial sum information if this packet originated
1693 * from another interface; the packet would already have the
1694 * final checksum and we shouldn't recompute it.
1695 */
1696 if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
1697 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
1698 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1699 m->m_pkthdr.csum_data = 0;
1700 }
1701 }
1702
1703 /* Access without acquiring nd_ifinfo lock for performance */
1704 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
1705 /*
1706 * Even if the DONTFRAG option is specified, we cannot send the
1707 * packet when the data length is larger than the MTU of the
1708 * outgoing interface.
1709 * Notify the error by sending IPV6_PATHMTU ancillary data as
1710 * well as returning an error code (the latter is not described
1711 * in the API spec.)
1712 */
1713 u_int32_t mtu32;
1714 struct ip6ctlparam ip6cp;
1715
1716 mtu32 = (u_int32_t)mtu;
1717 bzero(&ip6cp, sizeof(ip6cp));
1718 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1719 pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
1720 return EMSGSIZE;
1721 }
1722
1723 /*
1724 * transmit packet without fragmentation
1725 */
1726 if (dontfrag ||
1727 (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
1728 (ifp->if_hwassist & CSUM_FRAGMENT_IPV6))) {
1729 /*
1730 * mppn not updated in this case because no new chain is formed
1731 * and inserted
1732 */
1733 ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
1734 } else {
1735 /*
1736 * time to fragment - cases 1-b is handled inside
1737 * ip6_do_fragmentation().
1738 * mppn is passed down to be updated to point at fragment chain.
1739 */
1740 u_int8_t *lexthdrsp;
1741
1742 if (exthdrsp->ip6e_rthdr != NULL) {
1743 lexthdrsp = mtod(exthdrsp->ip6e_rthdr, uint8_t *);
1744 } else if (exthdrsp->ip6e_dest1 != NULL) {
1745 lexthdrsp = mtod(exthdrsp->ip6e_dest1, uint8_t *);
1746 } else if (exthdrsp->ip6e_hbh != NULL) {
1747 lexthdrsp = mtod(exthdrsp->ip6e_hbh, uint8_t *);
1748 } else {
1749 lexthdrsp = NULL;
1750 }
1751 error = ip6_do_fragmentation(mptr, optlen, ifp,
1752 unfragpartlen, mtod(m, struct ip6_hdr *), lexthdrsp, mtu,
1753 nxt0, htonl(ip6_randomid()));
1754 }
1755
1756 return error;
1757 }
1758
1759 /*
1760 * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
1761 * the packet needs to be fragmented. on success, morig is freed and a chain
1762 * of fragments is linked into the packet chain where morig existed. Otherwise,
1763 * an errno is returned.
1764 * optlen: total length of all extension headers (excludes the IPv6 header).
1765 * unfragpartlen: length of the per-fragment headers which consist of the IPv6
1766 * header plus any extension headers that must be processed by nodes
1767 * en route to the destination.
1768 * lexthdrsp: pointer to the last extension header in the unfragmentable part
1769 * or NULL.
1770 * nxt0: upper-layer protocol number.
1771 * id: Identification value to be used in the fragment header.
1772 */
1773 int
1774 ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
1775 uint32_t unfragpartlen, struct ip6_hdr *ip6, uint8_t *lexthdrsp,
1776 uint32_t mtu, int nxt0, uint32_t id)
1777 {
1778 VERIFY(NULL != mptr);
1779 int error = 0;
1780
1781 struct mbuf *morig = *mptr;
1782 struct mbuf *first_mbufp = NULL;
1783 struct mbuf *last_mbufp = NULL;
1784
1785 size_t tlen = morig->m_pkthdr.len;
1786
1787 /* try to fragment the packet. case 1-b */
1788 if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
1789 /* TSO and fragment aren't compatible */
1790 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1791 return EMSGSIZE;
1792 } else if (mtu < IPV6_MMTU) {
1793 /* path MTU cannot be less than IPV6_MMTU */
1794 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1795 return EMSGSIZE;
1796 } else if (ip6->ip6_plen == 0) {
1797 /* jumbo payload cannot be fragmented */
1798 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1799 return EMSGSIZE;
1800 } else {
1801 size_t hlen, len, off;
1802 struct mbuf **mnext = NULL;
1803 struct ip6_frag *ip6f;
1804 u_char nextproto;
1805
1806 /*
1807 * Too large for the destination or interface;
1808 * fragment if possible.
1809 * Must be able to put at least 8 bytes per fragment.
1810 */
1811 hlen = unfragpartlen;
1812 if (mtu > IPV6_MAXPACKET) {
1813 mtu = IPV6_MAXPACKET;
1814 }
1815
1816 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1817 if (len < 8) {
1818 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1819 return EMSGSIZE;
1820 }
1821
1822 /*
1823 * Change the next header field of the last header in the
1824 * unfragmentable part.
1825 */
1826 if (lexthdrsp != NULL) {
1827 nextproto = *lexthdrsp;
1828 *lexthdrsp = IPPROTO_FRAGMENT;
1829 } else {
1830 nextproto = ip6->ip6_nxt;
1831 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1832 }
1833
1834 if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
1835 in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
1836 }
1837
1838 /*
1839 * Loop through length of segment after first fragment,
1840 * make new header and copy data of each part and link onto
1841 * chain.
1842 */
1843 for (off = hlen; off < tlen; off += len) {
1844 struct ip6_hdr *new_mhip6;
1845 struct mbuf *new_m;
1846 struct mbuf *m_frgpart;
1847
1848 MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1849 if (new_m == NULL) {
1850 error = ENOBUFS;
1851 ip6stat.ip6s_odropped++;
1852 break;
1853 }
1854 new_m->m_pkthdr.rcvif = NULL;
1855 new_m->m_flags = morig->m_flags & M_COPYFLAGS;
1856
1857 if (first_mbufp != NULL) {
1858 /* Every pass through loop but first */
1859 *mnext = new_m;
1860 last_mbufp = new_m;
1861 } else {
1862 /* This is the first element of the fragment chain */
1863 first_mbufp = new_m;
1864 last_mbufp = new_m;
1865 }
1866 mnext = &new_m->m_nextpkt;
1867
1868 new_m->m_data += max_linkhdr;
1869 new_mhip6 = mtod(new_m, struct ip6_hdr *);
1870 *new_mhip6 = *ip6;
1871 new_m->m_len = sizeof(*new_mhip6);
1872
1873 error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
1874 if (error) {
1875 ip6stat.ip6s_odropped++;
1876 break;
1877 }
1878
1879 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1880 if (off + len >= tlen) {
1881 len = tlen - off;
1882 } else {
1883 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1884 }
1885 new_mhip6->ip6_plen = htons((u_short)(len + hlen +
1886 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1887
1888 if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
1889 error = ENOBUFS;
1890 ip6stat.ip6s_odropped++;
1891 break;
1892 }
1893 m_cat(new_m, m_frgpart);
1894 new_m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1895 new_m->m_pkthdr.rcvif = NULL;
1896
1897 M_COPY_CLASSIFIER(new_m, morig);
1898 M_COPY_PFTAG(new_m, morig);
1899
1900 #ifdef notyet
1901 #if CONFIG_MACF_NET
1902 mac_create_fragment(morig, new_m);
1903 #endif /* CONFIG_MACF_NET */
1904 #endif /* notyet */
1905
1906 ip6f->ip6f_reserved = 0;
1907 ip6f->ip6f_ident = id;
1908 ip6f->ip6f_nxt = nextproto;
1909 ip6stat.ip6s_ofragments++;
1910 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1911 }
1912
1913 if (error) {
1914 /* free all the fragments created */
1915 if (first_mbufp != NULL) {
1916 m_freem_list(first_mbufp);
1917 first_mbufp = NULL;
1918 }
1919 last_mbufp = NULL;
1920 } else {
1921 /* successful fragmenting */
1922 m_freem(morig);
1923 *mptr = first_mbufp;
1924 last_mbufp->m_nextpkt = NULL;
1925 ip6stat.ip6s_fragmented++;
1926 in6_ifstat_inc(ifp, ifs6_out_fragok);
1927 }
1928 }
1929 return error;
1930 }
1931
1932 static int
1933 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1934 {
1935 struct mbuf *m;
1936
1937 if (hlen > MCLBYTES) {
1938 return ENOBUFS; /* XXX */
1939 }
1940 MGET(m, M_DONTWAIT, MT_DATA);
1941 if (m == NULL) {
1942 return ENOBUFS;
1943 }
1944
1945 if (hlen > MLEN) {
1946 MCLGET(m, M_DONTWAIT);
1947 if (!(m->m_flags & M_EXT)) {
1948 m_free(m);
1949 return ENOBUFS;
1950 }
1951 }
1952 m->m_len = hlen;
1953 if (hdr != NULL) {
1954 bcopy(hdr, mtod(m, caddr_t), hlen);
1955 }
1956
1957 *mp = m;
1958 return 0;
1959 }
1960
1961 static void
1962 ip6_out_cksum_stats(int proto, u_int32_t len)
1963 {
1964 switch (proto) {
1965 case IPPROTO_TCP:
1966 tcp_out6_cksum_stats(len);
1967 break;
1968 case IPPROTO_UDP:
1969 udp_out6_cksum_stats(len);
1970 break;
1971 default:
1972 /* keep only TCP or UDP stats for now */
1973 break;
1974 }
1975 }
1976
1977 /*
1978 * Process a delayed payload checksum calculation (outbound path.)
1979 *
1980 * hoff is the number of bytes beyond the mbuf data pointer which
1981 * points to the IPv6 header. optlen is the number of bytes, if any,
1982 * between the end of IPv6 header and the beginning of the ULP payload
1983 * header, which represents the extension headers. If optlen is less
1984 * than zero, this routine will bail when it detects extension headers.
1985 *
1986 * Returns a bitmask representing all the work done in software.
1987 */
1988 uint32_t
1989 in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
1990 int32_t nxt0, uint32_t csum_flags)
1991 {
1992 unsigned char buf[sizeof(struct ip6_hdr)] __attribute__((aligned(8)));
1993 struct ip6_hdr *ip6;
1994 uint32_t offset, mlen, hlen, olen, sw_csum;
1995 uint16_t csum, ulpoff, plen;
1996 uint8_t nxt;
1997
1998 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1999 VERIFY(m->m_flags & M_PKTHDR);
2000
2001 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
2002
2003 if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0) {
2004 goto done;
2005 }
2006
2007 mlen = m->m_pkthdr.len; /* total mbuf len */
2008 hlen = sizeof(*ip6); /* IPv6 header len */
2009
2010 /* sanity check (need at least IPv6 header) */
2011 if (mlen < (hoff + hlen)) {
2012 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
2013 "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
2014 /* NOTREACHED */
2015 }
2016
2017 /*
2018 * In case the IPv6 header is not contiguous, or not 32-bit
2019 * aligned, copy it to a local buffer.
2020 */
2021 if ((hoff + hlen) > m->m_len ||
2022 !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
2023 m_copydata(m, hoff, hlen, (caddr_t)buf);
2024 ip6 = (struct ip6_hdr *)(void *)buf;
2025 } else {
2026 ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
2027 }
2028
2029 nxt = ip6->ip6_nxt;
2030 plen = ntohs(ip6->ip6_plen);
2031 if (plen != (mlen - (hoff + hlen))) {
2032 plen = OSSwapInt16(plen);
2033 if (plen != (mlen - (hoff + hlen))) {
2034 /* Don't complain for jumbograms */
2035 if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
2036 printf("%s: mbuf 0x%llx proto %d IPv6 "
2037 "plen %d (%x) [swapped %d (%x)] doesn't "
2038 "match actual packet length; %d is used "
2039 "instead\n", __func__,
2040 (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
2041 ip6->ip6_plen, ip6->ip6_plen, plen, plen,
2042 (mlen - (hoff + hlen)));
2043 }
2044 plen = mlen - (hoff + hlen);
2045 }
2046 }
2047
2048 if (optlen < 0) {
2049 /* next header isn't TCP/UDP and we don't know optlen, bail */
2050 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2051 sw_csum = 0;
2052 goto done;
2053 }
2054 olen = 0;
2055 } else {
2056 /* caller supplied the original transport number; use it */
2057 if (nxt0 >= 0) {
2058 nxt = nxt0;
2059 }
2060 olen = optlen;
2061 }
2062
2063 offset = hoff + hlen + olen; /* ULP header */
2064
2065 /* sanity check */
2066 if (mlen < offset) {
2067 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
2068 "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
2069 /* NOTREACHED */
2070 }
2071
2072 /*
2073 * offset is added to the lower 16-bit value of csum_data,
2074 * which is expected to contain the ULP offset; therefore
2075 * CSUM_PARTIAL offset adjustment must be undone.
2076 */
2077 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL | CSUM_DATA_VALID)) ==
2078 (CSUM_PARTIAL | CSUM_DATA_VALID)) {
2079 /*
2080 * Get back the original ULP offset (this will
2081 * undo the CSUM_PARTIAL logic in ip6_output.)
2082 */
2083 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
2084 m->m_pkthdr.csum_tx_start);
2085 }
2086
2087 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
2088
2089 if (mlen < (ulpoff + sizeof(csum))) {
2090 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2091 "cksum offset (%u) cksum flags 0x%x\n", __func__,
2092 m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
2093 /* NOTREACHED */
2094 }
2095
2096 csum = inet6_cksum(m, 0, offset, plen - olen);
2097
2098 /* Update stats */
2099 ip6_out_cksum_stats(nxt, plen - olen);
2100
2101 /* RFC1122 4.1.3.4 */
2102 if (csum == 0 &&
2103 (m->m_pkthdr.csum_flags & (CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
2104 csum = 0xffff;
2105 }
2106
2107 /* Insert the checksum in the ULP csum field */
2108 offset += ulpoff;
2109 if ((offset + sizeof(csum)) > m->m_len) {
2110 m_copyback(m, offset, sizeof(csum), &csum);
2111 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
2112 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
2113 } else {
2114 bcopy(&csum, (mtod(m, char *) + offset), sizeof(csum));
2115 }
2116 m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID |
2117 CSUM_PARTIAL | CSUM_ZERO_INVERT);
2118
2119 done:
2120 return sw_csum;
2121 }
2122
2123 /*
2124 * Insert jumbo payload option.
2125 */
2126 static int
2127 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
2128 {
2129 struct mbuf *mopt;
2130 u_char *optbuf;
2131 u_int32_t v;
2132
2133 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
2134
2135 /*
2136 * If there is no hop-by-hop options header, allocate new one.
2137 * If there is one but it doesn't have enough space to store the
2138 * jumbo payload option, allocate a cluster to store the whole options.
2139 * Otherwise, use it to store the options.
2140 */
2141 if (exthdrs->ip6e_hbh == NULL) {
2142 MGET(mopt, M_DONTWAIT, MT_DATA);
2143 if (mopt == NULL) {
2144 return ENOBUFS;
2145 }
2146 mopt->m_len = JUMBOOPTLEN;
2147 optbuf = mtod(mopt, u_char *);
2148 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
2149 exthdrs->ip6e_hbh = mopt;
2150 } else {
2151 struct ip6_hbh *hbh;
2152
2153 mopt = exthdrs->ip6e_hbh;
2154 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
2155 /*
2156 * XXX assumption:
2157 * - exthdrs->ip6e_hbh is not referenced from places
2158 * other than exthdrs.
2159 * - exthdrs->ip6e_hbh is not an mbuf chain.
2160 */
2161 u_int32_t oldoptlen = mopt->m_len;
2162 struct mbuf *n;
2163
2164 /*
2165 * XXX: give up if the whole (new) hbh header does
2166 * not fit even in an mbuf cluster.
2167 */
2168 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) {
2169 return ENOBUFS;
2170 }
2171
2172 /*
2173 * As a consequence, we must always prepare a cluster
2174 * at this point.
2175 */
2176 MGET(n, M_DONTWAIT, MT_DATA);
2177 if (n != NULL) {
2178 MCLGET(n, M_DONTWAIT);
2179 if (!(n->m_flags & M_EXT)) {
2180 m_freem(n);
2181 n = NULL;
2182 }
2183 }
2184 if (n == NULL) {
2185 return ENOBUFS;
2186 }
2187 n->m_len = oldoptlen + JUMBOOPTLEN;
2188 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
2189 oldoptlen);
2190 optbuf = mtod(n, u_char *) + oldoptlen;
2191 m_freem(mopt);
2192 mopt = exthdrs->ip6e_hbh = n;
2193 } else {
2194 optbuf = mtod(mopt, u_char *) + mopt->m_len;
2195 mopt->m_len += JUMBOOPTLEN;
2196 }
2197 optbuf[0] = IP6OPT_PADN;
2198 optbuf[1] = 1;
2199
2200 /*
2201 * Adjust the header length according to the pad and
2202 * the jumbo payload option.
2203 */
2204 hbh = mtod(mopt, struct ip6_hbh *);
2205 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
2206 }
2207
2208 /* fill in the option. */
2209 optbuf[2] = IP6OPT_JUMBO;
2210 optbuf[3] = 4;
2211 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
2212 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
2213
2214 /* finally, adjust the packet header length */
2215 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
2216
2217 return 0;
2218 #undef JUMBOOPTLEN
2219 }
2220
2221 /*
2222 * Insert fragment header and copy unfragmentable header portions.
2223 */
2224 static int
2225 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
2226 struct ip6_frag **frghdrp)
2227 {
2228 struct mbuf *n, *mlast;
2229
2230 if (hlen > sizeof(struct ip6_hdr)) {
2231 n = m_copym(m0, sizeof(struct ip6_hdr),
2232 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
2233 if (n == NULL) {
2234 return ENOBUFS;
2235 }
2236 m->m_next = n;
2237 } else {
2238 n = m;
2239 }
2240
2241 /* Search for the last mbuf of unfragmentable part. */
2242 for (mlast = n; mlast->m_next; mlast = mlast->m_next) {
2243 ;
2244 }
2245
2246 if (!(mlast->m_flags & M_EXT) &&
2247 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
2248 /* use the trailing space of the last mbuf for the frag hdr */
2249 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
2250 mlast->m_len);
2251 mlast->m_len += sizeof(struct ip6_frag);
2252 m->m_pkthdr.len += sizeof(struct ip6_frag);
2253 } else {
2254 /* allocate a new mbuf for the fragment header */
2255 struct mbuf *mfrg;
2256
2257 MGET(mfrg, M_DONTWAIT, MT_DATA);
2258 if (mfrg == NULL) {
2259 return ENOBUFS;
2260 }
2261 mfrg->m_len = sizeof(struct ip6_frag);
2262 *frghdrp = mtod(mfrg, struct ip6_frag *);
2263 mlast->m_next = mfrg;
2264 }
2265
2266 return 0;
2267 }
2268
2269 static int
2270 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
2271 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup)
2272 {
2273 u_int32_t mtu = 0;
2274 int error = 0;
2275
2276
2277 if (ro_pmtu != ro) {
2278 /* The first hop and the final destination may differ. */
2279 struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
2280 if (ROUTE_UNUSABLE(ro_pmtu) ||
2281 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) {
2282 ROUTE_RELEASE(ro_pmtu);
2283 }
2284
2285 if (ro_pmtu->ro_rt == NULL) {
2286 bzero(sa6_dst, sizeof(*sa6_dst));
2287 sa6_dst->sin6_family = AF_INET6;
2288 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
2289 sa6_dst->sin6_addr = *dst;
2290
2291 rtalloc_scoped((struct route *)ro_pmtu,
2292 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
2293 }
2294 }
2295
2296 if (ro_pmtu->ro_rt != NULL) {
2297 u_int32_t ifmtu;
2298
2299 if (ifp == NULL) {
2300 ifp = ro_pmtu->ro_rt->rt_ifp;
2301 }
2302 /* Access without acquiring nd_ifinfo lock for performance */
2303 ifmtu = IN6_LINKMTU(ifp);
2304
2305 /*
2306 * Access rmx_mtu without holding the route entry lock,
2307 * for performance; this isn't something that changes
2308 * often, so optimize.
2309 */
2310 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2311 if (mtu > ifmtu || mtu == 0) {
2312 /*
2313 * The MTU on the route is larger than the MTU on
2314 * the interface! This shouldn't happen, unless the
2315 * MTU of the interface has been changed after the
2316 * interface was brought up. Change the MTU in the
2317 * route to match the interface MTU (as long as the
2318 * field isn't locked).
2319 *
2320 * if MTU on the route is 0, we need to fix the MTU.
2321 * this case happens with path MTU discovery timeouts.
2322 */
2323 mtu = ifmtu;
2324 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) {
2325 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
2326 }
2327 }
2328 } else {
2329 if (ifp) {
2330 /* Don't hold nd_ifinfo lock for performance */
2331 mtu = IN6_LINKMTU(ifp);
2332 } else {
2333 error = EHOSTUNREACH; /* XXX */
2334 }
2335 }
2336
2337 *mtup = mtu;
2338 return error;
2339 }
2340
2341 /*
2342 * IP6 socket option processing.
2343 */
2344 int
2345 ip6_ctloutput(struct socket *so, struct sockopt *sopt)
2346 {
2347 int optdatalen, uproto;
2348 void *optdata;
2349 int privileged;
2350 struct inpcb *in6p = sotoinpcb(so);
2351 int error = 0, optval = 0;
2352 int level, op = -1, optname = 0;
2353 int optlen = 0;
2354 struct proc *p;
2355 lck_mtx_t *mutex_held = NULL;
2356
2357 VERIFY(sopt != NULL);
2358
2359 level = sopt->sopt_level;
2360 op = sopt->sopt_dir;
2361 optname = sopt->sopt_name;
2362 optlen = sopt->sopt_valsize;
2363 p = sopt->sopt_p;
2364 uproto = (int)SOCK_PROTO(so);
2365
2366 privileged = (proc_suser(p) == 0);
2367
2368 if (level == IPPROTO_IPV6) {
2369 boolean_t capture_exthdrstat_in = FALSE;
2370 switch (op) {
2371 case SOPT_SET:
2372 mutex_held = socket_getlock(so, PR_F_WILLUNLOCK);
2373 /*
2374 * Wait if we are in the middle of ip6_output
2375 * as we unlocked the socket there and don't
2376 * want to overwrite the IP options
2377 */
2378 if (in6p->inp_sndinprog_cnt > 0) {
2379 in6p->inp_sndingprog_waiters++;
2380
2381 while (in6p->inp_sndinprog_cnt > 0) {
2382 msleep(&in6p->inp_sndinprog_cnt, mutex_held,
2383 PSOCK | PCATCH, "inp_sndinprog_cnt",
2384 NULL);
2385 }
2386 in6p->inp_sndingprog_waiters--;
2387 }
2388 switch (optname) {
2389 case IPV6_2292PKTOPTIONS: {
2390 struct mbuf *m;
2391
2392 error = soopt_getm(sopt, &m);
2393 if (error != 0) {
2394 break;
2395 }
2396 error = soopt_mcopyin(sopt, m);
2397 if (error != 0) {
2398 break;
2399 }
2400 error = ip6_pcbopts(&in6p->in6p_outputopts,
2401 m, so, sopt);
2402 m_freem(m);
2403 break;
2404 }
2405
2406 /*
2407 * Use of some Hop-by-Hop options or some
2408 * Destination options, might require special
2409 * privilege. That is, normal applications
2410 * (without special privilege) might be forbidden
2411 * from setting certain options in outgoing packets,
2412 * and might never see certain options in received
2413 * packets. [RFC 2292 Section 6]
2414 * KAME specific note:
2415 * KAME prevents non-privileged users from sending or
2416 * receiving ANY hbh/dst options in order to avoid
2417 * overhead of parsing options in the kernel.
2418 */
2419 case IPV6_RECVHOPOPTS:
2420 case IPV6_RECVDSTOPTS:
2421 case IPV6_RECVRTHDRDSTOPTS:
2422 if (!privileged) {
2423 break;
2424 }
2425 /* FALLTHROUGH */
2426 case IPV6_UNICAST_HOPS:
2427 case IPV6_HOPLIMIT:
2428 case IPV6_RECVPKTINFO:
2429 case IPV6_RECVHOPLIMIT:
2430 case IPV6_RECVRTHDR:
2431 case IPV6_RECVPATHMTU:
2432 case IPV6_RECVTCLASS:
2433 case IPV6_V6ONLY:
2434 case IPV6_AUTOFLOWLABEL:
2435 if (optlen != sizeof(int)) {
2436 error = EINVAL;
2437 break;
2438 }
2439 error = sooptcopyin(sopt, &optval,
2440 sizeof(optval), sizeof(optval));
2441 if (error) {
2442 break;
2443 }
2444
2445 switch (optname) {
2446 case IPV6_UNICAST_HOPS:
2447 if (optval < -1 || optval >= 256) {
2448 error = EINVAL;
2449 } else {
2450 /* -1 = kernel default */
2451 in6p->in6p_hops = optval;
2452 if (in6p->inp_vflag &
2453 INP_IPV4) {
2454 in6p->inp_ip_ttl =
2455 optval;
2456 }
2457 }
2458 break;
2459 #define OPTSET(bit) do { \
2460 if (optval) \
2461 in6p->inp_flags |= (bit); \
2462 else \
2463 in6p->inp_flags &= ~(bit); \
2464 } while (0)
2465
2466 #define OPTSET2292(bit) do { \
2467 in6p->inp_flags |= IN6P_RFC2292; \
2468 if (optval) \
2469 in6p->inp_flags |= (bit); \
2470 else \
2471 in6p->inp_flags &= ~(bit); \
2472 } while (0)
2473
2474 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
2475
2476 case IPV6_RECVPKTINFO:
2477 /* cannot mix with RFC2292 */
2478 if (OPTBIT(IN6P_RFC2292)) {
2479 error = EINVAL;
2480 break;
2481 }
2482 OPTSET(IN6P_PKTINFO);
2483 break;
2484
2485 case IPV6_HOPLIMIT: {
2486 struct ip6_pktopts **optp;
2487
2488 /* cannot mix with RFC2292 */
2489 if (OPTBIT(IN6P_RFC2292)) {
2490 error = EINVAL;
2491 break;
2492 }
2493 optp = &in6p->in6p_outputopts;
2494 error = ip6_pcbopt(IPV6_HOPLIMIT,
2495 (u_char *)&optval, sizeof(optval),
2496 optp, uproto);
2497 break;
2498 }
2499
2500 case IPV6_RECVHOPLIMIT:
2501 /* cannot mix with RFC2292 */
2502 if (OPTBIT(IN6P_RFC2292)) {
2503 error = EINVAL;
2504 break;
2505 }
2506 OPTSET(IN6P_HOPLIMIT);
2507 break;
2508
2509 case IPV6_RECVHOPOPTS:
2510 /* cannot mix with RFC2292 */
2511 if (OPTBIT(IN6P_RFC2292)) {
2512 error = EINVAL;
2513 break;
2514 }
2515 OPTSET(IN6P_HOPOPTS);
2516 capture_exthdrstat_in = TRUE;
2517 break;
2518
2519 case IPV6_RECVDSTOPTS:
2520 /* cannot mix with RFC2292 */
2521 if (OPTBIT(IN6P_RFC2292)) {
2522 error = EINVAL;
2523 break;
2524 }
2525 OPTSET(IN6P_DSTOPTS);
2526 capture_exthdrstat_in = TRUE;
2527 break;
2528
2529 case IPV6_RECVRTHDRDSTOPTS:
2530 /* cannot mix with RFC2292 */
2531 if (OPTBIT(IN6P_RFC2292)) {
2532 error = EINVAL;
2533 break;
2534 }
2535 OPTSET(IN6P_RTHDRDSTOPTS);
2536 capture_exthdrstat_in = TRUE;
2537 break;
2538
2539 case IPV6_RECVRTHDR:
2540 /* cannot mix with RFC2292 */
2541 if (OPTBIT(IN6P_RFC2292)) {
2542 error = EINVAL;
2543 break;
2544 }
2545 OPTSET(IN6P_RTHDR);
2546 capture_exthdrstat_in = TRUE;
2547 break;
2548
2549 case IPV6_RECVPATHMTU:
2550 /*
2551 * We ignore this option for TCP
2552 * sockets.
2553 * (RFC3542 leaves this case
2554 * unspecified.)
2555 */
2556 if (uproto != IPPROTO_TCP) {
2557 OPTSET(IN6P_MTU);
2558 }
2559 break;
2560
2561 case IPV6_V6ONLY:
2562 /*
2563 * make setsockopt(IPV6_V6ONLY)
2564 * available only prior to bind(2).
2565 * see ipng mailing list, Jun 22 2001.
2566 */
2567 if (in6p->inp_lport ||
2568 !IN6_IS_ADDR_UNSPECIFIED(
2569 &in6p->in6p_laddr)) {
2570 error = EINVAL;
2571 break;
2572 }
2573 OPTSET(IN6P_IPV6_V6ONLY);
2574 if (optval) {
2575 in6p->inp_vflag &= ~INP_IPV4;
2576 } else {
2577 in6p->inp_vflag |= INP_IPV4;
2578 }
2579 break;
2580
2581 case IPV6_RECVTCLASS:
2582 /* we can mix with RFC2292 */
2583 OPTSET(IN6P_TCLASS);
2584 break;
2585
2586 case IPV6_AUTOFLOWLABEL:
2587 OPTSET(IN6P_AUTOFLOWLABEL);
2588 break;
2589 }
2590 break;
2591
2592 case IPV6_TCLASS:
2593 case IPV6_DONTFRAG:
2594 case IPV6_USE_MIN_MTU:
2595 case IPV6_PREFER_TEMPADDR: {
2596 struct ip6_pktopts **optp;
2597
2598 if (optlen != sizeof(optval)) {
2599 error = EINVAL;
2600 break;
2601 }
2602 error = sooptcopyin(sopt, &optval,
2603 sizeof(optval), sizeof(optval));
2604 if (error) {
2605 break;
2606 }
2607
2608 optp = &in6p->in6p_outputopts;
2609 error = ip6_pcbopt(optname, (u_char *)&optval,
2610 sizeof(optval), optp, uproto);
2611
2612 if (optname == IPV6_TCLASS) {
2613 // Add in the ECN flags
2614 u_int8_t tos = (in6p->inp_ip_tos & ~IPTOS_ECN_MASK);
2615 u_int8_t ecn = optval & IPTOS_ECN_MASK;
2616 in6p->inp_ip_tos = tos | ecn;
2617 }
2618 break;
2619 }
2620
2621 case IPV6_2292PKTINFO:
2622 case IPV6_2292HOPLIMIT:
2623 case IPV6_2292HOPOPTS:
2624 case IPV6_2292DSTOPTS:
2625 case IPV6_2292RTHDR:
2626 /* RFC 2292 */
2627 if (optlen != sizeof(int)) {
2628 error = EINVAL;
2629 break;
2630 }
2631 error = sooptcopyin(sopt, &optval,
2632 sizeof(optval), sizeof(optval));
2633 if (error) {
2634 break;
2635 }
2636 switch (optname) {
2637 case IPV6_2292PKTINFO:
2638 OPTSET2292(IN6P_PKTINFO);
2639 break;
2640 case IPV6_2292HOPLIMIT:
2641 OPTSET2292(IN6P_HOPLIMIT);
2642 break;
2643 case IPV6_2292HOPOPTS:
2644 /*
2645 * Check super-user privilege.
2646 * See comments for IPV6_RECVHOPOPTS.
2647 */
2648 if (!privileged) {
2649 return EPERM;
2650 }
2651 OPTSET2292(IN6P_HOPOPTS);
2652 capture_exthdrstat_in = TRUE;
2653 break;
2654 case IPV6_2292DSTOPTS:
2655 if (!privileged) {
2656 return EPERM;
2657 }
2658 OPTSET2292(IN6P_DSTOPTS |
2659 IN6P_RTHDRDSTOPTS); /* XXX */
2660 capture_exthdrstat_in = TRUE;
2661 break;
2662 case IPV6_2292RTHDR:
2663 OPTSET2292(IN6P_RTHDR);
2664 capture_exthdrstat_in = TRUE;
2665 break;
2666 }
2667 break;
2668
2669 case IPV6_3542PKTINFO:
2670 case IPV6_3542HOPOPTS:
2671 case IPV6_3542RTHDR:
2672 case IPV6_3542DSTOPTS:
2673 case IPV6_RTHDRDSTOPTS:
2674 case IPV6_3542NEXTHOP: {
2675 struct ip6_pktopts **optp;
2676 /* new advanced API (RFC3542) */
2677 struct mbuf *m;
2678
2679 /* cannot mix with RFC2292 */
2680 if (OPTBIT(IN6P_RFC2292)) {
2681 error = EINVAL;
2682 break;
2683 }
2684 error = soopt_getm(sopt, &m);
2685 if (error != 0) {
2686 break;
2687 }
2688 error = soopt_mcopyin(sopt, m);
2689 if (error != 0) {
2690 break;
2691 }
2692
2693 optp = &in6p->in6p_outputopts;
2694 error = ip6_pcbopt(optname, mtod(m, u_char *),
2695 m->m_len, optp, uproto);
2696 m_freem(m);
2697 break;
2698 }
2699 #undef OPTSET
2700 case IPV6_MULTICAST_IF:
2701 case IPV6_MULTICAST_HOPS:
2702 case IPV6_MULTICAST_LOOP:
2703 case IPV6_JOIN_GROUP:
2704 case IPV6_LEAVE_GROUP:
2705 case IPV6_MSFILTER:
2706 case MCAST_BLOCK_SOURCE:
2707 case MCAST_UNBLOCK_SOURCE:
2708 case MCAST_JOIN_GROUP:
2709 case MCAST_LEAVE_GROUP:
2710 case MCAST_JOIN_SOURCE_GROUP:
2711 case MCAST_LEAVE_SOURCE_GROUP:
2712 error = ip6_setmoptions(in6p, sopt);
2713 break;
2714
2715 case IPV6_PORTRANGE:
2716 error = sooptcopyin(sopt, &optval,
2717 sizeof(optval), sizeof(optval));
2718 if (error) {
2719 break;
2720 }
2721
2722 switch (optval) {
2723 case IPV6_PORTRANGE_DEFAULT:
2724 in6p->inp_flags &= ~(INP_LOWPORT);
2725 in6p->inp_flags &= ~(INP_HIGHPORT);
2726 break;
2727
2728 case IPV6_PORTRANGE_HIGH:
2729 in6p->inp_flags &= ~(INP_LOWPORT);
2730 in6p->inp_flags |= INP_HIGHPORT;
2731 break;
2732
2733 case IPV6_PORTRANGE_LOW:
2734 in6p->inp_flags &= ~(INP_HIGHPORT);
2735 in6p->inp_flags |= INP_LOWPORT;
2736 break;
2737
2738 default:
2739 error = EINVAL;
2740 break;
2741 }
2742 break;
2743 #if IPSEC
2744 case IPV6_IPSEC_POLICY: {
2745 caddr_t req = NULL;
2746 size_t len = 0;
2747 struct mbuf *m;
2748
2749 if ((error = soopt_getm(sopt, &m)) != 0) {
2750 break;
2751 }
2752 if ((error = soopt_mcopyin(sopt, m)) != 0) {
2753 break;
2754 }
2755
2756 req = mtod(m, caddr_t);
2757 len = m->m_len;
2758 error = ipsec6_set_policy(in6p, optname, req,
2759 len, privileged);
2760 m_freem(m);
2761 break;
2762 }
2763 #endif /* IPSEC */
2764 /*
2765 * IPv6 variant of IP_BOUND_IF; for details see
2766 * comments on IP_BOUND_IF in ip_ctloutput().
2767 */
2768 case IPV6_BOUND_IF:
2769 /* This option is settable only on IPv6 */
2770 if (!(in6p->inp_vflag & INP_IPV6)) {
2771 error = EINVAL;
2772 break;
2773 }
2774
2775 error = sooptcopyin(sopt, &optval,
2776 sizeof(optval), sizeof(optval));
2777
2778 if (error) {
2779 break;
2780 }
2781
2782 error = inp_bindif(in6p, optval, NULL);
2783 break;
2784
2785 case IPV6_NO_IFT_CELLULAR:
2786 /* This option is settable only for IPv6 */
2787 if (!(in6p->inp_vflag & INP_IPV6)) {
2788 error = EINVAL;
2789 break;
2790 }
2791
2792 error = sooptcopyin(sopt, &optval,
2793 sizeof(optval), sizeof(optval));
2794
2795 if (error) {
2796 break;
2797 }
2798
2799 /* once set, it cannot be unset */
2800 if (!optval && INP_NO_CELLULAR(in6p)) {
2801 error = EINVAL;
2802 break;
2803 }
2804
2805 error = so_set_restrictions(so,
2806 SO_RESTRICT_DENY_CELLULAR);
2807 break;
2808
2809 case IPV6_OUT_IF:
2810 /* This option is not settable */
2811 error = EINVAL;
2812 break;
2813
2814 default:
2815 error = ENOPROTOOPT;
2816 break;
2817 }
2818 if (capture_exthdrstat_in) {
2819 if (uproto == IPPROTO_TCP) {
2820 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_in);
2821 } else if (uproto == IPPROTO_UDP) {
2822 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_in);
2823 }
2824 }
2825 break;
2826
2827 case SOPT_GET:
2828 switch (optname) {
2829 case IPV6_2292PKTOPTIONS:
2830 /*
2831 * RFC3542 (effectively) deprecated the
2832 * semantics of the 2292-style pktoptions.
2833 * Since it was not reliable in nature (i.e.,
2834 * applications had to expect the lack of some
2835 * information after all), it would make sense
2836 * to simplify this part by always returning
2837 * empty data.
2838 */
2839 sopt->sopt_valsize = 0;
2840 break;
2841
2842 case IPV6_RECVHOPOPTS:
2843 case IPV6_RECVDSTOPTS:
2844 case IPV6_RECVRTHDRDSTOPTS:
2845 case IPV6_UNICAST_HOPS:
2846 case IPV6_RECVPKTINFO:
2847 case IPV6_RECVHOPLIMIT:
2848 case IPV6_RECVRTHDR:
2849 case IPV6_RECVPATHMTU:
2850 case IPV6_V6ONLY:
2851 case IPV6_PORTRANGE:
2852 case IPV6_RECVTCLASS:
2853 case IPV6_AUTOFLOWLABEL:
2854 switch (optname) {
2855 case IPV6_RECVHOPOPTS:
2856 optval = OPTBIT(IN6P_HOPOPTS);
2857 break;
2858
2859 case IPV6_RECVDSTOPTS:
2860 optval = OPTBIT(IN6P_DSTOPTS);
2861 break;
2862
2863 case IPV6_RECVRTHDRDSTOPTS:
2864 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2865 break;
2866
2867 case IPV6_UNICAST_HOPS:
2868 optval = in6p->in6p_hops;
2869 break;
2870
2871 case IPV6_RECVPKTINFO:
2872 optval = OPTBIT(IN6P_PKTINFO);
2873 break;
2874
2875 case IPV6_RECVHOPLIMIT:
2876 optval = OPTBIT(IN6P_HOPLIMIT);
2877 break;
2878
2879 case IPV6_RECVRTHDR:
2880 optval = OPTBIT(IN6P_RTHDR);
2881 break;
2882
2883 case IPV6_RECVPATHMTU:
2884 optval = OPTBIT(IN6P_MTU);
2885 break;
2886
2887 case IPV6_V6ONLY:
2888 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2889 break;
2890
2891 case IPV6_PORTRANGE: {
2892 int flags;
2893 flags = in6p->inp_flags;
2894 if (flags & INP_HIGHPORT) {
2895 optval = IPV6_PORTRANGE_HIGH;
2896 } else if (flags & INP_LOWPORT) {
2897 optval = IPV6_PORTRANGE_LOW;
2898 } else {
2899 optval = 0;
2900 }
2901 break;
2902 }
2903 case IPV6_RECVTCLASS:
2904 optval = OPTBIT(IN6P_TCLASS);
2905 break;
2906
2907 case IPV6_AUTOFLOWLABEL:
2908 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2909 break;
2910 }
2911 if (error) {
2912 break;
2913 }
2914 error = sooptcopyout(sopt, &optval,
2915 sizeof(optval));
2916 break;
2917
2918 case IPV6_PATHMTU: {
2919 u_int32_t pmtu = 0;
2920 struct ip6_mtuinfo mtuinfo;
2921 struct route_in6 sro;
2922
2923 bzero(&sro, sizeof(sro));
2924
2925 if (!(so->so_state & SS_ISCONNECTED)) {
2926 return ENOTCONN;
2927 }
2928 /*
2929 * XXX: we dot not consider the case of source
2930 * routing, or optional information to specify
2931 * the outgoing interface.
2932 */
2933 error = ip6_getpmtu(&sro, NULL, NULL,
2934 &in6p->in6p_faddr, &pmtu);
2935 ROUTE_RELEASE(&sro);
2936 if (error) {
2937 break;
2938 }
2939 if (pmtu > IPV6_MAXPACKET) {
2940 pmtu = IPV6_MAXPACKET;
2941 }
2942
2943 bzero(&mtuinfo, sizeof(mtuinfo));
2944 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2945 optdata = (void *)&mtuinfo;
2946 optdatalen = sizeof(mtuinfo);
2947 error = sooptcopyout(sopt, optdata,
2948 optdatalen);
2949 break;
2950 }
2951
2952 case IPV6_2292PKTINFO:
2953 case IPV6_2292HOPLIMIT:
2954 case IPV6_2292HOPOPTS:
2955 case IPV6_2292RTHDR:
2956 case IPV6_2292DSTOPTS:
2957 switch (optname) {
2958 case IPV6_2292PKTINFO:
2959 optval = OPTBIT(IN6P_PKTINFO);
2960 break;
2961 case IPV6_2292HOPLIMIT:
2962 optval = OPTBIT(IN6P_HOPLIMIT);
2963 break;
2964 case IPV6_2292HOPOPTS:
2965 optval = OPTBIT(IN6P_HOPOPTS);
2966 break;
2967 case IPV6_2292RTHDR:
2968 optval = OPTBIT(IN6P_RTHDR);
2969 break;
2970 case IPV6_2292DSTOPTS:
2971 optval = OPTBIT(IN6P_DSTOPTS |
2972 IN6P_RTHDRDSTOPTS);
2973 break;
2974 }
2975 error = sooptcopyout(sopt, &optval,
2976 sizeof(optval));
2977 break;
2978
2979 case IPV6_PKTINFO:
2980 case IPV6_HOPOPTS:
2981 case IPV6_RTHDR:
2982 case IPV6_DSTOPTS:
2983 case IPV6_RTHDRDSTOPTS:
2984 case IPV6_NEXTHOP:
2985 case IPV6_TCLASS:
2986 case IPV6_DONTFRAG:
2987 case IPV6_USE_MIN_MTU:
2988 case IPV6_PREFER_TEMPADDR:
2989 error = ip6_getpcbopt(in6p->in6p_outputopts,
2990 optname, sopt);
2991 break;
2992
2993 case IPV6_MULTICAST_IF:
2994 case IPV6_MULTICAST_HOPS:
2995 case IPV6_MULTICAST_LOOP:
2996 case IPV6_MSFILTER:
2997 error = ip6_getmoptions(in6p, sopt);
2998 break;
2999 #if IPSEC
3000 case IPV6_IPSEC_POLICY: {
3001 error = 0; /* This option is no longer supported */
3002 break;
3003 }
3004 #endif /* IPSEC */
3005 case IPV6_BOUND_IF:
3006 if (in6p->inp_flags & INP_BOUND_IF) {
3007 optval = in6p->inp_boundifp->if_index;
3008 }
3009 error = sooptcopyout(sopt, &optval,
3010 sizeof(optval));
3011 break;
3012
3013 case IPV6_NO_IFT_CELLULAR:
3014 optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
3015 error = sooptcopyout(sopt, &optval,
3016 sizeof(optval));
3017 break;
3018
3019 case IPV6_OUT_IF:
3020 optval = (in6p->in6p_last_outifp != NULL) ?
3021 in6p->in6p_last_outifp->if_index : 0;
3022 error = sooptcopyout(sopt, &optval,
3023 sizeof(optval));
3024 break;
3025
3026 default:
3027 error = ENOPROTOOPT;
3028 break;
3029 }
3030 break;
3031 }
3032 } else if (level == IPPROTO_UDP) {
3033 error = udp_ctloutput(so, sopt);
3034 } else {
3035 error = EINVAL;
3036 }
3037 return error;
3038 }
3039
3040 int
3041 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
3042 {
3043 int error = 0, optval, optlen;
3044 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
3045 struct inpcb *in6p = sotoinpcb(so);
3046 int level, op, optname;
3047
3048 level = sopt->sopt_level;
3049 op = sopt->sopt_dir;
3050 optname = sopt->sopt_name;
3051 optlen = sopt->sopt_valsize;
3052
3053 if (level != IPPROTO_IPV6) {
3054 return EINVAL;
3055 }
3056
3057 switch (optname) {
3058 case IPV6_CHECKSUM:
3059 /*
3060 * For ICMPv6 sockets, no modification allowed for checksum
3061 * offset, permit "no change" values to help existing apps.
3062 *
3063 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
3064 * for an ICMPv6 socket will fail."
3065 * The current behavior does not meet RFC3542.
3066 */
3067 switch (op) {
3068 case SOPT_SET:
3069 if (optlen != sizeof(int)) {
3070 error = EINVAL;
3071 break;
3072 }
3073 error = sooptcopyin(sopt, &optval, sizeof(optval),
3074 sizeof(optval));
3075 if (error) {
3076 break;
3077 }
3078 if ((optval % 2) != 0) {
3079 /* the API assumes even offset values */
3080 error = EINVAL;
3081 } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
3082 if (optval != icmp6off) {
3083 error = EINVAL;
3084 }
3085 } else {
3086 in6p->in6p_cksum = optval;
3087 }
3088 break;
3089
3090 case SOPT_GET:
3091 if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
3092 optval = icmp6off;
3093 } else {
3094 optval = in6p->in6p_cksum;
3095 }
3096
3097 error = sooptcopyout(sopt, &optval, sizeof(optval));
3098 break;
3099
3100 default:
3101 error = EINVAL;
3102 break;
3103 }
3104 break;
3105
3106 default:
3107 error = ENOPROTOOPT;
3108 break;
3109 }
3110
3111 return error;
3112 }
3113
3114 /*
3115 * Set up IP6 options in pcb for insertion in output packets or
3116 * specifying behavior of outgoing packets.
3117 */
3118 static int
3119 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
3120 struct sockopt *sopt)
3121 {
3122 #pragma unused(sopt)
3123 struct ip6_pktopts *opt = *pktopt;
3124 int error = 0;
3125
3126 /* turn off any old options. */
3127 if (opt != NULL) {
3128 #if DIAGNOSTIC
3129 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
3130 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
3131 opt->ip6po_rhinfo.ip6po_rhi_rthdr) {
3132 printf("%s: all specified options are cleared.\n",
3133 __func__);
3134 }
3135 #endif
3136 ip6_clearpktopts(opt, -1);
3137 } else {
3138 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
3139 if (opt == NULL) {
3140 return ENOBUFS;
3141 }
3142 }
3143 *pktopt = NULL;
3144
3145 if (m == NULL || m->m_len == 0) {
3146 /*
3147 * Only turning off any previous options, regardless of
3148 * whether the opt is just created or given.
3149 */
3150 if (opt != NULL) {
3151 FREE(opt, M_IP6OPT);
3152 }
3153 return 0;
3154 }
3155
3156 /* set options specified by user. */
3157 if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) {
3158 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
3159 FREE(opt, M_IP6OPT);
3160 return error;
3161 }
3162 *pktopt = opt;
3163 return 0;
3164 }
3165
3166 /*
3167 * initialize ip6_pktopts. beware that there are non-zero default values in
3168 * the struct.
3169 */
3170 void
3171 ip6_initpktopts(struct ip6_pktopts *opt)
3172 {
3173 bzero(opt, sizeof(*opt));
3174 opt->ip6po_hlim = -1; /* -1 means default hop limit */
3175 opt->ip6po_tclass = -1; /* -1 means default traffic class */
3176 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
3177 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
3178 }
3179
3180 static int
3181 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
3182 int uproto)
3183 {
3184 struct ip6_pktopts *opt;
3185
3186 opt = *pktopt;
3187 if (opt == NULL) {
3188 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
3189 if (opt == NULL) {
3190 return ENOBUFS;
3191 }
3192 ip6_initpktopts(opt);
3193 *pktopt = opt;
3194 }
3195
3196 return ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto);
3197 }
3198
3199 static int
3200 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
3201 {
3202 void *optdata = NULL;
3203 int optdatalen = 0;
3204 struct ip6_ext *ip6e;
3205 struct in6_pktinfo null_pktinfo;
3206 int deftclass = 0, on;
3207 int defminmtu = IP6PO_MINMTU_MCASTONLY;
3208 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
3209
3210
3211 switch (optname) {
3212 case IPV6_PKTINFO:
3213 if (pktopt && pktopt->ip6po_pktinfo) {
3214 optdata = (void *)pktopt->ip6po_pktinfo;
3215 } else {
3216 /* XXX: we don't have to do this every time... */
3217 bzero(&null_pktinfo, sizeof(null_pktinfo));
3218 optdata = (void *)&null_pktinfo;
3219 }
3220 optdatalen = sizeof(struct in6_pktinfo);
3221 break;
3222
3223 case IPV6_TCLASS:
3224 if (pktopt && pktopt->ip6po_tclass >= 0) {
3225 optdata = (void *)&pktopt->ip6po_tclass;
3226 } else {
3227 optdata = (void *)&deftclass;
3228 }
3229 optdatalen = sizeof(int);
3230 break;
3231
3232 case IPV6_HOPOPTS:
3233 if (pktopt && pktopt->ip6po_hbh) {
3234 optdata = (void *)pktopt->ip6po_hbh;
3235 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
3236 optdatalen = (ip6e->ip6e_len + 1) << 3;
3237 }
3238 break;
3239
3240 case IPV6_RTHDR:
3241 if (pktopt && pktopt->ip6po_rthdr) {
3242 optdata = (void *)pktopt->ip6po_rthdr;
3243 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
3244 optdatalen = (ip6e->ip6e_len + 1) << 3;
3245 }
3246 break;
3247
3248 case IPV6_RTHDRDSTOPTS:
3249 if (pktopt && pktopt->ip6po_dest1) {
3250 optdata = (void *)pktopt->ip6po_dest1;
3251 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
3252 optdatalen = (ip6e->ip6e_len + 1) << 3;
3253 }
3254 break;
3255
3256 case IPV6_DSTOPTS:
3257 if (pktopt && pktopt->ip6po_dest2) {
3258 optdata = (void *)pktopt->ip6po_dest2;
3259 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
3260 optdatalen = (ip6e->ip6e_len + 1) << 3;
3261 }
3262 break;
3263
3264 case IPV6_NEXTHOP:
3265 if (pktopt && pktopt->ip6po_nexthop) {
3266 optdata = (void *)pktopt->ip6po_nexthop;
3267 optdatalen = pktopt->ip6po_nexthop->sa_len;
3268 }
3269 break;
3270
3271 case IPV6_USE_MIN_MTU:
3272 if (pktopt) {
3273 optdata = (void *)&pktopt->ip6po_minmtu;
3274 } else {
3275 optdata = (void *)&defminmtu;
3276 }
3277 optdatalen = sizeof(int);
3278 break;
3279
3280 case IPV6_DONTFRAG:
3281 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) {
3282 on = 1;
3283 } else {
3284 on = 0;
3285 }
3286 optdata = (void *)&on;
3287 optdatalen = sizeof(on);
3288 break;
3289
3290 case IPV6_PREFER_TEMPADDR:
3291 if (pktopt) {
3292 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
3293 } else {
3294 optdata = (void *)&defpreftemp;
3295 }
3296 optdatalen = sizeof(int);
3297 break;
3298
3299 default: /* should not happen */
3300 #ifdef DIAGNOSTIC
3301 panic("ip6_getpcbopt: unexpected option\n");
3302 #endif
3303 return ENOPROTOOPT;
3304 }
3305
3306 return sooptcopyout(sopt, optdata, optdatalen);
3307 }
3308
3309 void
3310 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
3311 {
3312 if (pktopt == NULL) {
3313 return;
3314 }
3315
3316 if (optname == -1 || optname == IPV6_PKTINFO) {
3317 if (pktopt->ip6po_pktinfo) {
3318 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
3319 }
3320 pktopt->ip6po_pktinfo = NULL;
3321 }
3322 if (optname == -1 || optname == IPV6_HOPLIMIT) {
3323 pktopt->ip6po_hlim = -1;
3324 }
3325 if (optname == -1 || optname == IPV6_TCLASS) {
3326 pktopt->ip6po_tclass = -1;
3327 }
3328 if (optname == -1 || optname == IPV6_NEXTHOP) {
3329 ROUTE_RELEASE(&pktopt->ip6po_nextroute);
3330 if (pktopt->ip6po_nexthop) {
3331 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
3332 }
3333 pktopt->ip6po_nexthop = NULL;
3334 }
3335 if (optname == -1 || optname == IPV6_HOPOPTS) {
3336 if (pktopt->ip6po_hbh) {
3337 FREE(pktopt->ip6po_hbh, M_IP6OPT);
3338 }
3339 pktopt->ip6po_hbh = NULL;
3340 }
3341 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
3342 if (pktopt->ip6po_dest1) {
3343 FREE(pktopt->ip6po_dest1, M_IP6OPT);
3344 }
3345 pktopt->ip6po_dest1 = NULL;
3346 }
3347 if (optname == -1 || optname == IPV6_RTHDR) {
3348 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) {
3349 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
3350 }
3351 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
3352 ROUTE_RELEASE(&pktopt->ip6po_route);
3353 }
3354 if (optname == -1 || optname == IPV6_DSTOPTS) {
3355 if (pktopt->ip6po_dest2) {
3356 FREE(pktopt->ip6po_dest2, M_IP6OPT);
3357 }
3358 pktopt->ip6po_dest2 = NULL;
3359 }
3360 }
3361
3362 #define PKTOPT_EXTHDRCPY(type) do { \
3363 if (src->type) { \
3364 int hlen = \
3365 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \
3366 dst->type = _MALLOC(hlen, M_IP6OPT, canwait); \
3367 if (dst->type == NULL && canwait == M_NOWAIT) \
3368 goto bad; \
3369 bcopy(src->type, dst->type, hlen); \
3370 } \
3371 } while (0)
3372
3373 static int
3374 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
3375 {
3376 if (dst == NULL || src == NULL) {
3377 printf("copypktopts: invalid argument\n");
3378 return EINVAL;
3379 }
3380
3381 dst->ip6po_hlim = src->ip6po_hlim;
3382 dst->ip6po_tclass = src->ip6po_tclass;
3383 dst->ip6po_flags = src->ip6po_flags;
3384 if (src->ip6po_pktinfo) {
3385 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
3386 M_IP6OPT, canwait);
3387 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) {
3388 goto bad;
3389 }
3390 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3391 }
3392 if (src->ip6po_nexthop) {
3393 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
3394 M_IP6OPT, canwait);
3395 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) {
3396 goto bad;
3397 }
3398 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
3399 src->ip6po_nexthop->sa_len);
3400 }
3401 PKTOPT_EXTHDRCPY(ip6po_hbh);
3402 PKTOPT_EXTHDRCPY(ip6po_dest1);
3403 PKTOPT_EXTHDRCPY(ip6po_dest2);
3404 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
3405 return 0;
3406
3407 bad:
3408 ip6_clearpktopts(dst, -1);
3409 return ENOBUFS;
3410 }
3411 #undef PKTOPT_EXTHDRCPY
3412
3413 struct ip6_pktopts *
3414 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
3415 {
3416 int error;
3417 struct ip6_pktopts *dst;
3418
3419 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
3420 if (dst == NULL) {
3421 return NULL;
3422 }
3423 ip6_initpktopts(dst);
3424
3425 if ((error = copypktopts(dst, src, canwait)) != 0) {
3426 FREE(dst, M_IP6OPT);
3427 return NULL;
3428 }
3429
3430 return dst;
3431 }
3432
3433 void
3434 ip6_freepcbopts(struct ip6_pktopts *pktopt)
3435 {
3436 if (pktopt == NULL) {
3437 return;
3438 }
3439
3440 ip6_clearpktopts(pktopt, -1);
3441
3442 FREE(pktopt, M_IP6OPT);
3443 }
3444
3445 void
3446 ip6_moptions_init(void)
3447 {
3448 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof(im6o_debug));
3449
3450 im6o_size = (im6o_debug == 0) ? sizeof(struct ip6_moptions) :
3451 sizeof(struct ip6_moptions_dbg);
3452
3453 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
3454 IM6O_ZONE_NAME);
3455 if (im6o_zone == NULL) {
3456 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
3457 /* NOTREACHED */
3458 }
3459 zone_change(im6o_zone, Z_EXPAND, TRUE);
3460 }
3461
3462 void
3463 im6o_addref(struct ip6_moptions *im6o, int locked)
3464 {
3465 if (!locked) {
3466 IM6O_LOCK(im6o);
3467 } else {
3468 IM6O_LOCK_ASSERT_HELD(im6o);
3469 }
3470
3471 if (++im6o->im6o_refcnt == 0) {
3472 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
3473 /* NOTREACHED */
3474 } else if (im6o->im6o_trace != NULL) {
3475 (*im6o->im6o_trace)(im6o, TRUE);
3476 }
3477
3478 if (!locked) {
3479 IM6O_UNLOCK(im6o);
3480 }
3481 }
3482
3483 void
3484 im6o_remref(struct ip6_moptions *im6o)
3485 {
3486 int i;
3487
3488 IM6O_LOCK(im6o);
3489 if (im6o->im6o_refcnt == 0) {
3490 panic("%s: im6o %p negative refcnt", __func__, im6o);
3491 /* NOTREACHED */
3492 } else if (im6o->im6o_trace != NULL) {
3493 (*im6o->im6o_trace)(im6o, FALSE);
3494 }
3495
3496 --im6o->im6o_refcnt;
3497 if (im6o->im6o_refcnt > 0) {
3498 IM6O_UNLOCK(im6o);
3499 return;
3500 }
3501
3502 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3503 struct in6_mfilter *imf;
3504
3505 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
3506 if (imf != NULL) {
3507 im6f_leave(imf);
3508 }
3509
3510 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
3511
3512 if (imf != NULL) {
3513 im6f_purge(imf);
3514 }
3515
3516 IN6M_REMREF(im6o->im6o_membership[i]);
3517 im6o->im6o_membership[i] = NULL;
3518 }
3519 im6o->im6o_num_memberships = 0;
3520 if (im6o->im6o_mfilters != NULL) {
3521 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
3522 im6o->im6o_mfilters = NULL;
3523 }
3524 if (im6o->im6o_membership != NULL) {
3525 FREE(im6o->im6o_membership, M_IP6MOPTS);
3526 im6o->im6o_membership = NULL;
3527 }
3528 IM6O_UNLOCK(im6o);
3529
3530 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
3531
3532 if (!(im6o->im6o_debug & IFD_ALLOC)) {
3533 panic("%s: im6o %p cannot be freed", __func__, im6o);
3534 /* NOTREACHED */
3535 }
3536 zfree(im6o_zone, im6o);
3537 }
3538
3539 static void
3540 im6o_trace(struct ip6_moptions *im6o, int refhold)
3541 {
3542 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3543 ctrace_t *tr;
3544 u_int32_t idx;
3545 u_int16_t *cnt;
3546
3547 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3548 panic("%s: im6o %p has no debug structure", __func__, im6o);
3549 /* NOTREACHED */
3550 }
3551 if (refhold) {
3552 cnt = &im6o_dbg->im6o_refhold_cnt;
3553 tr = im6o_dbg->im6o_refhold;
3554 } else {
3555 cnt = &im6o_dbg->im6o_refrele_cnt;
3556 tr = im6o_dbg->im6o_refrele;
3557 }
3558
3559 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3560 ctrace_record(&tr[idx]);
3561 }
3562
3563 struct ip6_moptions *
3564 ip6_allocmoptions(int how)
3565 {
3566 struct ip6_moptions *im6o;
3567
3568 im6o = (how == M_WAITOK) ?
3569 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3570 if (im6o != NULL) {
3571 bzero(im6o, im6o_size);
3572 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3573 im6o->im6o_debug |= IFD_ALLOC;
3574 if (im6o_debug != 0) {
3575 im6o->im6o_debug |= IFD_DEBUG;
3576 im6o->im6o_trace = im6o_trace;
3577 }
3578 IM6O_ADDREF(im6o);
3579 }
3580
3581 return im6o;
3582 }
3583
3584 /*
3585 * Set IPv6 outgoing packet options based on advanced API.
3586 */
3587 int
3588 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3589 struct ip6_pktopts *stickyopt, int uproto)
3590 {
3591 struct cmsghdr *cm = NULL;
3592
3593 if (control == NULL || opt == NULL) {
3594 return EINVAL;
3595 }
3596
3597 ip6_initpktopts(opt);
3598 if (stickyopt) {
3599 int error;
3600
3601 /*
3602 * If stickyopt is provided, make a local copy of the options
3603 * for this particular packet, then override them by ancillary
3604 * objects.
3605 * XXX: copypktopts() does not copy the cached route to a next
3606 * hop (if any). This is not very good in terms of efficiency,
3607 * but we can allow this since this option should be rarely
3608 * used.
3609 */
3610 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) {
3611 return error;
3612 }
3613 }
3614
3615 /*
3616 * XXX: Currently, we assume all the optional information is stored
3617 * in a single mbuf.
3618 */
3619 if (control->m_next) {
3620 return EINVAL;
3621 }
3622
3623 if (control->m_len < CMSG_LEN(0)) {
3624 return EINVAL;
3625 }
3626
3627 for (cm = M_FIRST_CMSGHDR(control);
3628 is_cmsg_valid(control, cm);
3629 cm = M_NXT_CMSGHDR(control, cm)) {
3630 int error;
3631
3632 if (cm->cmsg_level != IPPROTO_IPV6) {
3633 continue;
3634 }
3635
3636 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3637 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3638 if (error) {
3639 return error;
3640 }
3641 }
3642
3643 return 0;
3644 }
3645 /*
3646 * Set a particular packet option, as a sticky option or an ancillary data
3647 * item. "len" can be 0 only when it's a sticky option.
3648 * We have 4 cases of combination of "sticky" and "cmsg":
3649 * "sticky=0, cmsg=0": impossible
3650 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3651 * "sticky=1, cmsg=0": RFC3542 socket option
3652 * "sticky=1, cmsg=1": RFC2292 socket option
3653 */
3654 static int
3655 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3656 int sticky, int cmsg, int uproto)
3657 {
3658 int minmtupolicy, preftemp;
3659 int error;
3660 boolean_t capture_exthdrstat_out = FALSE;
3661
3662 if (!sticky && !cmsg) {
3663 #ifdef DIAGNOSTIC
3664 printf("ip6_setpktopt: impossible case\n");
3665 #endif
3666 return EINVAL;
3667 }
3668
3669 /*
3670 * Caller must have ensured that the buffer is at least
3671 * aligned on 32-bit boundary.
3672 */
3673 VERIFY(IS_P2ALIGNED(buf, sizeof(u_int32_t)));
3674
3675 /*
3676 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3677 * not be specified in the context of RFC3542. Conversely,
3678 * RFC3542 types should not be specified in the context of RFC2292.
3679 */
3680 if (!cmsg) {
3681 switch (optname) {
3682 case IPV6_2292PKTINFO:
3683 case IPV6_2292HOPLIMIT:
3684 case IPV6_2292NEXTHOP:
3685 case IPV6_2292HOPOPTS:
3686 case IPV6_2292DSTOPTS:
3687 case IPV6_2292RTHDR:
3688 case IPV6_2292PKTOPTIONS:
3689 return ENOPROTOOPT;
3690 }
3691 }
3692 if (sticky && cmsg) {
3693 switch (optname) {
3694 case IPV6_PKTINFO:
3695 case IPV6_HOPLIMIT:
3696 case IPV6_NEXTHOP:
3697 case IPV6_HOPOPTS:
3698 case IPV6_DSTOPTS:
3699 case IPV6_RTHDRDSTOPTS:
3700 case IPV6_RTHDR:
3701 case IPV6_USE_MIN_MTU:
3702 case IPV6_DONTFRAG:
3703 case IPV6_TCLASS:
3704 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3705 return ENOPROTOOPT;
3706 }
3707 }
3708
3709 switch (optname) {
3710 case IPV6_2292PKTINFO:
3711 case IPV6_PKTINFO: {
3712 struct ifnet *ifp = NULL;
3713 struct in6_pktinfo *pktinfo;
3714
3715 if (len != sizeof(struct in6_pktinfo)) {
3716 return EINVAL;
3717 }
3718
3719 pktinfo = (struct in6_pktinfo *)(void *)buf;
3720
3721 /*
3722 * An application can clear any sticky IPV6_PKTINFO option by
3723 * doing a "regular" setsockopt with ipi6_addr being
3724 * in6addr_any and ipi6_ifindex being zero.
3725 * [RFC 3542, Section 6]
3726 */
3727 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3728 pktinfo->ipi6_ifindex == 0 &&
3729 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3730 ip6_clearpktopts(opt, optname);
3731 break;
3732 }
3733
3734 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3735 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3736 return EINVAL;
3737 }
3738
3739 /* validate the interface index if specified. */
3740 ifnet_head_lock_shared();
3741
3742 if (pktinfo->ipi6_ifindex > if_index) {
3743 ifnet_head_done();
3744 return ENXIO;
3745 }
3746
3747 if (pktinfo->ipi6_ifindex) {
3748 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3749 if (ifp == NULL) {
3750 ifnet_head_done();
3751 return ENXIO;
3752 }
3753 }
3754
3755 ifnet_head_done();
3756
3757 /*
3758 * We store the address anyway, and let in6_selectsrc()
3759 * validate the specified address. This is because ipi6_addr
3760 * may not have enough information about its scope zone, and
3761 * we may need additional information (such as outgoing
3762 * interface or the scope zone of a destination address) to
3763 * disambiguate the scope.
3764 * XXX: the delay of the validation may confuse the
3765 * application when it is used as a sticky option.
3766 */
3767 if (opt->ip6po_pktinfo == NULL) {
3768 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
3769 M_IP6OPT, M_NOWAIT);
3770 if (opt->ip6po_pktinfo == NULL) {
3771 return ENOBUFS;
3772 }
3773 }
3774 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3775 break;
3776 }
3777
3778 case IPV6_2292HOPLIMIT:
3779 case IPV6_HOPLIMIT: {
3780 int *hlimp;
3781
3782 /*
3783 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3784 * to simplify the ordering among hoplimit options.
3785 */
3786 if (optname == IPV6_HOPLIMIT && sticky) {
3787 return ENOPROTOOPT;
3788 }
3789
3790 if (len != sizeof(int)) {
3791 return EINVAL;
3792 }
3793 hlimp = (int *)(void *)buf;
3794 if (*hlimp < -1 || *hlimp > IPV6_MAXHLIM) {
3795 return EINVAL;
3796 }
3797
3798 opt->ip6po_hlim = *hlimp;
3799 break;
3800 }
3801
3802 case IPV6_TCLASS: {
3803 int tclass;
3804
3805 if (len != sizeof(int)) {
3806 return EINVAL;
3807 }
3808 tclass = *(int *)(void *)buf;
3809 if (tclass < -1 || tclass > 255) {
3810 return EINVAL;
3811 }
3812
3813 opt->ip6po_tclass = tclass;
3814 break;
3815 }
3816
3817 case IPV6_2292NEXTHOP:
3818 case IPV6_NEXTHOP:
3819 error = suser(kauth_cred_get(), 0);
3820 if (error) {
3821 return EACCES;
3822 }
3823
3824 if (len == 0) { /* just remove the option */
3825 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3826 break;
3827 }
3828
3829 /* check if cmsg_len is large enough for sa_len */
3830 if (len < sizeof(struct sockaddr) || len < *buf) {
3831 return EINVAL;
3832 }
3833
3834 switch (SA(buf)->sa_family) {
3835 case AF_INET6: {
3836 struct sockaddr_in6 *sa6 = SIN6(buf);
3837
3838 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) {
3839 return EINVAL;
3840 }
3841
3842 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3843 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3844 return EINVAL;
3845 }
3846 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3847 != 0) {
3848 return error;
3849 }
3850 break;
3851 }
3852 case AF_LINK: /* should eventually be supported */
3853 default:
3854 return EAFNOSUPPORT;
3855 }
3856
3857 /* turn off the previous option, then set the new option. */
3858 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3859 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3860 if (opt->ip6po_nexthop == NULL) {
3861 return ENOBUFS;
3862 }
3863 bcopy(buf, opt->ip6po_nexthop, *buf);
3864 break;
3865
3866 case IPV6_2292HOPOPTS:
3867 case IPV6_HOPOPTS: {
3868 struct ip6_hbh *hbh;
3869 int hbhlen;
3870
3871 /*
3872 * XXX: We don't allow a non-privileged user to set ANY HbH
3873 * options, since per-option restriction has too much
3874 * overhead.
3875 */
3876 error = suser(kauth_cred_get(), 0);
3877 if (error) {
3878 return EACCES;
3879 }
3880
3881 if (len == 0) {
3882 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3883 break; /* just remove the option */
3884 }
3885
3886 /* message length validation */
3887 if (len < sizeof(struct ip6_hbh)) {
3888 return EINVAL;
3889 }
3890 hbh = (struct ip6_hbh *)(void *)buf;
3891 hbhlen = (hbh->ip6h_len + 1) << 3;
3892 if (len != hbhlen) {
3893 return EINVAL;
3894 }
3895
3896 /* turn off the previous option, then set the new option. */
3897 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3898 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3899 if (opt->ip6po_hbh == NULL) {
3900 return ENOBUFS;
3901 }
3902 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3903 capture_exthdrstat_out = TRUE;
3904 break;
3905 }
3906
3907 case IPV6_2292DSTOPTS:
3908 case IPV6_DSTOPTS:
3909 case IPV6_RTHDRDSTOPTS: {
3910 struct ip6_dest *dest, **newdest = NULL;
3911 int destlen;
3912
3913 error = suser(kauth_cred_get(), 0);
3914 if (error) {
3915 return EACCES;
3916 }
3917
3918 if (len == 0) {
3919 ip6_clearpktopts(opt, optname);
3920 break; /* just remove the option */
3921 }
3922
3923 /* message length validation */
3924 if (len < sizeof(struct ip6_dest)) {
3925 return EINVAL;
3926 }
3927 dest = (struct ip6_dest *)(void *)buf;
3928 destlen = (dest->ip6d_len + 1) << 3;
3929 if (len != destlen) {
3930 return EINVAL;
3931 }
3932
3933 /*
3934 * Determine the position that the destination options header
3935 * should be inserted; before or after the routing header.
3936 */
3937 switch (optname) {
3938 case IPV6_2292DSTOPTS:
3939 /*
3940 * The old advacned API is ambiguous on this point.
3941 * Our approach is to determine the position based
3942 * according to the existence of a routing header.
3943 * Note, however, that this depends on the order of the
3944 * extension headers in the ancillary data; the 1st
3945 * part of the destination options header must appear
3946 * before the routing header in the ancillary data,
3947 * too.
3948 * RFC3542 solved the ambiguity by introducing
3949 * separate ancillary data or option types.
3950 */
3951 if (opt->ip6po_rthdr == NULL) {
3952 newdest = &opt->ip6po_dest1;
3953 } else {
3954 newdest = &opt->ip6po_dest2;
3955 }
3956 break;
3957 case IPV6_RTHDRDSTOPTS:
3958 newdest = &opt->ip6po_dest1;
3959 break;
3960 case IPV6_DSTOPTS:
3961 newdest = &opt->ip6po_dest2;
3962 break;
3963 }
3964
3965 /* turn off the previous option, then set the new option. */
3966 ip6_clearpktopts(opt, optname);
3967 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3968 if (*newdest == NULL) {
3969 return ENOBUFS;
3970 }
3971 bcopy(dest, *newdest, destlen);
3972 capture_exthdrstat_out = TRUE;
3973 break;
3974 }
3975
3976 case IPV6_2292RTHDR:
3977 case IPV6_RTHDR: {
3978 struct ip6_rthdr *rth;
3979 int rthlen;
3980
3981 if (len == 0) {
3982 ip6_clearpktopts(opt, IPV6_RTHDR);
3983 break; /* just remove the option */
3984 }
3985
3986 /* message length validation */
3987 if (len < sizeof(struct ip6_rthdr)) {
3988 return EINVAL;
3989 }
3990 rth = (struct ip6_rthdr *)(void *)buf;
3991 rthlen = (rth->ip6r_len + 1) << 3;
3992 if (len != rthlen) {
3993 return EINVAL;
3994 }
3995
3996 switch (rth->ip6r_type) {
3997 case IPV6_RTHDR_TYPE_0:
3998 if (rth->ip6r_len == 0) { /* must contain one addr */
3999 return EINVAL;
4000 }
4001 if (rth->ip6r_len % 2) { /* length must be even */
4002 return EINVAL;
4003 }
4004 if (rth->ip6r_len / 2 != rth->ip6r_segleft) {
4005 return EINVAL;
4006 }
4007 break;
4008 default:
4009 return EINVAL; /* not supported */
4010 }
4011
4012 /* turn off the previous option */
4013 ip6_clearpktopts(opt, IPV6_RTHDR);
4014 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
4015 if (opt->ip6po_rthdr == NULL) {
4016 return ENOBUFS;
4017 }
4018 bcopy(rth, opt->ip6po_rthdr, rthlen);
4019 capture_exthdrstat_out = TRUE;
4020 break;
4021 }
4022
4023 case IPV6_USE_MIN_MTU:
4024 if (len != sizeof(int)) {
4025 return EINVAL;
4026 }
4027 minmtupolicy = *(int *)(void *)buf;
4028 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
4029 minmtupolicy != IP6PO_MINMTU_DISABLE &&
4030 minmtupolicy != IP6PO_MINMTU_ALL) {
4031 return EINVAL;
4032 }
4033 opt->ip6po_minmtu = minmtupolicy;
4034 break;
4035
4036 case IPV6_DONTFRAG:
4037 if (len != sizeof(int)) {
4038 return EINVAL;
4039 }
4040
4041 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
4042 /*
4043 * we ignore this option for TCP sockets.
4044 * (RFC3542 leaves this case unspecified.)
4045 */
4046 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
4047 } else {
4048 opt->ip6po_flags |= IP6PO_DONTFRAG;
4049 }
4050 break;
4051
4052 case IPV6_PREFER_TEMPADDR:
4053 if (len != sizeof(int)) {
4054 return EINVAL;
4055 }
4056 preftemp = *(int *)(void *)buf;
4057 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
4058 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
4059 preftemp != IP6PO_TEMPADDR_PREFER) {
4060 return EINVAL;
4061 }
4062 opt->ip6po_prefer_tempaddr = preftemp;
4063 break;
4064
4065 default:
4066 return ENOPROTOOPT;
4067 } /* end of switch */
4068
4069 if (capture_exthdrstat_out) {
4070 if (uproto == IPPROTO_TCP) {
4071 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_out);
4072 } else if (uproto == IPPROTO_UDP) {
4073 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_out);
4074 }
4075 }
4076
4077 return 0;
4078 }
4079
4080 /*
4081 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
4082 * packet to the input queue of a specified interface. Note that this
4083 * calls the output routine of the loopback "driver", but with an interface
4084 * pointer that might NOT be &loif -- easier than replicating that code here.
4085 */
4086 void
4087 ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
4088 struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
4089 {
4090 struct mbuf *copym;
4091 struct ip6_hdr *ip6;
4092 struct in6_addr src;
4093
4094 if (lo_ifp == NULL) {
4095 return;
4096 }
4097
4098 /*
4099 * Copy the packet header as it's needed for the checksum.
4100 * Make sure to deep-copy IPv6 header portion in case the data
4101 * is in an mbuf cluster, so that we can safely override the IPv6
4102 * header portion later.
4103 */
4104 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
4105 if (copym != NULL && ((copym->m_flags & M_EXT) ||
4106 copym->m_len < sizeof(struct ip6_hdr))) {
4107 copym = m_pullup(copym, sizeof(struct ip6_hdr));
4108 }
4109
4110 if (copym == NULL) {
4111 return;
4112 }
4113
4114 ip6 = mtod(copym, struct ip6_hdr *);
4115 src = ip6->ip6_src;
4116 /*
4117 * clear embedded scope identifiers if necessary.
4118 * in6_clearscope will touch the addresses only when necessary.
4119 */
4120 in6_clearscope(&ip6->ip6_src);
4121 in6_clearscope(&ip6->ip6_dst);
4122
4123 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
4124 in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
4125 }
4126
4127 /*
4128 * Stuff the 'real' ifp into the pkthdr, to be used in matching
4129 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
4130 * to make the loopback driver compliant with the data link
4131 * requirements.
4132 */
4133 copym->m_pkthdr.rcvif = origifp;
4134
4135 /*
4136 * Also record the source interface (which owns the source address).
4137 * This is basically a stripped down version of ifa_foraddr6().
4138 */
4139 if (srcifp == NULL) {
4140 struct in6_ifaddr *ia;
4141
4142 lck_rw_lock_shared(&in6_ifaddr_rwlock);
4143 for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) {
4144 IFA_LOCK_SPIN(&ia->ia_ifa);
4145 /* compare against src addr with embedded scope */
4146 if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) {
4147 srcifp = ia->ia_ifp;
4148 IFA_UNLOCK(&ia->ia_ifa);
4149 break;
4150 }
4151 IFA_UNLOCK(&ia->ia_ifa);
4152 }
4153 lck_rw_done(&in6_ifaddr_rwlock);
4154 }
4155 if (srcifp != NULL) {
4156 ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
4157 }
4158 ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
4159
4160 dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
4161 }
4162
4163 /*
4164 * Chop IPv6 header off from the payload.
4165 */
4166 static int
4167 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
4168 {
4169 struct mbuf *mh;
4170 struct ip6_hdr *ip6;
4171
4172 ip6 = mtod(m, struct ip6_hdr *);
4173 if (m->m_len > sizeof(*ip6)) {
4174 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
4175 if (mh == NULL) {
4176 m_freem(m);
4177 return ENOBUFS;
4178 }
4179 M_COPY_PKTHDR(mh, m);
4180 MH_ALIGN(mh, sizeof(*ip6));
4181 m->m_flags &= ~M_PKTHDR;
4182 m->m_len -= sizeof(*ip6);
4183 m->m_data += sizeof(*ip6);
4184 mh->m_next = m;
4185 m = mh;
4186 m->m_len = sizeof(*ip6);
4187 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
4188 }
4189 exthdrs->ip6e_ip6 = m;
4190 return 0;
4191 }
4192
4193 static void
4194 ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
4195 int nxt0, uint32_t tlen, uint32_t optlen)
4196 {
4197 uint32_t sw_csum, hwcap = ifp->if_hwassist;
4198 int tso = TSO_IPV6_OK(ifp, m);
4199
4200 if (!hwcksum_tx) {
4201 /* do all in software; checksum offload is disabled */
4202 sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
4203 } else {
4204 /* do in software what the hardware cannot */
4205 sw_csum = m->m_pkthdr.csum_flags &
4206 ~IF_HWASSIST_CSUM_FLAGS(hwcap);
4207 }
4208
4209 if (optlen != 0) {
4210 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4211 m->m_pkthdr.csum_flags);
4212 } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
4213 (hwcap & CSUM_PARTIAL)) {
4214 /*
4215 * Partial checksum offload, ere), if no extension headers,
4216 * and TCP only (no UDP support, as the hardware may not be
4217 * able to convert +0 to -0 (0xffff) per RFC1122 4.1.3.4.
4218 * unless the interface supports "invert zero" capability.)
4219 */
4220 if (hwcksum_tx && !tso &&
4221 ((m->m_pkthdr.csum_flags & CSUM_TCPIPV6) ||
4222 ((hwcap & CSUM_ZERO_INVERT) &&
4223 (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
4224 tlen <= mtu) {
4225 uint16_t start = sizeof(struct ip6_hdr);
4226 uint16_t ulpoff =
4227 m->m_pkthdr.csum_data & 0xffff;
4228 m->m_pkthdr.csum_flags |=
4229 (CSUM_DATA_VALID | CSUM_PARTIAL);
4230 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
4231 m->m_pkthdr.csum_tx_start = start;
4232 sw_csum = 0;
4233 } else {
4234 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4235 m->m_pkthdr.csum_flags);
4236 }
4237 }
4238
4239 if (sw_csum & CSUM_DELAY_IPV6_DATA) {
4240 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
4241 sw_csum &= ~CSUM_DELAY_IPV6_DATA;
4242 }
4243
4244 if (hwcksum_tx) {
4245 /*
4246 * Drop off bits that aren't supported by hardware;
4247 * also make sure to preserve non-checksum related bits.
4248 */
4249 m->m_pkthdr.csum_flags =
4250 ((m->m_pkthdr.csum_flags &
4251 (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
4252 (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
4253 } else {
4254 /* drop all bits; checksum offload is disabled */
4255 m->m_pkthdr.csum_flags = 0;
4256 }
4257 }
4258
4259 /*
4260 * Compute IPv6 extension header length.
4261 */
4262 int
4263 ip6_optlen(struct in6pcb *in6p)
4264 {
4265 int len;
4266
4267 if (!in6p->in6p_outputopts) {
4268 return 0;
4269 }
4270
4271 len = 0;
4272 #define elen(x) \
4273 (((struct ip6_ext *)(x)) ? \
4274 (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
4275
4276 len += elen(in6p->in6p_outputopts->ip6po_hbh);
4277 if (in6p->in6p_outputopts->ip6po_rthdr) {
4278 /* dest1 is valid with rthdr only */
4279 len += elen(in6p->in6p_outputopts->ip6po_dest1);
4280 }
4281 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
4282 len += elen(in6p->in6p_outputopts->ip6po_dest2);
4283 return len;
4284 #undef elen
4285 }
4286
4287 static int
4288 sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
4289 {
4290 #pragma unused(arg1, arg2)
4291 int error, i;
4292
4293 i = ip6_output_measure;
4294 error = sysctl_handle_int(oidp, &i, 0, req);
4295 if (error || req->newptr == USER_ADDR_NULL) {
4296 goto done;
4297 }
4298 /* impose bounds */
4299 if (i < 0 || i > 1) {
4300 error = EINVAL;
4301 goto done;
4302 }
4303 if (ip6_output_measure != i && i == 1) {
4304 net_perf_initialize(&net_perf, ip6_output_measure_bins);
4305 }
4306 ip6_output_measure = i;
4307 done:
4308 return error;
4309 }
4310
4311 static int
4312 sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
4313 {
4314 #pragma unused(arg1, arg2)
4315 int error;
4316 uint64_t i;
4317
4318 i = ip6_output_measure_bins;
4319 error = sysctl_handle_quad(oidp, &i, 0, req);
4320 if (error || req->newptr == USER_ADDR_NULL) {
4321 goto done;
4322 }
4323 /* validate data */
4324 if (!net_perf_validate_bins(i)) {
4325 error = EINVAL;
4326 goto done;
4327 }
4328 ip6_output_measure_bins = i;
4329 done:
4330 return error;
4331 }
4332
4333 static int
4334 sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
4335 {
4336 #pragma unused(oidp, arg1, arg2)
4337 if (req->oldptr == USER_ADDR_NULL) {
4338 req->oldlen = (size_t)sizeof(struct ipstat);
4339 }
4340
4341 return SYSCTL_OUT(req, &net_perf, MIN(sizeof(net_perf), req->oldlen));
4342 }