]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/ip6_output.c
a468a93f60837f2e7ef7c7768497e3d94534c77f
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58 /*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
92 /*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections. This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
98
99 #include <sys/param.h>
100 #include <sys/malloc.h>
101 #include <sys/mbuf.h>
102 #include <sys/errno.h>
103 #include <sys/protosw.h>
104 #include <sys/socket.h>
105 #include <sys/socketvar.h>
106 #include <sys/systm.h>
107 #include <sys/kernel.h>
108 #include <sys/proc.h>
109 #include <sys/kauth.h>
110 #include <sys/mcache.h>
111 #include <sys/sysctl.h>
112 #include <kern/zalloc.h>
113 #include <libkern/OSByteOrder.h>
114
115 #include <pexpert/pexpert.h>
116 #include <mach/sdt.h>
117
118 #include <net/if.h>
119 #include <net/route.h>
120 #include <net/dlil.h>
121 #include <net/net_api_stats.h>
122 #include <net/net_osdep.h>
123 #include <net/net_perf.h>
124
125 #include <netinet/ip.h>
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet/ip_var.h>
129 #include <netinet6/in6_var.h>
130 #include <netinet/ip6.h>
131 #include <netinet/kpi_ipfilter_var.h>
132 #include <netinet/in_tclass.h>
133
134 #include <netinet6/ip6protosw.h>
135 #include <netinet/icmp6.h>
136 #include <netinet6/ip6_var.h>
137 #include <netinet/in_pcb.h>
138 #include <netinet6/nd6.h>
139 #include <netinet6/scope6_var.h>
140 #if IPSEC
141 #include <netinet6/ipsec.h>
142 #include <netinet6/ipsec6.h>
143 #include <netkey/key.h>
144 extern int ipsec_bypass;
145 #endif /* IPSEC */
146
147 #if NECP
148 #include <net/necp.h>
149 #endif /* NECP */
150
151 #if CONFIG_MACF_NET
152 #include <security/mac.h>
153 #endif /* CONFIG_MACF_NET */
154
155 #if DUMMYNET
156 #include <netinet/ip_fw.h>
157 #include <netinet/ip_dummynet.h>
158 #endif /* DUMMYNET */
159
160 #if PF
161 #include <net/pfvar.h>
162 #endif /* PF */
163
164 static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
165 static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
166 static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
167 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
168 static void ip6_out_cksum_stats(int, u_int32_t);
169 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
170 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
171 struct ip6_frag **);
172 static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
173 struct ifnet *, struct in6_addr *, u_int32_t *, boolean_t *);
174 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
175 struct sockopt *sopt);
176 static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
177 static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
178 static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
179 static void im6o_trace(struct ip6_moptions *, int);
180 static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
181 int, int);
182 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
183 static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
184 int, uint32_t, uint32_t);
185 extern int udp_ctloutput(struct socket *, struct sockopt *);
186 static int ip6_fragment_packet(struct mbuf **m,
187 struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
188 uint32_t mtu, boolean_t alwaysfrag, uint32_t unfragpartlen,
189 struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
190
191 SYSCTL_DECL(_net_inet6_ip6);
192
193 static int ip6_output_measure = 0;
194 SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
195 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
196 &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
197
198 static uint64_t ip6_output_measure_bins = 0;
199 SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
200 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
201 sysctl_ip6_output_measure_bins, "I",
202 "bins for chaining performance data histogram");
203
204 static net_perf_t net_perf;
205 SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
206 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
207 0, 0, sysctl_ip6_output_getperf, "S,net_perf",
208 "IP6 output performance data (struct net_perf, net/net_perf.h)");
209
210 #define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
211
212 /* For gdb */
213 __private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
214
215 struct ip6_moptions_dbg {
216 struct ip6_moptions im6o; /* ip6_moptions */
217 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
218 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
219 /*
220 * Alloc and free callers.
221 */
222 ctrace_t im6o_alloc;
223 ctrace_t im6o_free;
224 /*
225 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
226 */
227 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
228 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
229 };
230
/* Parameters of the zone backing ip6_moptions allocations. */
#define IM6O_ZONE_MAX	64			/* maximum elements in zone */
#define IM6O_ZONE_NAME	"ip6_moptions"		/* zone name */

static struct zone *im6o_zone;			/* zone for ip6_moptions */
static unsigned int im6o_size;			/* size of zone element */

/* im6o debugging defaults to on for DEBUG builds and off otherwise. */
#if DEBUG
static unsigned int im6o_debug = 1;		/* debugging (enabled) */
#else
static unsigned int im6o_debug = 0;		/* debugging (disabled) */
#endif /* !DEBUG */
242
/*
 * Single-packet entry point for IPv6 output.
 *
 * All arguments are handed unchanged to ip6_output_list(), with its
 * packetchain argument set to 0 so that m0 is not treated as the head of a
 * chain of packets.  The return value is whatever ip6_output_list() returns.
 */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
	int error;

	error = ip6_output_list(m0, 0, opt, ro, flags, im6o, ifpp, ip6oa);

	return error;
}
253
254 /*
255 * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
256 * header (with pri, len, nxt, hlim, src, dst).
257 * This function may modify ver and hlim only.
258 * The mbuf chain containing the packet will be freed.
259 * The mbuf opt, if present, will not be freed.
260 *
261 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
262 * skipped and ro->ro_rt would be used. Otherwise the result of route
263 * lookup is stored in ro->ro_rt.
264 *
265 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
266 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
267 * which is rt_rmx.rmx_mtu.
268 */
269 int
270 ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
271 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
272 struct ifnet **ifpp, struct ip6_out_args *ip6oa)
273 {
274 struct ip6_hdr *ip6;
275 u_char *nexthdrp;
276 struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
277 struct ifnet **ifpp_save = ifpp;
278 struct mbuf *m, *mprev;
279 struct mbuf *sendchain = NULL, *sendchain_last = NULL;
280 struct mbuf *inputchain = NULL;
281 int nxt0 = 0;
282 struct route_in6 *ro_pmtu = NULL;
283 struct rtentry *rt = NULL;
284 struct sockaddr_in6 *dst = NULL, src_sa, dst_sa;
285 int error = 0;
286 struct in6_ifaddr *ia = NULL, *src_ia = NULL;
287 u_int32_t mtu = 0;
288 boolean_t alwaysfrag = FALSE;
289 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
290 struct ip6_rthdr *rh;
291 struct in6_addr finaldst;
292 ipfilter_t inject_filter_ref;
293 struct ipf_pktopts *ippo = NULL;
294 struct flowadv *adv = NULL;
295 uint32_t pktcnt = 0;
296 uint32_t packets_processed = 0;
297 struct timeval start_tv;
298 #if DUMMYNET
299 struct m_tag *tag;
300 struct ip6_out_args saved_ip6oa;
301 struct sockaddr_in6 dst_buf;
302 #endif /* DUMMYNET */
303 #if IPSEC
304 struct socket *so = NULL;
305 struct secpolicy *sp = NULL;
306 struct route_in6 *ipsec_saved_route = NULL;
307 boolean_t needipsectun = FALSE;
308 #endif /* IPSEC */
309 #if NECP
310 necp_kernel_policy_result necp_result = 0;
311 necp_kernel_policy_result_parameter necp_result_parameter;
312 necp_kernel_policy_id necp_matched_policy_id = 0;
313 #endif /* NECP */
314 struct {
315 struct ipf_pktopts ipf_pktopts;
316 struct ip6_exthdrs exthdrs;
317 struct route_in6 ip6route;
318 #if IPSEC
319 struct ipsec_output_state ipsec_state;
320 #endif /* IPSEC */
321 #if NECP
322 struct route_in6 necp_route;
323 #endif /* NECP */
324 #if DUMMYNET
325 struct route_in6 saved_route;
326 struct route_in6 saved_ro_pmtu;
327 struct ip_fw_args args;
328 #endif /* DUMMYNET */
329 } ip6obz;
330 #define ipf_pktopts ip6obz.ipf_pktopts
331 #define exthdrs ip6obz.exthdrs
332 #define ip6route ip6obz.ip6route
333 #define ipsec_state ip6obz.ipsec_state
334 #define necp_route ip6obz.necp_route
335 #define saved_route ip6obz.saved_route
336 #define saved_ro_pmtu ip6obz.saved_ro_pmtu
337 #define args ip6obz.args
338 union {
339 struct {
340 boolean_t select_srcif : 1;
341 boolean_t hdrsplit : 1;
342 boolean_t route_selected : 1;
343 boolean_t dontfrag : 1;
344 #if IPSEC
345 boolean_t needipsec : 1;
346 boolean_t noipsec : 1;
347 #endif /* IPSEC */
348 };
349 uint32_t raw;
350 } ip6obf = { .raw = 0 };
351
352 if (ip6_output_measure) {
353 net_perf_start_time(&net_perf, &start_tv);
354 }
355
356 VERIFY(m0->m_flags & M_PKTHDR);
357
358 /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
359 bzero(&ip6obz, sizeof(ip6obz));
360
361 #if DUMMYNET
362 if (SLIST_EMPTY(&m0->m_pkthdr.tags)) {
363 goto tags_done;
364 }
365
366 /* Grab info from mtags prepended to the chain */
367 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
368 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
369 struct dn_pkt_tag *dn_tag;
370
371 /*
372 * ip6_output_list() cannot handle chains of packets reinjected
373 * by dummynet. The same restriction applies to
374 * ip_output_list().
375 */
376 VERIFY(0 == packetchain);
377
378 dn_tag = (struct dn_pkt_tag *)(tag + 1);
379 args.fwa_pf_rule = dn_tag->dn_pf_rule;
380
381 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
382 dst = &dst_buf;
383 ifp = dn_tag->dn_ifp;
384 if (ifp != NULL) {
385 ifnet_reference(ifp);
386 }
387 flags = dn_tag->dn_flags;
388 if (dn_tag->dn_flags & IPV6_OUTARGS) {
389 saved_ip6oa = dn_tag->dn_ip6oa;
390 ip6oa = &saved_ip6oa;
391 }
392
393 saved_route = dn_tag->dn_ro6;
394 ro = &saved_route;
395 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
396 ro_pmtu = &saved_ro_pmtu;
397 origifp = dn_tag->dn_origifp;
398 if (origifp != NULL) {
399 ifnet_reference(origifp);
400 }
401 mtu = dn_tag->dn_mtu;
402 alwaysfrag = (dn_tag->dn_alwaysfrag != 0);
403 unfragpartlen = dn_tag->dn_unfragpartlen;
404
405 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs));
406
407 m_tag_delete(m0, tag);
408 }
409
410 tags_done:
411 #endif /* DUMMYNET */
412
413 m = m0;
414
415 #if IPSEC
416 if (ipsec_bypass == 0) {
417 so = ipsec_getsocket(m);
418 if (so != NULL) {
419 (void) ipsec_setsocket(m, NULL);
420 }
421 /* If packet is bound to an interface, check bound policies */
422 if ((flags & IPV6_OUTARGS) &&
423 (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
424 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
425 /* ip6obf.noipsec is a bitfield, use temp integer */
426 int noipsec = 0;
427
428 if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
429 flags, ip6oa, &noipsec, &sp) != 0) {
430 goto bad;
431 }
432
433 ip6obf.noipsec = (noipsec != 0);
434 }
435 }
436 #endif /* IPSEC */
437
438 ippo = &ipf_pktopts;
439
440 if (flags & IPV6_OUTARGS) {
441 /*
442 * In the forwarding case, only the ifscope value is used,
443 * as source interface selection doesn't take place.
444 */
445 if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
446 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
447 (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF)))) {
448 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
449 }
450
451 if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
452 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
453 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
454 (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
455 }
456
457 if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR) {
458 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
459 }
460 } else {
461 ip6obf.select_srcif = FALSE;
462 if (flags & IPV6_OUTARGS) {
463 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
464 ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
465 IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
466 }
467 }
468
469 if (flags & IPV6_OUTARGS) {
470 if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) {
471 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
472 }
473 if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE) {
474 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
475 }
476 adv = &ip6oa->ip6oa_flowadv;
477 adv->code = FADV_SUCCESS;
478 ip6oa->ip6oa_retflags = 0;
479 }
480
481 /*
482 * Clear out ifpp to be filled in after determining route. ifpp_save is
483 * used to keep old value to release reference properly and dtrace
484 * ipsec tunnel traffic properly.
485 */
486 if (ifpp != NULL && *ifpp != NULL) {
487 *ifpp = NULL;
488 }
489
490 #if DUMMYNET
491 if (args.fwa_pf_rule) {
492 ip6 = mtod(m, struct ip6_hdr *);
493 VERIFY(ro != NULL); /* ro == saved_route */
494 goto check_with_pf;
495 }
496 #endif /* DUMMYNET */
497
498 #if NECP
499 /*
500 * Since all packets are assumed to come from same socket, necp lookup
501 * only needs to happen once per function entry.
502 */
503 necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
504 (flags & IPV6_OUTARGS) ? ip6oa : NULL, &necp_result,
505 &necp_result_parameter);
506 #endif /* NECP */
507
508 /*
509 * If a chain was passed in, prepare for the first iteration. For all
510 * other iterations, this work will be done at evaluateloop: label.
511 */
512 if (packetchain) {
513 /*
514 * Remove m from the chain during processing to avoid
515 * accidental frees on entire list.
516 */
517 inputchain = m->m_nextpkt;
518 m->m_nextpkt = NULL;
519 }
520
521 loopit:
522 packets_processed++;
523 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP | PKTF_IFAINFO);
524 ip6 = mtod(m, struct ip6_hdr *);
525 nxt0 = ip6->ip6_nxt;
526 finaldst = ip6->ip6_dst;
527 ip6obf.hdrsplit = FALSE;
528 ro_pmtu = NULL;
529
530 if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
531 inject_filter_ref = ipf_get_inject_filter(m);
532 } else {
533 inject_filter_ref = NULL;
534 }
535
536 #define MAKE_EXTHDR(hp, mp) do { \
537 if (hp != NULL) { \
538 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
539 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
540 ((eh)->ip6e_len + 1) << 3); \
541 if (error) \
542 goto freehdrs; \
543 } \
544 } while (0)
545
546 if (opt != NULL) {
547 /* Hop-by-Hop options header */
548 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
549 /* Destination options header(1st part) */
550 if (opt->ip6po_rthdr) {
551 /*
552 * Destination options header(1st part)
553 * This only makes sense with a routing header.
554 * See Section 9.2 of RFC 3542.
555 * Disabling this part just for MIP6 convenience is
556 * a bad idea. We need to think carefully about a
557 * way to make the advanced API coexist with MIP6
558 * options, which might automatically be inserted in
559 * the kernel.
560 */
561 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
562 }
563 /* Routing header */
564 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
565 /* Destination options header(2nd part) */
566 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
567 }
568
569 #undef MAKE_EXTHDR
570
571 #if NECP
572 if (necp_matched_policy_id) {
573 necp_mark_packet_from_ip(m, necp_matched_policy_id);
574
575 switch (necp_result) {
576 case NECP_KERNEL_POLICY_RESULT_PASS:
577 goto skip_ipsec;
578 case NECP_KERNEL_POLICY_RESULT_DROP:
579 case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
580 /*
581 * Flow divert packets should be blocked at the IP
582 * layer.
583 */
584 error = EHOSTUNREACH;
585 ip6stat.ip6s_necp_policy_drop++;
586 goto freehdrs;
587 case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
588 /*
589 * Verify that the packet is being routed to the tunnel
590 */
591 struct ifnet *policy_ifp =
592 necp_get_ifnet_from_result_parameter(
593 &necp_result_parameter);
594
595 if (policy_ifp == ifp) {
596 goto skip_ipsec;
597 } else {
598 if (necp_packet_can_rebind_to_ifnet(m,
599 policy_ifp, (struct route *)&necp_route,
600 AF_INET6)) {
601 /*
602 * Set scoped index to the tunnel
603 * interface, since it is compatible
604 * with the packet. This will only work
605 * for callers who pass IPV6_OUTARGS,
606 * but that covers all of the clients
607 * we care about today.
608 */
609 if (flags & IPV6_OUTARGS) {
610 ip6oa->ip6oa_boundif =
611 policy_ifp->if_index;
612 ip6oa->ip6oa_flags |=
613 IP6OAF_BOUND_IF;
614 }
615 if (opt != NULL
616 && opt->ip6po_pktinfo != NULL) {
617 opt->ip6po_pktinfo->
618 ipi6_ifindex =
619 policy_ifp->if_index;
620 }
621 ro = &necp_route;
622 goto skip_ipsec;
623 } else {
624 error = ENETUNREACH;
625 ip6stat.ip6s_necp_policy_drop++;
626 goto freehdrs;
627 }
628 }
629 }
630 default:
631 break;
632 }
633 }
634 #endif /* NECP */
635
636 #if IPSEC
637 if (ipsec_bypass != 0 || ip6obf.noipsec) {
638 goto skip_ipsec;
639 }
640
641 if (sp == NULL) {
642 /* get a security policy for this packet */
643 if (so != NULL) {
644 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
645 so, &error);
646 } else {
647 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
648 0, &error);
649 }
650 if (sp == NULL) {
651 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
652 goto freehdrs;
653 }
654 }
655
656 error = 0;
657
658 /* check policy */
659 switch (sp->policy) {
660 case IPSEC_POLICY_DISCARD:
661 case IPSEC_POLICY_GENERATE:
662 /*
663 * This packet is just discarded.
664 */
665 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
666 goto freehdrs;
667
668 case IPSEC_POLICY_BYPASS:
669 case IPSEC_POLICY_NONE:
670 /* no need to do IPsec. */
671 ip6obf.needipsec = FALSE;
672 break;
673
674 case IPSEC_POLICY_IPSEC:
675 if (sp->req == NULL) {
676 /* acquire a policy */
677 error = key_spdacquire(sp);
678 goto freehdrs;
679 }
680 if (sp->ipsec_if) {
681 goto skip_ipsec;
682 } else {
683 ip6obf.needipsec = TRUE;
684 }
685 break;
686
687 case IPSEC_POLICY_ENTRUST:
688 default:
689 printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
690 break;
691 }
692 skip_ipsec:
693 #endif /* IPSEC */
694
695 /*
696 * Calculate the total length of the extension header chain.
697 * Keep the length of the unfragmentable part for fragmentation.
698 */
699 optlen = 0;
700 if (exthdrs.ip6e_hbh != NULL) {
701 optlen += exthdrs.ip6e_hbh->m_len;
702 }
703 if (exthdrs.ip6e_dest1 != NULL) {
704 optlen += exthdrs.ip6e_dest1->m_len;
705 }
706 if (exthdrs.ip6e_rthdr != NULL) {
707 optlen += exthdrs.ip6e_rthdr->m_len;
708 }
709 unfragpartlen = optlen + sizeof(struct ip6_hdr);
710
711 /* NOTE: we don't add AH/ESP length here. do that later. */
712 if (exthdrs.ip6e_dest2 != NULL) {
713 optlen += exthdrs.ip6e_dest2->m_len;
714 }
715
716 /*
717 * If we need IPsec, or there is at least one extension header,
718 * separate IP6 header from the payload.
719 */
720 if ((
721 #if IPSEC
722 ip6obf.needipsec ||
723 #endif /* IPSEC */
724 optlen) && !ip6obf.hdrsplit) {
725 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
726 m = NULL;
727 goto freehdrs;
728 }
729 m = exthdrs.ip6e_ip6;
730 ip6obf.hdrsplit = TRUE;
731 }
732
733 /* adjust pointer */
734 ip6 = mtod(m, struct ip6_hdr *);
735
736 /* adjust mbuf packet header length */
737 m->m_pkthdr.len += optlen;
738 plen = m->m_pkthdr.len - sizeof(*ip6);
739
740 /* If this is a jumbo payload, insert a jumbo payload option. */
741 if (plen > IPV6_MAXPACKET) {
742 if (!ip6obf.hdrsplit) {
743 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
744 m = NULL;
745 goto freehdrs;
746 }
747 m = exthdrs.ip6e_ip6;
748 ip6obf.hdrsplit = TRUE;
749 }
750 /* adjust pointer */
751 ip6 = mtod(m, struct ip6_hdr *);
752 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) {
753 goto freehdrs;
754 }
755 ip6->ip6_plen = 0;
756 } else {
757 ip6->ip6_plen = htons(plen);
758 }
759 /*
760 * Concatenate headers and fill in next header fields.
761 * Here we have, on "m"
762 * IPv6 payload
763 * and we insert headers accordingly. Finally, we should be getting:
764 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
765 *
766 * during the header composing process, "m" points to IPv6 header.
767 * "mprev" points to an extension header prior to esp.
768 */
769 nexthdrp = &ip6->ip6_nxt;
770 mprev = m;
771
772 /*
773 * we treat dest2 specially. this makes IPsec processing
774 * much easier. the goal here is to make mprev point the
775 * mbuf prior to dest2.
776 *
777 * result: IPv6 dest2 payload
778 * m and mprev will point to IPv6 header.
779 */
780 if (exthdrs.ip6e_dest2 != NULL) {
781 if (!ip6obf.hdrsplit) {
782 panic("assumption failed: hdr not split");
783 /* NOTREACHED */
784 }
785 exthdrs.ip6e_dest2->m_next = m->m_next;
786 m->m_next = exthdrs.ip6e_dest2;
787 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
788 ip6->ip6_nxt = IPPROTO_DSTOPTS;
789 }
790
791 #define MAKE_CHAIN(m, mp, p, i) do { \
792 if (m != NULL) { \
793 if (!ip6obf.hdrsplit) { \
794 panic("assumption failed: hdr not split"); \
795 /* NOTREACHED */ \
796 } \
797 *mtod((m), u_char *) = *(p); \
798 *(p) = (i); \
799 p = mtod((m), u_char *); \
800 (m)->m_next = (mp)->m_next; \
801 (mp)->m_next = (m); \
802 (mp) = (m); \
803 } \
804 } while (0)
805 /*
806 * result: IPv6 hbh dest1 rthdr dest2 payload
807 * m will point to IPv6 header. mprev will point to the
808 * extension header prior to dest2 (rthdr in the above case).
809 */
810 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
811 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
812 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
813
814 /* It is no longer safe to free the pointers in exthdrs. */
815 exthdrs.merged = TRUE;
816
817 #undef MAKE_CHAIN
818
819 #if IPSEC
820 if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
821 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
822 }
823 #endif /* IPSEC */
824
825 if (!TAILQ_EMPTY(&ipv6_filters) &&
826 !((flags & IPV6_OUTARGS) &&
827 (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED))) {
828 struct ipfilter *filter;
829 int seen = (inject_filter_ref == NULL);
830 int fixscope = 0;
831
832 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
833 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
834 IM6O_LOCK(im6o);
835 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
836 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
837 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
838 IM6O_UNLOCK(im6o);
839 }
840
841 /* Hack: embed the scope_id in the destination */
842 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
843 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
844 fixscope = 1;
845 ip6->ip6_dst.s6_addr16[1] =
846 htons(ro->ro_dst.sin6_scope_id);
847 }
848
849 ipf_ref();
850 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
851 /*
852 * Don't process packet twice if we've already seen it.
853 */
854 if (seen == 0) {
855 if ((struct ipfilter *)inject_filter_ref ==
856 filter) {
857 seen = 1;
858 }
859 } else if (filter->ipf_filter.ipf_output != NULL) {
860 errno_t result;
861
862 result = filter->ipf_filter.ipf_output(
863 filter->ipf_filter.cookie,
864 (mbuf_t *)&m, ippo);
865 if (result == EJUSTRETURN) {
866 ipf_unref();
867 m = NULL;
868 goto evaluateloop;
869 }
870 if (result != 0) {
871 ipf_unref();
872 goto bad;
873 }
874 }
875 }
876 ipf_unref();
877
878 ip6 = mtod(m, struct ip6_hdr *);
879 /* Hack: cleanup embedded scope_id if we put it there */
880 if (fixscope) {
881 ip6->ip6_dst.s6_addr16[1] = 0;
882 }
883 }
884
885 #if IPSEC
886 if (ip6obf.needipsec) {
887 int segleft_org;
888
889 /*
890 * pointers after IPsec headers are not valid any more.
891 * other pointers need a great care too.
892 * (IPsec routines should not mangle mbufs prior to AH/ESP)
893 */
894 exthdrs.ip6e_dest2 = NULL;
895
896 if (exthdrs.ip6e_rthdr != NULL) {
897 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
898 segleft_org = rh->ip6r_segleft;
899 rh->ip6r_segleft = 0;
900 } else {
901 rh = NULL;
902 segleft_org = 0;
903 }
904
905 ipsec_state.m = m;
906 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
907 sp, flags, &needipsectun);
908 m = ipsec_state.m;
909 if (error) {
910 /* mbuf is already reclaimed in ipsec6_output_trans. */
911 m = NULL;
912 switch (error) {
913 case EHOSTUNREACH:
914 case ENETUNREACH:
915 case EMSGSIZE:
916 case ENOBUFS:
917 case ENOMEM:
918 break;
919 default:
920 printf("ip6_output (ipsec): error code %d\n",
921 error);
922 /* FALLTHRU */
923 case ENOENT:
924 /* don't show these error codes to the user */
925 error = 0;
926 break;
927 }
928 goto bad;
929 }
930 if (exthdrs.ip6e_rthdr != NULL) {
931 /* ah6_output doesn't modify mbuf chain */
932 rh->ip6r_segleft = segleft_org;
933 }
934 }
935 #endif /* IPSEC */
936
937 /* If there is a routing header, discard the packet. */
938 if (exthdrs.ip6e_rthdr != NULL) {
939 error = EINVAL;
940 goto bad;
941 }
942
943 /* Source address validation */
944 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
945 !(flags & IPV6_UNSPECSRC)) {
946 error = EOPNOTSUPP;
947 ip6stat.ip6s_badscope++;
948 goto bad;
949 }
950 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
951 error = EOPNOTSUPP;
952 ip6stat.ip6s_badscope++;
953 goto bad;
954 }
955
956 ip6stat.ip6s_localout++;
957
958 /*
959 * Route packet.
960 */
961 if (ro == NULL) {
962 ro = &ip6route;
963 bzero((caddr_t)ro, sizeof(*ro));
964 }
965 ro_pmtu = ro;
966 if (opt != NULL && opt->ip6po_rthdr) {
967 ro = &opt->ip6po_route;
968 }
969 dst = SIN6(&ro->ro_dst);
970
971 if (ro->ro_rt != NULL) {
972 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
973 }
974 /*
975 * if specified, try to fill in the traffic class field.
976 * do not override if a non-zero value is already set.
977 * we check the diffserv field and the ecn field separately.
978 */
979 if (opt != NULL && opt->ip6po_tclass >= 0) {
980 int mask = 0;
981
982 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) {
983 mask |= 0xfc;
984 }
985 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) {
986 mask |= 0x03;
987 }
988 if (mask != 0) {
989 ip6->ip6_flow |=
990 htonl((opt->ip6po_tclass & mask) << 20);
991 }
992 }
993
994 /* fill in or override the hop limit field, if necessary. */
995 if (opt && opt->ip6po_hlim != -1) {
996 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
997 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
998 if (im6o != NULL) {
999 IM6O_LOCK(im6o);
1000 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
1001 IM6O_UNLOCK(im6o);
1002 } else {
1003 ip6->ip6_hlim = ip6_defmcasthlim;
1004 }
1005 }
1006
1007 /*
1008 * If there is a cached route, check that it is to the same
1009 * destination and is still up. If not, free it and try again.
1010 * Test rt_flags without holding rt_lock for performance reasons;
1011 * if the route is down it will hopefully be caught by the layer
1012 * below (since it uses this route as a hint) or during the
1013 * next transmit.
1014 */
1015 if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
1016 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst)) {
1017 ROUTE_RELEASE(ro);
1018 }
1019
1020 if (ro->ro_rt == NULL) {
1021 bzero(dst, sizeof(*dst));
1022 dst->sin6_family = AF_INET6;
1023 dst->sin6_len = sizeof(struct sockaddr_in6);
1024 dst->sin6_addr = ip6->ip6_dst;
1025 }
1026 #if IPSEC
1027 if (ip6obf.needipsec && needipsectun) {
1028 #if CONFIG_DTRACE
1029 struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
1030 #endif /* CONFIG_DTRACE */
1031 /*
1032 * All the extension headers will become inaccessible
1033 * (since they can be encrypted).
1034 * Don't panic, we need no more updates to extension headers
1035 * on inner IPv6 packet (since they are now encapsulated).
1036 *
1037 * IPv6 [ESP|AH] IPv6 [extension headers] payload
1038 */
1039 bzero(&exthdrs, sizeof(exthdrs));
1040 exthdrs.ip6e_ip6 = m;
1041
1042 ipsec_state.m = m;
1043 route_copyout((struct route *)&ipsec_state.ro, (struct route *)ro,
1044 sizeof(struct route_in6));
1045 ipsec_state.dst = SA(dst);
1046
1047 /* So that we can see packets inside the tunnel */
1048 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
1049 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
1050 struct ip *, NULL, struct ip6_hdr *, ip6);
1051
1052 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
1053 /* tunneled in IPv4? packet is gone */
1054 if (ipsec_state.tunneled == 4) {
1055 m = NULL;
1056 goto evaluateloop;
1057 }
1058 m = ipsec_state.m;
1059 ipsec_saved_route = ro;
1060 ro = (struct route_in6 *)&ipsec_state.ro;
1061 dst = SIN6(ipsec_state.dst);
1062 if (error) {
1063 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
1064 m = NULL;
1065 switch (error) {
1066 case EHOSTUNREACH:
1067 case ENETUNREACH:
1068 case EMSGSIZE:
1069 case ENOBUFS:
1070 case ENOMEM:
1071 break;
1072 default:
1073 printf("ip6_output (ipsec): error code %d\n",
1074 error);
1075 /* FALLTHRU */
1076 case ENOENT:
1077 /* don't show these error codes to the user */
1078 error = 0;
1079 break;
1080 }
1081 goto bad;
1082 }
1083 /*
1084 * The packet has been encapsulated so the ifscope
1085 * is no longer valid since it does not apply to the
1086 * outer address: ignore the ifscope.
1087 */
1088 if (flags & IPV6_OUTARGS) {
1089 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
1090 ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
1091 }
1092 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
1093 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE) {
1094 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
1095 }
1096 }
1097 exthdrs.ip6e_ip6 = m;
1098 }
1099 #endif /* IPSEC */
1100
1101 /*
1102 * ifp should only be filled in for dummy net packets which will jump
1103 * to check_with_pf label.
1104 */
1105 if (ifp != NULL) {
1106 VERIFY(ip6obf.route_selected);
1107 }
1108
1109 /* adjust pointer */
1110 ip6 = mtod(m, struct ip6_hdr *);
1111
1112 if (ip6obf.select_srcif) {
1113 bzero(&src_sa, sizeof(src_sa));
1114 src_sa.sin6_family = AF_INET6;
1115 src_sa.sin6_len = sizeof(src_sa);
1116 src_sa.sin6_addr = ip6->ip6_src;
1117 }
1118 bzero(&dst_sa, sizeof(dst_sa));
1119 dst_sa.sin6_family = AF_INET6;
1120 dst_sa.sin6_len = sizeof(dst_sa);
1121 dst_sa.sin6_addr = ip6->ip6_dst;
1122
1123 /*
1124 * Only call in6_selectroute() on first iteration to avoid taking
1125 * multiple references on ifp and rt.
1126 *
1127 * in6_selectroute() might return an ifp with its reference held
1128 * even in the error case, so make sure to release its reference.
1129 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
1130 */
1131 if (!ip6obf.route_selected) {
1132 error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
1133 &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
1134
1135 if (error != 0) {
1136 switch (error) {
1137 case EHOSTUNREACH:
1138 ip6stat.ip6s_noroute++;
1139 break;
1140 case EADDRNOTAVAIL:
1141 default:
1142 break; /* XXX statistics? */
1143 }
1144 if (ifp != NULL) {
1145 in6_ifstat_inc(ifp, ifs6_out_discard);
1146 }
1147 /* ifp (if non-NULL) will be released at the end */
1148 goto bad;
1149 }
1150 ip6obf.route_selected = TRUE;
1151 }
1152 if (rt == NULL) {
1153 /*
1154 * If in6_selectroute() does not return a route entry,
1155 * dst may not have been updated.
1156 */
1157 *dst = dst_sa; /* XXX */
1158 }
1159
1160 #if NECP
1161 /* Catch-all to check if the interface is allowed */
1162 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1163 error = EHOSTUNREACH;
1164 ip6stat.ip6s_necp_policy_drop++;
1165 goto bad;
1166 }
1167 #endif /* NECP */
1168
1169 /*
1170 * then rt (for unicast) and ifp must be non-NULL valid values.
1171 */
1172 if (!(flags & IPV6_FORWARDING)) {
1173 in6_ifstat_inc_na(ifp, ifs6_out_request);
1174 }
1175 if (rt != NULL) {
1176 RT_LOCK(rt);
1177 if (ia == NULL) {
1178 ia = (struct in6_ifaddr *)(rt->rt_ifa);
1179 if (ia != NULL) {
1180 IFA_ADDREF(&ia->ia_ifa);
1181 }
1182 }
1183 rt->rt_use++;
1184 RT_UNLOCK(rt);
1185 }
1186
1187 /*
1188 * The outgoing interface must be in the zone of source and
1189 * destination addresses (except local/loopback). We should
1190 * use ia_ifp to support the case of sending packets to an
1191 * address of our own.
1192 */
1193 if (ia != NULL && ia->ia_ifp) {
1194 ifnet_reference(ia->ia_ifp); /* for origifp */
1195 if (origifp != NULL) {
1196 ifnet_release(origifp);
1197 }
1198 origifp = ia->ia_ifp;
1199 } else {
1200 if (ifp != NULL) {
1201 ifnet_reference(ifp); /* for origifp */
1202 }
1203 if (origifp != NULL) {
1204 ifnet_release(origifp);
1205 }
1206 origifp = ifp;
1207 }
1208
1209 /* skip scope enforcements for local/loopback route */
1210 if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1211 struct in6_addr src0, dst0;
1212 u_int32_t zone;
1213
1214 src0 = ip6->ip6_src;
1215 if (in6_setscope(&src0, origifp, &zone)) {
1216 goto badscope;
1217 }
1218 bzero(&src_sa, sizeof(src_sa));
1219 src_sa.sin6_family = AF_INET6;
1220 src_sa.sin6_len = sizeof(src_sa);
1221 src_sa.sin6_addr = ip6->ip6_src;
1222 if ((sa6_recoverscope(&src_sa, TRUE) ||
1223 zone != src_sa.sin6_scope_id)) {
1224 goto badscope;
1225 }
1226
1227 dst0 = ip6->ip6_dst;
1228 if ((in6_setscope(&dst0, origifp, &zone))) {
1229 goto badscope;
1230 }
1231 /* re-initialize to be sure */
1232 bzero(&dst_sa, sizeof(dst_sa));
1233 dst_sa.sin6_family = AF_INET6;
1234 dst_sa.sin6_len = sizeof(dst_sa);
1235 dst_sa.sin6_addr = ip6->ip6_dst;
1236 if ((sa6_recoverscope(&dst_sa, TRUE) ||
1237 zone != dst_sa.sin6_scope_id)) {
1238 goto badscope;
1239 }
1240
1241 /* scope check is done. */
1242 goto routefound;
1243
1244 badscope:
1245 ip6stat.ip6s_badscope++;
1246 in6_ifstat_inc(origifp, ifs6_out_discard);
1247 if (error == 0) {
1248 error = EHOSTUNREACH; /* XXX */
1249 }
1250 goto bad;
1251 }
1252
1253 routefound:
1254 if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1255 if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1256 /*
1257 * The nexthop is explicitly specified by the
1258 * application. We assume the next hop is an IPv6
1259 * address.
1260 */
1261 dst = SIN6(opt->ip6po_nexthop);
1262 } else if ((rt->rt_flags & RTF_GATEWAY)) {
1263 dst = SIN6(rt->rt_gateway);
1264 }
1265 /*
1266 * For packets destined to local/loopback, record the
1267 * source interface (which owns the source
1268 * address), as well as the output interface. This is
1269 * needed to reconstruct the embedded zone for the
1270 * link-local address case in ip6_input().
1271 */
1272 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1273 uint32_t srcidx;
1274
1275 if (src_ia != NULL) {
1276 srcidx = src_ia->ia_ifp->if_index;
1277 } else if (ro->ro_srcia != NULL) {
1278 srcidx = ro->ro_srcia->ifa_ifp->if_index;
1279 } else {
1280 srcidx = 0;
1281 }
1282
1283 ip6_setsrcifaddr_info(m, srcidx, NULL);
1284 ip6_setdstifaddr_info(m, 0, ia);
1285 }
1286 }
1287
1288 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1289 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1290 } else {
1291 struct in6_multi *in6m;
1292
1293 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1294 in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1295
1296 /*
1297 * Confirm that the outgoing interface supports multicast.
1298 */
1299 if (!(ifp->if_flags & IFF_MULTICAST)) {
1300 ip6stat.ip6s_noroute++;
1301 in6_ifstat_inc(ifp, ifs6_out_discard);
1302 error = ENETUNREACH;
1303 goto bad;
1304 }
1305 in6_multihead_lock_shared();
1306 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1307 in6_multihead_lock_done();
1308 if (im6o != NULL) {
1309 IM6O_LOCK(im6o);
1310 }
1311 if (in6m != NULL &&
1312 (im6o == NULL || im6o->im6o_multicast_loop)) {
1313 if (im6o != NULL) {
1314 IM6O_UNLOCK(im6o);
1315 }
1316 /*
1317 * If we belong to the destination multicast group
1318 * on the outgoing interface, and the caller did not
1319 * forbid loopback, loop back a copy.
1320 */
1321 ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
1322 } else if (im6o != NULL) {
1323 IM6O_UNLOCK(im6o);
1324 }
1325 if (in6m != NULL) {
1326 IN6M_REMREF(in6m);
1327 }
1328 /*
1329 * Multicasts with a hoplimit of zero may be looped back,
1330 * above, but must not be transmitted on a network.
1331 * Also, multicasts addressed to the loopback interface
1332 * are not sent -- the above call to ip6_mloopback() will
1333 * loop back a copy if this host actually belongs to the
1334 * destination group on the loopback interface.
1335 */
1336 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1337 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1338 /* remove m from the packetchain and continue looping */
1339 if (m != NULL) {
1340 m_freem(m);
1341 }
1342 m = NULL;
1343 goto evaluateloop;
1344 }
1345 }
1346
1347 /*
1348 * Fill the outgoing interface to tell the upper layer
1349 * to increment per-interface statistics.
1350 */
1351 if (ifpp != NULL && *ifpp == NULL) {
1352 ifnet_reference(ifp); /* for caller */
1353 *ifpp = ifp;
1354 }
1355
1356 /* Determine path MTU. */
1357 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1358 &alwaysfrag)) != 0) {
1359 goto bad;
1360 }
1361
1362 /*
1363 * The caller of this function may specify to use the minimum MTU
1364 * in some cases.
1365 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1366 * setting. The logic is a bit complicated; by default, unicast
1367 * packets will follow path MTU while multicast packets will be sent at
1368 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1369 * including unicast ones will be sent at the minimum MTU. Multicast
1370 * packets will always be sent at the minimum MTU unless
1371 * IP6PO_MINMTU_DISABLE is explicitly specified.
1372 * See RFC 3542 for more details.
1373 */
1374 if (mtu > IPV6_MMTU) {
1375 if ((flags & IPV6_MINMTU)) {
1376 mtu = IPV6_MMTU;
1377 } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
1378 mtu = IPV6_MMTU;
1379 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1380 (opt == NULL ||
1381 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1382 mtu = IPV6_MMTU;
1383 }
1384 }
1385
1386 /*
1387 * clear embedded scope identifiers if necessary.
1388 * in6_clearscope will touch the addresses only when necessary.
1389 */
1390 in6_clearscope(&ip6->ip6_src);
1391 in6_clearscope(&ip6->ip6_dst);
1392 /*
1393 * If the outgoing packet contains a hop-by-hop options header,
1394 * it must be examined and processed even by the source node.
1395 * (RFC 2460, section 4.)
1396 */
1397 if (exthdrs.ip6e_hbh != NULL) {
1398 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1399 u_int32_t dummy; /* XXX unused */
1400 uint32_t oplen = 0; /* for ip6_process_hopopts() */
1401 #if DIAGNOSTIC
1402 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) {
1403 panic("ip6e_hbh is not continuous");
1404 }
1405 #endif
1406 /*
1407 * XXX: If we have to send an ICMPv6 error to the sender,
1408 * we need the M_LOOP flag since icmp6_error() expects
1409 * the IPv6 and the hop-by-hop options header are
1410 * continuous unless the flag is set.
1411 */
1412 m->m_flags |= M_LOOP;
1413 m->m_pkthdr.rcvif = ifp;
1414 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1415 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1416 &dummy, &oplen) < 0) {
1417 /*
1418 * m was already freed at this point. Set to NULL so it
1419 * is not re-freed at end of ip6_output_list.
1420 */
1421 m = NULL;
1422 error = EINVAL; /* better error? */
1423 goto bad;
1424 }
1425 m->m_flags &= ~M_LOOP; /* XXX */
1426 m->m_pkthdr.rcvif = NULL;
1427 }
1428
1429 #if DUMMYNET
1430 check_with_pf:
1431 #endif /* DUMMYNET */
1432 #if PF
1433 if (PF_IS_ENABLED) {
1434 #if DUMMYNET
1435
1436 /*
1437 * TODO: Need to save opt->ip6po_flags for reinjection
1438 * rdar://10434993
1439 */
1440 args.fwa_m = m;
1441 args.fwa_oif = ifp;
1442 args.fwa_oflags = flags;
1443 if (flags & IPV6_OUTARGS) {
1444 args.fwa_ip6oa = ip6oa;
1445 }
1446 args.fwa_ro6 = ro;
1447 args.fwa_dst6 = dst;
1448 args.fwa_ro6_pmtu = ro_pmtu;
1449 args.fwa_origifp = origifp;
1450 args.fwa_mtu = mtu;
1451 args.fwa_alwaysfrag = alwaysfrag;
1452 args.fwa_unfragpartlen = unfragpartlen;
1453 args.fwa_exthdrs = &exthdrs;
1454 /* Invoke outbound packet filter */
1455 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1456 #else /* !DUMMYNET */
1457 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1458 #endif /* !DUMMYNET */
1459
1460 if (error != 0 || m == NULL) {
1461 if (m != NULL) {
1462 panic("%s: unexpected packet %p\n",
1463 __func__, m);
1464 /* NOTREACHED */
1465 }
1466 /* m was already freed by callee and is now NULL. */
1467 goto evaluateloop;
1468 }
1469 ip6 = mtod(m, struct ip6_hdr *);
1470 }
1471 #endif /* PF */
1472
1473 #ifdef IPSEC
1474 /* clean ipsec history before fragmentation */
1475 ipsec_delaux(m);
1476 #endif /* IPSEC */
1477
1478 if (ip6oa != NULL) {
1479 u_int8_t dscp;
1480
1481 dscp = (ntohl(ip6->ip6_flow) & IP6FLOW_DSCP_MASK) >> IP6FLOW_DSCP_SHIFT;
1482
1483 error = set_packet_qos(m, ifp,
1484 ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
1485 ip6oa->ip6oa_sotc, ip6oa->ip6oa_netsvctype, &dscp);
1486 if (error == 0) {
1487 ip6->ip6_flow &= ~htonl(IP6FLOW_DSCP_MASK);
1488 ip6->ip6_flow |= htonl((u_int32_t)dscp << IP6FLOW_DSCP_SHIFT);
1489 } else {
1490 printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
1491 error = 0;
1492 }
1493 }
1494 /*
1495 * Determine whether fragmentation is necessary. If so, m is passed
1496 * back as a chain of packets and original mbuf is freed. Otherwise, m
1497 * is unchanged.
1498 */
1499 error = ip6_fragment_packet(&m, opt,
1500 &exthdrs, ifp, mtu, alwaysfrag, unfragpartlen, ro_pmtu, nxt0,
1501 optlen);
1502
1503 if (error) {
1504 goto bad;
1505 }
1506
1507 /*
1508 * The evaluateloop label is where we decide whether to continue looping over
1509 * packets or call into nd code to send.
1510 */
1511 evaluateloop:
1512
1513 /*
1514 * m may be NULL when we jump to the evaluateloop label from PF or
1515 * other code that can drop packets.
1516 */
1517 if (m != NULL) {
1518 /*
1519 * If we already have a chain to send, tack m onto the end.
1520 * Otherwise make m the start and end of the to-be-sent chain.
1521 */
1522 if (sendchain != NULL) {
1523 sendchain_last->m_nextpkt = m;
1524 } else {
1525 sendchain = m;
1526 }
1527
1528 /* Fragmentation may mean m is a chain. Find the last packet. */
1529 while (m->m_nextpkt) {
1530 m = m->m_nextpkt;
1531 }
1532 sendchain_last = m;
1533 pktcnt++;
1534 }
1535
1536 /* Fill in next m from inputchain as appropriate. */
1537 m = inputchain;
1538 if (m != NULL) {
1539 /* Isolate m from rest of input chain. */
1540 inputchain = m->m_nextpkt;
1541 m->m_nextpkt = NULL;
1542
1543 /*
1544 * Clear exthdrs and ipsec_state so stale contents are not
1545 * reused. Note this also clears the exthdrs.merged flag.
1546 */
1547 bzero(&exthdrs, sizeof(exthdrs));
1548 bzero(&ipsec_state, sizeof(ipsec_state));
1549
1550 /* Continue looping. */
1551 goto loopit;
1552 }
1553
1554 /*
1555 * If we get here, there's no more mbufs in inputchain, so send the
1556 * sendchain if there is one.
1557 */
1558 if (pktcnt > 0) {
1559 error = nd6_output_list(ifp, origifp, sendchain, dst,
1560 ro->ro_rt, adv);
1561 /*
1562 * Fall through to done label even in error case because
1563 * nd6_output_list frees packetchain in both success and
1564 * failure cases.
1565 */
1566 }
1567
1568 done:
1569 if (ifpp_save != NULL && *ifpp_save != NULL) {
1570 ifnet_release(*ifpp_save);
1571 *ifpp_save = NULL;
1572 }
1573 ROUTE_RELEASE(&ip6route);
1574 #if IPSEC
1575 ROUTE_RELEASE(&ipsec_state.ro);
1576 if (sp != NULL) {
1577 key_freesp(sp, KEY_SADB_UNLOCKED);
1578 }
1579 #endif /* IPSEC */
1580 #if NECP
1581 ROUTE_RELEASE(&necp_route);
1582 #endif /* NECP */
1583 #if DUMMYNET
1584 ROUTE_RELEASE(&saved_route);
1585 ROUTE_RELEASE(&saved_ro_pmtu);
1586 #endif /* DUMMYNET */
1587
1588 if (ia != NULL) {
1589 IFA_REMREF(&ia->ia_ifa);
1590 }
1591 if (src_ia != NULL) {
1592 IFA_REMREF(&src_ia->ia_ifa);
1593 }
1594 if (ifp != NULL) {
1595 ifnet_release(ifp);
1596 }
1597 if (origifp != NULL) {
1598 ifnet_release(origifp);
1599 }
1600 if (ip6_output_measure) {
1601 net_perf_measure_time(&net_perf, &start_tv, packets_processed);
1602 net_perf_histogram(&net_perf, packets_processed);
1603 }
1604 return error;
1605
1606 freehdrs:
1607 if (exthdrs.ip6e_hbh != NULL) {
1608 if (exthdrs.merged) {
1609 panic("Double free of ip6e_hbh");
1610 }
1611 m_freem(exthdrs.ip6e_hbh);
1612 }
1613 if (exthdrs.ip6e_dest1 != NULL) {
1614 if (exthdrs.merged) {
1615 panic("Double free of ip6e_dest1");
1616 }
1617 m_freem(exthdrs.ip6e_dest1);
1618 }
1619 if (exthdrs.ip6e_rthdr != NULL) {
1620 if (exthdrs.merged) {
1621 panic("Double free of ip6e_rthdr");
1622 }
1623 m_freem(exthdrs.ip6e_rthdr);
1624 }
1625 if (exthdrs.ip6e_dest2 != NULL) {
1626 if (exthdrs.merged) {
1627 panic("Double free of ip6e_dest2");
1628 }
1629 m_freem(exthdrs.ip6e_dest2);
1630 }
1631 /* FALLTHRU */
1632 bad:
1633 if (inputchain != NULL) {
1634 m_freem_list(inputchain);
1635 }
1636 if (sendchain != NULL) {
1637 m_freem_list(sendchain);
1638 }
1639 if (m != NULL) {
1640 m_freem(m);
1641 }
1642
1643 goto done;
1644
1645 #undef ipf_pktopts
1646 #undef exthdrs
1647 #undef ip6route
1648 #undef ipsec_state
1649 #undef saved_route
1650 #undef saved_ro_pmtu
1651 #undef args
1652 }
1653
1654 /* ip6_fragment_packet
1655 *
1656 * The fragmentation logic is rather complex:
1657 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1658 * 1-a: send as is if tlen <= path mtu
1659 * 1-b: fragment if tlen > path mtu
1660 *
1661 * 2: if user asks us not to fragment (dontfrag == 1)
1662 * 2-a: send as is if tlen <= interface mtu
1663 * 2-b: error if tlen > interface mtu
1664 *
1665 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1666 * always fragment
1667 *
1668 * 4: if dontfrag == 1 && alwaysfrag == 1
1669 * error, as we cannot handle this conflicting request
1670 */
1671
1672 static int
1673 ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
1674 struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
1675 boolean_t alwaysfrag, uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
1676 int nxt0, uint32_t optlen)
1677 {
1678 VERIFY(NULL != mptr);
1679 struct mbuf *m = *mptr;
1680 int error = 0;
1681 size_t tlen = m->m_pkthdr.len;
1682 boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
1683
1684 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
1685 dontfrag = TRUE;
1686 /*
1687 * Discard partial sum information if this packet originated
1688 * from another interface; the packet would already have the
1689 * final checksum and we shouldn't recompute it.
1690 */
1691 if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
1692 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
1693 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1694 m->m_pkthdr.csum_data = 0;
1695 }
1696 }
1697
1698 if (dontfrag && alwaysfrag) { /* case 4 */
1699 /* conflicting request - can't transmit */
1700 return EMSGSIZE;
1701 }
1702
1703 /* Access without acquiring nd_ifinfo lock for performance */
1704 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
1705 /*
1706 * Even if the DONTFRAG option is specified, we cannot send the
1707 * packet when the data length is larger than the MTU of the
1708 * outgoing interface.
1709 * Notify the error by sending IPV6_PATHMTU ancillary data as
1710 * well as returning an error code (the latter is not described
1711 * in the API spec.)
1712 */
1713 u_int32_t mtu32;
1714 struct ip6ctlparam ip6cp;
1715
1716 mtu32 = (u_int32_t)mtu;
1717 bzero(&ip6cp, sizeof(ip6cp));
1718 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1719 pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
1720 return EMSGSIZE;
1721 }
1722
1723 /*
1724 * transmit packet without fragmentation
1725 */
1726 if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
1727 (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
1728 (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1729 /*
1730 * mppn not updated in this case because no new chain is formed
1731 * and inserted
1732 */
1733 ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
1734 } else {
1735 /*
1736 * time to fragment - cases 1-b and 3 are handled inside
1737 * ip6_do_fragmentation().
1738 * mppn is passed down to be updated to point at fragment chain.
1739 */
1740 error = ip6_do_fragmentation(mptr, optlen, ifp,
1741 unfragpartlen, mtod(m, struct ip6_hdr *), exthdrsp, mtu, nxt0);
1742 }
1743
1744 return error;
1745 }
1746
1747 /*
1748 * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
1749 * the packet needs to be fragmented. on success, morig is freed and a chain
1750 * of fragments is linked into the packet chain where morig existed. Otherwise,
1751 * an errno is returned.
1752 */
1753 int
1754 ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
1755 uint32_t unfragpartlen, struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp,
1756 uint32_t mtu, int nxt0)
1757 {
1758 VERIFY(NULL != mptr);
1759 int error = 0;
1760
1761 struct mbuf *morig = *mptr;
1762 struct mbuf *first_mbufp = NULL;
1763 struct mbuf *last_mbufp = NULL;
1764
1765 size_t tlen = morig->m_pkthdr.len;
1766
1767 /*
1768 * try to fragment the packet. case 1-b and 3
1769 */
1770 if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
1771 /* TSO and fragment aren't compatible */
1772 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1773 return EMSGSIZE;
1774 } else if (mtu < IPV6_MMTU) {
1775 /* path MTU cannot be less than IPV6_MMTU */
1776 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1777 return EMSGSIZE;
1778 } else if (ip6->ip6_plen == 0) {
1779 /* jumbo payload cannot be fragmented */
1780 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1781 return EMSGSIZE;
1782 } else {
1783 size_t hlen, len, off;
1784 struct mbuf **mnext = NULL;
1785 struct ip6_frag *ip6f;
1786 u_int32_t id = htonl(ip6_randomid());
1787 u_char nextproto;
1788
1789 /*
1790 * Too large for the destination or interface;
1791 * fragment if possible.
1792 * Must be able to put at least 8 bytes per fragment.
1793 */
1794 hlen = unfragpartlen;
1795 if (mtu > IPV6_MAXPACKET) {
1796 mtu = IPV6_MAXPACKET;
1797 }
1798
1799 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1800 if (len < 8) {
1801 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1802 return EMSGSIZE;
1803 }
1804
1805 /*
1806 * Change the next header field of the last header in the
1807 * unfragmentable part.
1808 */
1809 if (exthdrsp->ip6e_rthdr != NULL) {
1810 nextproto = *mtod(exthdrsp->ip6e_rthdr, u_char *);
1811 *mtod(exthdrsp->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1812 } else if (exthdrsp->ip6e_dest1 != NULL) {
1813 nextproto = *mtod(exthdrsp->ip6e_dest1, u_char *);
1814 *mtod(exthdrsp->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1815 } else if (exthdrsp->ip6e_hbh != NULL) {
1816 nextproto = *mtod(exthdrsp->ip6e_hbh, u_char *);
1817 *mtod(exthdrsp->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1818 } else {
1819 nextproto = ip6->ip6_nxt;
1820 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1821 }
1822
1823 if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
1824 in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
1825 }
1826
1827 /*
1828 * Loop through length of segment after first fragment,
1829 * make new header and copy data of each part and link onto
1830 * chain.
1831 */
1832 for (off = hlen; off < tlen; off += len) {
1833 struct ip6_hdr *new_mhip6;
1834 struct mbuf *new_m;
1835 struct mbuf *m_frgpart;
1836
1837 MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1838 if (new_m == NULL) {
1839 error = ENOBUFS;
1840 ip6stat.ip6s_odropped++;
1841 break;
1842 }
1843 new_m->m_pkthdr.rcvif = NULL;
1844 new_m->m_flags = morig->m_flags & M_COPYFLAGS;
1845
1846 if (first_mbufp != NULL) {
1847 /* Every pass through loop but first */
1848 *mnext = new_m;
1849 last_mbufp = new_m;
1850 } else {
1851 /* This is the first element of the fragment chain */
1852 first_mbufp = new_m;
1853 last_mbufp = new_m;
1854 }
1855 mnext = &new_m->m_nextpkt;
1856
1857 new_m->m_data += max_linkhdr;
1858 new_mhip6 = mtod(new_m, struct ip6_hdr *);
1859 *new_mhip6 = *ip6;
1860 new_m->m_len = sizeof(*new_mhip6);
1861
1862 error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
1863 if (error) {
1864 ip6stat.ip6s_odropped++;
1865 break;
1866 }
1867
1868 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1869 if (off + len >= tlen) {
1870 len = tlen - off;
1871 } else {
1872 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1873 }
1874 new_mhip6->ip6_plen = htons((u_short)(len + hlen +
1875 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
1876
1877 if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
1878 error = ENOBUFS;
1879 ip6stat.ip6s_odropped++;
1880 break;
1881 }
1882 m_cat(new_m, m_frgpart);
1883 new_m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1884 new_m->m_pkthdr.rcvif = NULL;
1885
1886 M_COPY_CLASSIFIER(new_m, morig);
1887 M_COPY_PFTAG(new_m, morig);
1888
1889 #ifdef notyet
1890 #if CONFIG_MACF_NET
1891 mac_create_fragment(morig, new_m);
1892 #endif /* CONFIG_MACF_NET */
1893 #endif /* notyet */
1894
1895 ip6f->ip6f_reserved = 0;
1896 ip6f->ip6f_ident = id;
1897 ip6f->ip6f_nxt = nextproto;
1898 ip6stat.ip6s_ofragments++;
1899 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1900 }
1901
1902 if (error) {
1903 /* free all the fragments created */
1904 if (first_mbufp != NULL) {
1905 m_freem_list(first_mbufp);
1906 first_mbufp = NULL;
1907 }
1908 last_mbufp = NULL;
1909 } else {
1910 /* successful fragmenting */
1911 m_freem(morig);
1912 *mptr = first_mbufp;
1913 last_mbufp->m_nextpkt = NULL;
1914 ip6stat.ip6s_fragmented++;
1915 in6_ifstat_inc(ifp, ifs6_out_fragok);
1916 }
1917 }
1918 return error;
1919 }
1920
1921 static int
1922 ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1923 {
1924 struct mbuf *m;
1925
1926 if (hlen > MCLBYTES) {
1927 return ENOBUFS; /* XXX */
1928 }
1929 MGET(m, M_DONTWAIT, MT_DATA);
1930 if (m == NULL) {
1931 return ENOBUFS;
1932 }
1933
1934 if (hlen > MLEN) {
1935 MCLGET(m, M_DONTWAIT);
1936 if (!(m->m_flags & M_EXT)) {
1937 m_free(m);
1938 return ENOBUFS;
1939 }
1940 }
1941 m->m_len = hlen;
1942 if (hdr != NULL) {
1943 bcopy(hdr, mtod(m, caddr_t), hlen);
1944 }
1945
1946 *mp = m;
1947 return 0;
1948 }
1949
/*
 * Account an outbound software checksum of len bytes to the per-protocol
 * statistics.  Only TCP and UDP keep such statistics for now; any other
 * protocol is silently ignored.
 */
static void
ip6_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out6_cksum_stats(len);
	} else if (proto == IPPROTO_UDP) {
		udp_out6_cksum_stats(len);
	}
}
1965
1966 /*
1967 * Process a delayed payload checksum calculation (outbound path.)
1968 *
1969 * hoff is the number of bytes beyond the mbuf data pointer which
1970 * points to the IPv6 header. optlen is the number of bytes, if any,
1971 * between the end of IPv6 header and the beginning of the ULP payload
1972 * header, which represents the extension headers. If optlen is less
1973 * than zero, this routine will bail when it detects extension headers.
1974 *
1975 * Returns a bitmask representing all the work done in software.
1976 */
1977 uint32_t
1978 in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
1979 int32_t nxt0, uint32_t csum_flags)
1980 {
1981 unsigned char buf[sizeof(struct ip6_hdr)] __attribute__((aligned(8)));
1982 struct ip6_hdr *ip6;
1983 uint32_t offset, mlen, hlen, olen, sw_csum;
1984 uint16_t csum, ulpoff, plen;
1985 uint8_t nxt;
1986
1987 _CASSERT(sizeof(csum) == sizeof(uint16_t));
1988 VERIFY(m->m_flags & M_PKTHDR);
1989
1990 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
1991
1992 if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0) {
1993 goto done;
1994 }
1995
1996 mlen = m->m_pkthdr.len; /* total mbuf len */
1997 hlen = sizeof(*ip6); /* IPv6 header len */
1998
1999 /* sanity check (need at least IPv6 header) */
2000 if (mlen < (hoff + hlen)) {
2001 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
2002 "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
2003 /* NOTREACHED */
2004 }
2005
2006 /*
2007 * In case the IPv6 header is not contiguous, or not 32-bit
2008 * aligned, copy it to a local buffer.
2009 */
2010 if ((hoff + hlen) > m->m_len ||
2011 !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
2012 m_copydata(m, hoff, hlen, (caddr_t)buf);
2013 ip6 = (struct ip6_hdr *)(void *)buf;
2014 } else {
2015 ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
2016 }
2017
2018 nxt = ip6->ip6_nxt;
2019 plen = ntohs(ip6->ip6_plen);
2020 if (plen != (mlen - (hoff + hlen))) {
2021 plen = OSSwapInt16(plen);
2022 if (plen != (mlen - (hoff + hlen))) {
2023 /* Don't complain for jumbograms */
2024 if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
2025 printf("%s: mbuf 0x%llx proto %d IPv6 "
2026 "plen %d (%x) [swapped %d (%x)] doesn't "
2027 "match actual packet length; %d is used "
2028 "instead\n", __func__,
2029 (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
2030 ip6->ip6_plen, ip6->ip6_plen, plen, plen,
2031 (mlen - (hoff + hlen)));
2032 }
2033 plen = mlen - (hoff + hlen);
2034 }
2035 }
2036
2037 if (optlen < 0) {
2038 /* next header isn't TCP/UDP and we don't know optlen, bail */
2039 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2040 sw_csum = 0;
2041 goto done;
2042 }
2043 olen = 0;
2044 } else {
2045 /* caller supplied the original transport number; use it */
2046 if (nxt0 >= 0) {
2047 nxt = nxt0;
2048 }
2049 olen = optlen;
2050 }
2051
2052 offset = hoff + hlen + olen; /* ULP header */
2053
2054 /* sanity check */
2055 if (mlen < offset) {
2056 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
2057 "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
2058 /* NOTREACHED */
2059 }
2060
2061 /*
2062 * offset is added to the lower 16-bit value of csum_data,
2063 * which is expected to contain the ULP offset; therefore
2064 * CSUM_PARTIAL offset adjustment must be undone.
2065 */
2066 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL | CSUM_DATA_VALID)) ==
2067 (CSUM_PARTIAL | CSUM_DATA_VALID)) {
2068 /*
2069 * Get back the original ULP offset (this will
2070 * undo the CSUM_PARTIAL logic in ip6_output.)
2071 */
2072 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
2073 m->m_pkthdr.csum_tx_start);
2074 }
2075
2076 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
2077
2078 if (mlen < (ulpoff + sizeof(csum))) {
2079 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2080 "cksum offset (%u) cksum flags 0x%x\n", __func__,
2081 m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
2082 /* NOTREACHED */
2083 }
2084
2085 csum = inet6_cksum(m, 0, offset, plen - olen);
2086
2087 /* Update stats */
2088 ip6_out_cksum_stats(nxt, plen - olen);
2089
2090 /* RFC1122 4.1.3.4 */
2091 if (csum == 0 &&
2092 (m->m_pkthdr.csum_flags & (CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
2093 csum = 0xffff;
2094 }
2095
2096 /* Insert the checksum in the ULP csum field */
2097 offset += ulpoff;
2098 if ((offset + sizeof(csum)) > m->m_len) {
2099 m_copyback(m, offset, sizeof(csum), &csum);
2100 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
2101 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
2102 } else {
2103 bcopy(&csum, (mtod(m, char *) + offset), sizeof(csum));
2104 }
2105 m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID |
2106 CSUM_PARTIAL | CSUM_ZERO_INVERT);
2107
2108 done:
2109 return sw_csum;
2110 }
2111
2112 /*
2113 * Insert jumbo payload option.
2114 */
2115 static int
2116 ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
2117 {
2118 struct mbuf *mopt;
2119 u_char *optbuf;
2120 u_int32_t v;
2121
2122 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
2123
2124 /*
2125 * If there is no hop-by-hop options header, allocate new one.
2126 * If there is one but it doesn't have enough space to store the
2127 * jumbo payload option, allocate a cluster to store the whole options.
2128 * Otherwise, use it to store the options.
2129 */
2130 if (exthdrs->ip6e_hbh == NULL) {
2131 MGET(mopt, M_DONTWAIT, MT_DATA);
2132 if (mopt == NULL) {
2133 return ENOBUFS;
2134 }
2135 mopt->m_len = JUMBOOPTLEN;
2136 optbuf = mtod(mopt, u_char *);
2137 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
2138 exthdrs->ip6e_hbh = mopt;
2139 } else {
2140 struct ip6_hbh *hbh;
2141
2142 mopt = exthdrs->ip6e_hbh;
2143 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
2144 /*
2145 * XXX assumption:
2146 * - exthdrs->ip6e_hbh is not referenced from places
2147 * other than exthdrs.
2148 * - exthdrs->ip6e_hbh is not an mbuf chain.
2149 */
2150 u_int32_t oldoptlen = mopt->m_len;
2151 struct mbuf *n;
2152
2153 /*
2154 * XXX: give up if the whole (new) hbh header does
2155 * not fit even in an mbuf cluster.
2156 */
2157 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) {
2158 return ENOBUFS;
2159 }
2160
2161 /*
2162 * As a consequence, we must always prepare a cluster
2163 * at this point.
2164 */
2165 MGET(n, M_DONTWAIT, MT_DATA);
2166 if (n != NULL) {
2167 MCLGET(n, M_DONTWAIT);
2168 if (!(n->m_flags & M_EXT)) {
2169 m_freem(n);
2170 n = NULL;
2171 }
2172 }
2173 if (n == NULL) {
2174 return ENOBUFS;
2175 }
2176 n->m_len = oldoptlen + JUMBOOPTLEN;
2177 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
2178 oldoptlen);
2179 optbuf = mtod(n, u_char *) + oldoptlen;
2180 m_freem(mopt);
2181 mopt = exthdrs->ip6e_hbh = n;
2182 } else {
2183 optbuf = mtod(mopt, u_char *) + mopt->m_len;
2184 mopt->m_len += JUMBOOPTLEN;
2185 }
2186 optbuf[0] = IP6OPT_PADN;
2187 optbuf[1] = 1;
2188
2189 /*
2190 * Adjust the header length according to the pad and
2191 * the jumbo payload option.
2192 */
2193 hbh = mtod(mopt, struct ip6_hbh *);
2194 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
2195 }
2196
2197 /* fill in the option. */
2198 optbuf[2] = IP6OPT_JUMBO;
2199 optbuf[3] = 4;
2200 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
2201 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
2202
2203 /* finally, adjust the packet header length */
2204 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
2205
2206 return 0;
2207 #undef JUMBOOPTLEN
2208 }
2209
2210 /*
2211 * Insert fragment header and copy unfragmentable header portions.
2212 */
2213 static int
2214 ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
2215 struct ip6_frag **frghdrp)
2216 {
2217 struct mbuf *n, *mlast;
2218
2219 if (hlen > sizeof(struct ip6_hdr)) {
2220 n = m_copym(m0, sizeof(struct ip6_hdr),
2221 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
2222 if (n == NULL) {
2223 return ENOBUFS;
2224 }
2225 m->m_next = n;
2226 } else {
2227 n = m;
2228 }
2229
2230 /* Search for the last mbuf of unfragmentable part. */
2231 for (mlast = n; mlast->m_next; mlast = mlast->m_next) {
2232 ;
2233 }
2234
2235 if (!(mlast->m_flags & M_EXT) &&
2236 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
2237 /* use the trailing space of the last mbuf for the frag hdr */
2238 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
2239 mlast->m_len);
2240 mlast->m_len += sizeof(struct ip6_frag);
2241 m->m_pkthdr.len += sizeof(struct ip6_frag);
2242 } else {
2243 /* allocate a new mbuf for the fragment header */
2244 struct mbuf *mfrg;
2245
2246 MGET(mfrg, M_DONTWAIT, MT_DATA);
2247 if (mfrg == NULL) {
2248 return ENOBUFS;
2249 }
2250 mfrg->m_len = sizeof(struct ip6_frag);
2251 *frghdrp = mtod(mfrg, struct ip6_frag *);
2252 mlast->m_next = mfrg;
2253 }
2254
2255 return 0;
2256 }
2257
2258 static int
2259 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
2260 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
2261 boolean_t *alwaysfragp)
2262 {
2263 u_int32_t mtu = 0;
2264 boolean_t alwaysfrag = FALSE;
2265 int error = 0;
2266 boolean_t is_local = FALSE;
2267
2268 if (IN6_IS_SCOPE_LINKLOCAL(dst)) {
2269 is_local = TRUE;
2270 }
2271
2272 if (ro_pmtu != ro) {
2273 /* The first hop and the final destination may differ. */
2274 struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
2275 if (ROUTE_UNUSABLE(ro_pmtu) ||
2276 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) {
2277 ROUTE_RELEASE(ro_pmtu);
2278 }
2279
2280 if (ro_pmtu->ro_rt == NULL) {
2281 bzero(sa6_dst, sizeof(*sa6_dst));
2282 sa6_dst->sin6_family = AF_INET6;
2283 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
2284 sa6_dst->sin6_addr = *dst;
2285
2286 rtalloc_scoped((struct route *)ro_pmtu,
2287 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
2288 }
2289 }
2290
2291 if (ro_pmtu->ro_rt != NULL) {
2292 u_int32_t ifmtu;
2293
2294 if (ifp == NULL) {
2295 ifp = ro_pmtu->ro_rt->rt_ifp;
2296 }
2297 /* Access without acquiring nd_ifinfo lock for performance */
2298 ifmtu = IN6_LINKMTU(ifp);
2299
2300 /*
2301 * Access rmx_mtu without holding the route entry lock,
2302 * for performance; this isn't something that changes
2303 * often, so optimize.
2304 */
2305 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2306 if (mtu > ifmtu || mtu == 0) {
2307 /*
2308 * The MTU on the route is larger than the MTU on
2309 * the interface! This shouldn't happen, unless the
2310 * MTU of the interface has been changed after the
2311 * interface was brought up. Change the MTU in the
2312 * route to match the interface MTU (as long as the
2313 * field isn't locked).
2314 *
2315 * if MTU on the route is 0, we need to fix the MTU.
2316 * this case happens with path MTU discovery timeouts.
2317 */
2318 mtu = ifmtu;
2319 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) {
2320 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
2321 }
2322 } else if (mtu < IPV6_MMTU) {
2323 /*
2324 * RFC2460 section 5, last paragraph:
2325 * if we record ICMPv6 too big message with
2326 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
2327 * or smaller, with framgent header attached.
2328 * (fragment header is needed regardless from the
2329 * packet size, for translators to identify packets)
2330 */
2331 alwaysfrag = TRUE;
2332 mtu = IPV6_MMTU;
2333 }
2334 } else {
2335 if (ifp) {
2336 /* Don't hold nd_ifinfo lock for performance */
2337 mtu = IN6_LINKMTU(ifp);
2338 } else {
2339 error = EHOSTUNREACH; /* XXX */
2340 }
2341 }
2342
2343 *mtup = mtu;
2344 if ((alwaysfragp != NULL) && !is_local) {
2345 *alwaysfragp = alwaysfrag;
2346 }
2347 return error;
2348 }
2349
/*
 * IP6 socket option processing.
 *
 * Dispatches a get (SOPT_GET) or set (SOPT_SET) request described by sopt
 * for socket so.  Only level IPPROTO_IPV6 is handled here; IPPROTO_UDP
 * requests are handed to udp_ctloutput() and any other level yields EINVAL.
 *
 * Returns 0 on success or an errno value, among them:
 *   EINVAL       wrong option length or value, option not settable, or
 *                RFC 3542 option used on a socket already in RFC 2292 mode
 *   ENOPROTOOPT  option name not recognized
 *   EPERM        IPV6_2292HOPOPTS / IPV6_2292DSTOPTS set without privilege
 *   ENOTCONN     IPV6_PATHMTU queried on an unconnected socket
 * plus whatever the copyin/copyout and per-option helpers return.
 */
int
ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int optdatalen, uproto;
	void *optdata;
	int privileged;
	struct inpcb *in6p = sotoinpcb(so);
	int error = 0, optval = 0;
	int level, op = -1, optname = 0;
	int optlen = 0;
	struct proc *p;

	VERIFY(sopt != NULL);

	/* Snapshot the request parameters. */
	level = sopt->sopt_level;
	op = sopt->sopt_dir;
	optname = sopt->sopt_name;
	optlen = sopt->sopt_valsize;
	p = sopt->sopt_p;
	uproto = (int)SOCK_PROTO(so);

	privileged = (proc_suser(p) == 0);

	if (level == IPPROTO_IPV6) {
		/*
		 * Set by the SOPT_SET cases that toggle reception of
		 * extension headers; drives the statistics update at the
		 * end of the SOPT_SET branch.
		 */
		boolean_t capture_exthdrstat_in = FALSE;
		switch (op) {
		case SOPT_SET:
			switch (optname) {
			case IPV6_2292PKTOPTIONS: {
				struct mbuf *m;

				/* Pull the user buffer into an mbuf chain. */
				error = soopt_getm(sopt, &m);
				if (error != 0) {
					break;
				}
				error = soopt_mcopyin(sopt, m);
				if (error != 0) {
					break;
				}
				error = ip6_pcbopts(&in6p->in6p_outputopts,
				    m, so, sopt);
				m_freem(m);
				break;
			}

			/*
			 * Use of some Hop-by-Hop options or some
			 * Destination options, might require special
			 * privilege.  That is, normal applications
			 * (without special privilege) might be forbidden
			 * from setting certain options in outgoing packets,
			 * and might never see certain options in received
			 * packets. [RFC 2292 Section 6]
			 * KAME specific note:
			 *  KAME prevents non-privileged users from sending or
			 *  receiving ANY hbh/dst options in order to avoid
			 *  overhead of parsing options in the kernel.
			 */
			case IPV6_RECVHOPOPTS:
			case IPV6_RECVDSTOPTS:
			case IPV6_RECVRTHDRDSTOPTS:
				/*
				 * NOTE: an unprivileged caller leaves here
				 * with error still 0, i.e. the request is
				 * silently ignored; the RFC 2292 variants
				 * further down return EPERM instead.
				 */
				if (!privileged) {
					break;
				}
			/* FALLTHROUGH */
			case IPV6_UNICAST_HOPS:
			case IPV6_HOPLIMIT:
			case IPV6_RECVPKTINFO:
			case IPV6_RECVHOPLIMIT:
			case IPV6_RECVRTHDR:
			case IPV6_RECVPATHMTU:
			case IPV6_RECVTCLASS:
			case IPV6_V6ONLY:
			case IPV6_AUTOFLOWLABEL:
				/* All of these take exactly one int. */
				if (optlen != sizeof(int)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
				    sizeof(optval), sizeof(optval));
				if (error) {
					break;
				}

				switch (optname) {
				case IPV6_UNICAST_HOPS:
					if (optval < -1 || optval >= 256) {
						error = EINVAL;
					} else {
						/* -1 = kernel default */
						in6p->in6p_hops = optval;
						/* Mirror into the IPv4 TTL when the pcb also does IPv4. */
						if (in6p->inp_vflag &
						    INP_IPV4) {
							in6p->inp_ip_ttl =
							    optval;
						}
					}
					break;
/* Set or clear `bit` in inp_flags according to optval. */
#define OPTSET(bit) do {                                                \
	if (optval)                                                     \
	        in6p->inp_flags |= (bit);                               \
	else                                                            \
	        in6p->inp_flags &= ~(bit);                              \
} while (0)

/* Like OPTSET, but also marks the pcb as using the RFC 2292 API. */
#define OPTSET2292(bit) do {                                            \
	in6p->inp_flags |= IN6P_RFC2292;                                \
	if (optval)                                                     \
	        in6p->inp_flags |= (bit);                               \
	else                                                            \
	        in6p->inp_flags &= ~(bit);                              \
} while (0)

/* 1 if any of `bit` is set in inp_flags, else 0. */
#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)

				case IPV6_RECVPKTINFO:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_PKTINFO);
					break;

				case IPV6_HOPLIMIT: {
					struct ip6_pktopts **optp;

					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					optp = &in6p->in6p_outputopts;
					error = ip6_pcbopt(IPV6_HOPLIMIT,
					    (u_char *)&optval, sizeof(optval),
					    optp, uproto);
					break;
				}

				case IPV6_RECVHOPLIMIT:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_HOPLIMIT);
					break;

				case IPV6_RECVHOPOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_HOPOPTS);
					capture_exthdrstat_in = TRUE;
					break;

				case IPV6_RECVDSTOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_DSTOPTS);
					capture_exthdrstat_in = TRUE;
					break;

				case IPV6_RECVRTHDRDSTOPTS:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_RTHDRDSTOPTS);
					capture_exthdrstat_in = TRUE;
					break;

				case IPV6_RECVRTHDR:
					/* cannot mix with RFC2292 */
					if (OPTBIT(IN6P_RFC2292)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_RTHDR);
					capture_exthdrstat_in = TRUE;
					break;

				case IPV6_RECVPATHMTU:
					/*
					 * We ignore this option for TCP
					 * sockets.
					 * (RFC3542 leaves this case
					 * unspecified.)
					 */
					if (uproto != IPPROTO_TCP) {
						OPTSET(IN6P_MTU);
					}
					break;

				case IPV6_V6ONLY:
					/*
					 * make setsockopt(IPV6_V6ONLY)
					 * available only prior to bind(2).
					 * see ipng mailing list, Jun 22 2001.
					 */
					if (in6p->inp_lport ||
					    !IN6_IS_ADDR_UNSPECIFIED(
						    &in6p->in6p_laddr)) {
						error = EINVAL;
						break;
					}
					OPTSET(IN6P_IPV6_V6ONLY);
					/* Keep the pcb's IPv4 capability flag in step. */
					if (optval) {
						in6p->inp_vflag &= ~INP_IPV4;
					} else {
						in6p->inp_vflag |= INP_IPV4;
					}
					break;

				case IPV6_RECVTCLASS:
					/* we can mix with RFC2292 */
					OPTSET(IN6P_TCLASS);
					break;

				case IPV6_AUTOFLOWLABEL:
					OPTSET(IN6P_AUTOFLOWLABEL);
					break;
				}
				break;

			case IPV6_TCLASS:
			case IPV6_DONTFRAG:
			case IPV6_USE_MIN_MTU:
			case IPV6_PREFER_TEMPADDR: {
				struct ip6_pktopts **optp;

				if (optlen != sizeof(optval)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
				    sizeof(optval), sizeof(optval));
				if (error) {
					break;
				}

				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(optname, (u_char *)&optval,
				    sizeof(optval), optp, uproto);

				/*
				 * NOTE: the ECN bits below are updated whether
				 * or not ip6_pcbopt() reported an error.
				 */
				if (optname == IPV6_TCLASS) {
					// Add in the ECN flags
					u_int8_t tos = (in6p->inp_ip_tos & ~IPTOS_ECN_MASK);
					u_int8_t ecn = optval & IPTOS_ECN_MASK;
					in6p->inp_ip_tos = tos | ecn;
				}
				break;
			}

			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292HOPOPTS:
			case IPV6_2292DSTOPTS:
			case IPV6_2292RTHDR:
				/* RFC 2292 */
				if (optlen != sizeof(int)) {
					error = EINVAL;
					break;
				}
				error = sooptcopyin(sopt, &optval,
				    sizeof(optval), sizeof(optval));
				if (error) {
					break;
				}
				switch (optname) {
				case IPV6_2292PKTINFO:
					OPTSET2292(IN6P_PKTINFO);
					break;
				case IPV6_2292HOPLIMIT:
					OPTSET2292(IN6P_HOPLIMIT);
					break;
				case IPV6_2292HOPOPTS:
					/*
					 * Check super-user privilege.
					 * See comments for IPV6_RECVHOPOPTS.
					 */
					if (!privileged) {
						return EPERM;
					}
					OPTSET2292(IN6P_HOPOPTS);
					capture_exthdrstat_in = TRUE;
					break;
				case IPV6_2292DSTOPTS:
					if (!privileged) {
						return EPERM;
					}
					OPTSET2292(IN6P_DSTOPTS |
					    IN6P_RTHDRDSTOPTS); /* XXX */
					capture_exthdrstat_in = TRUE;
					break;
				case IPV6_2292RTHDR:
					OPTSET2292(IN6P_RTHDR);
					capture_exthdrstat_in = TRUE;
					break;
				}
				break;

			case IPV6_3542PKTINFO:
			case IPV6_3542HOPOPTS:
			case IPV6_3542RTHDR:
			case IPV6_3542DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_3542NEXTHOP: {
				struct ip6_pktopts **optp;
				/* new advanced API (RFC3542) */
				struct mbuf *m;

				/* cannot mix with RFC2292 */
				if (OPTBIT(IN6P_RFC2292)) {
					error = EINVAL;
					break;
				}
				error = soopt_getm(sopt, &m);
				if (error != 0) {
					break;
				}
				error = soopt_mcopyin(sopt, m);
				if (error != 0) {
					break;
				}

				/* Only the first mbuf's contents are handed on. */
				optp = &in6p->in6p_outputopts;
				error = ip6_pcbopt(optname, mtod(m, u_char *),
				    m->m_len, optp, uproto);
				m_freem(m);
				break;
			}
#undef OPTSET
			/* Multicast options are handled by ip6_setmoptions(). */
			case IPV6_MULTICAST_IF:
			case IPV6_MULTICAST_HOPS:
			case IPV6_MULTICAST_LOOP:
			case IPV6_JOIN_GROUP:
			case IPV6_LEAVE_GROUP:
			case IPV6_MSFILTER:
			case MCAST_BLOCK_SOURCE:
			case MCAST_UNBLOCK_SOURCE:
			case MCAST_JOIN_GROUP:
			case MCAST_LEAVE_GROUP:
			case MCAST_JOIN_SOURCE_GROUP:
			case MCAST_LEAVE_SOURCE_GROUP:
				error = ip6_setmoptions(in6p, sopt);
				break;

			case IPV6_PORTRANGE:
				error = sooptcopyin(sopt, &optval,
				    sizeof(optval), sizeof(optval));
				if (error) {
					break;
				}

				/* INP_LOWPORT and INP_HIGHPORT are kept mutually exclusive. */
				switch (optval) {
				case IPV6_PORTRANGE_DEFAULT:
					in6p->inp_flags &= ~(INP_LOWPORT);
					in6p->inp_flags &= ~(INP_HIGHPORT);
					break;

				case IPV6_PORTRANGE_HIGH:
					in6p->inp_flags &= ~(INP_LOWPORT);
					in6p->inp_flags |= INP_HIGHPORT;
					break;

				case IPV6_PORTRANGE_LOW:
					in6p->inp_flags &= ~(INP_HIGHPORT);
					in6p->inp_flags |= INP_LOWPORT;
					break;

				default:
					error = EINVAL;
					break;
				}
				break;
#if IPSEC
			case IPV6_IPSEC_POLICY: {
				caddr_t req = NULL;
				size_t len = 0;
				struct mbuf *m;

				if ((error = soopt_getm(sopt, &m)) != 0) {
					break;
				}
				if ((error = soopt_mcopyin(sopt, m)) != 0) {
					break;
				}

				req = mtod(m, caddr_t);
				len = m->m_len;
				error = ipsec6_set_policy(in6p, optname, req,
				    len, privileged);
				m_freem(m);
				break;
			}
#endif /* IPSEC */
			/*
			 * IPv6 variant of IP_BOUND_IF; for details see
			 * comments on IP_BOUND_IF in ip_ctloutput().
			 */
			case IPV6_BOUND_IF:
				/* This option is settable only on IPv6 */
				if (!(in6p->inp_vflag & INP_IPV6)) {
					error = EINVAL;
					break;
				}

				error = sooptcopyin(sopt, &optval,
				    sizeof(optval), sizeof(optval));

				if (error) {
					break;
				}

				error = inp_bindif(in6p, optval, NULL);
				break;

			case IPV6_NO_IFT_CELLULAR:
				/* This option is settable only for IPv6 */
				if (!(in6p->inp_vflag & INP_IPV6)) {
					error = EINVAL;
					break;
				}

				error = sooptcopyin(sopt, &optval,
				    sizeof(optval), sizeof(optval));

				if (error) {
					break;
				}

				/* once set, it cannot be unset */
				if (!optval && INP_NO_CELLULAR(in6p)) {
					error = EINVAL;
					break;
				}

				error = so_set_restrictions(so,
				    SO_RESTRICT_DENY_CELLULAR);
				break;

			case IPV6_OUT_IF:
				/* This option is not settable */
				error = EINVAL;
				break;

			default:
				error = ENOPROTOOPT;
				break;
			}
			/*
			 * Count sockets that asked to receive extension
			 * headers, per transport protocol (TCP and UDP only).
			 */
			if (capture_exthdrstat_in) {
				if (uproto == IPPROTO_TCP) {
					INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_in);
				} else if (uproto == IPPROTO_UDP) {
					INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_in);
				}
			}
			break;

		case SOPT_GET:
			switch (optname) {
			case IPV6_2292PKTOPTIONS:
				/*
				 * RFC3542 (effectively) deprecated the
				 * semantics of the 2292-style pktoptions.
				 * Since it was not reliable in nature (i.e.,
				 * applications had to expect the lack of some
				 * information after all), it would make sense
				 * to simplify this part by always returning
				 * empty data.
				 */
				sopt->sopt_valsize = 0;
				break;

			/* Options whose value is a single int taken from the pcb. */
			case IPV6_RECVHOPOPTS:
			case IPV6_RECVDSTOPTS:
			case IPV6_RECVRTHDRDSTOPTS:
			case IPV6_UNICAST_HOPS:
			case IPV6_RECVPKTINFO:
			case IPV6_RECVHOPLIMIT:
			case IPV6_RECVRTHDR:
			case IPV6_RECVPATHMTU:
			case IPV6_V6ONLY:
			case IPV6_PORTRANGE:
			case IPV6_RECVTCLASS:
			case IPV6_AUTOFLOWLABEL:
				switch (optname) {
				case IPV6_RECVHOPOPTS:
					optval = OPTBIT(IN6P_HOPOPTS);
					break;

				case IPV6_RECVDSTOPTS:
					optval = OPTBIT(IN6P_DSTOPTS);
					break;

				case IPV6_RECVRTHDRDSTOPTS:
					optval = OPTBIT(IN6P_RTHDRDSTOPTS);
					break;

				case IPV6_UNICAST_HOPS:
					optval = in6p->in6p_hops;
					break;

				case IPV6_RECVPKTINFO:
					optval = OPTBIT(IN6P_PKTINFO);
					break;

				case IPV6_RECVHOPLIMIT:
					optval = OPTBIT(IN6P_HOPLIMIT);
					break;

				case IPV6_RECVRTHDR:
					optval = OPTBIT(IN6P_RTHDR);
					break;

				case IPV6_RECVPATHMTU:
					optval = OPTBIT(IN6P_MTU);
					break;

				case IPV6_V6ONLY:
					optval = OPTBIT(IN6P_IPV6_V6ONLY);
					break;

				case IPV6_PORTRANGE: {
					int flags;
					flags = in6p->inp_flags;
					if (flags & INP_HIGHPORT) {
						optval = IPV6_PORTRANGE_HIGH;
					} else if (flags & INP_LOWPORT) {
						optval = IPV6_PORTRANGE_LOW;
					} else {
						optval = 0;
					}
					break;
				}
				case IPV6_RECVTCLASS:
					optval = OPTBIT(IN6P_TCLASS);
					break;

				case IPV6_AUTOFLOWLABEL:
					optval = OPTBIT(IN6P_AUTOFLOWLABEL);
					break;
				}
				if (error) {
					break;
				}
				error = sooptcopyout(sopt, &optval,
				    sizeof(optval));
				break;

			case IPV6_PATHMTU: {
				u_int32_t pmtu = 0;
				struct ip6_mtuinfo mtuinfo;
				struct route_in6 sro;

				/* Scratch route, used only for this lookup. */
				bzero(&sro, sizeof(sro));

				if (!(so->so_state & SS_ISCONNECTED)) {
					return ENOTCONN;
				}
				/*
				 * XXX: we do not consider the case of source
				 * routing, or optional information to specify
				 * the outgoing interface.
				 */
				error = ip6_getpmtu(&sro, NULL, NULL,
				    &in6p->in6p_faddr, &pmtu, NULL);
				ROUTE_RELEASE(&sro);
				if (error) {
					break;
				}
				/* Never report more than IPV6_MAXPACKET. */
				if (pmtu > IPV6_MAXPACKET) {
					pmtu = IPV6_MAXPACKET;
				}

				bzero(&mtuinfo, sizeof(mtuinfo));
				mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
				optdata = (void *)&mtuinfo;
				optdatalen = sizeof(mtuinfo);
				error = sooptcopyout(sopt, optdata,
				    optdatalen);
				break;
			}

			case IPV6_2292PKTINFO:
			case IPV6_2292HOPLIMIT:
			case IPV6_2292HOPOPTS:
			case IPV6_2292RTHDR:
			case IPV6_2292DSTOPTS:
				switch (optname) {
				case IPV6_2292PKTINFO:
					optval = OPTBIT(IN6P_PKTINFO);
					break;
				case IPV6_2292HOPLIMIT:
					optval = OPTBIT(IN6P_HOPLIMIT);
					break;
				case IPV6_2292HOPOPTS:
					optval = OPTBIT(IN6P_HOPOPTS);
					break;
				case IPV6_2292RTHDR:
					optval = OPTBIT(IN6P_RTHDR);
					break;
				case IPV6_2292DSTOPTS:
					/* 1 when either of the two flags is set. */
					optval = OPTBIT(IN6P_DSTOPTS |
					    IN6P_RTHDRDSTOPTS);
					break;
				}
				error = sooptcopyout(sopt, &optval,
				    sizeof(optval));
				break;

			/* Sticky output options are read back by ip6_getpcbopt(). */
			case IPV6_PKTINFO:
			case IPV6_HOPOPTS:
			case IPV6_RTHDR:
			case IPV6_DSTOPTS:
			case IPV6_RTHDRDSTOPTS:
			case IPV6_NEXTHOP:
			case IPV6_TCLASS:
			case IPV6_DONTFRAG:
			case IPV6_USE_MIN_MTU:
			case IPV6_PREFER_TEMPADDR:
				error = ip6_getpcbopt(in6p->in6p_outputopts,
				    optname, sopt);
				break;

			case IPV6_MULTICAST_IF:
			case IPV6_MULTICAST_HOPS:
			case IPV6_MULTICAST_LOOP:
			case IPV6_MSFILTER:
				error = ip6_getmoptions(in6p, sopt);
				break;
#if IPSEC
			case IPV6_IPSEC_POLICY: {
				error = 0; /* This option is no longer supported */
				break;
			}
#endif /* IPSEC */
			case IPV6_BOUND_IF:
				/* optval stays 0 when the pcb is not bound to an interface. */
				if (in6p->inp_flags & INP_BOUND_IF) {
					optval = in6p->inp_boundifp->if_index;
				}
				error = sooptcopyout(sopt, &optval,
				    sizeof(optval));
				break;

			case IPV6_NO_IFT_CELLULAR:
				optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
				error = sooptcopyout(sopt, &optval,
				    sizeof(optval));
				break;

			case IPV6_OUT_IF:
				/* Index of the last output interface, or 0 if none recorded. */
				optval = (in6p->in6p_last_outifp != NULL) ?
				    in6p->in6p_last_outifp->if_index : 0;
				error = sooptcopyout(sopt, &optval,
				    sizeof(optval));
				break;

			default:
				error = ENOPROTOOPT;
				break;
			}
			break;
		}
	} else if (level == IPPROTO_UDP) {
		error = udp_ctloutput(so, sopt);
	} else {
		error = EINVAL;
	}
	return error;
}
3031
3032 int
3033 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
3034 {
3035 int error = 0, optval, optlen;
3036 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
3037 struct inpcb *in6p = sotoinpcb(so);
3038 int level, op, optname;
3039
3040 level = sopt->sopt_level;
3041 op = sopt->sopt_dir;
3042 optname = sopt->sopt_name;
3043 optlen = sopt->sopt_valsize;
3044
3045 if (level != IPPROTO_IPV6) {
3046 return EINVAL;
3047 }
3048
3049 switch (optname) {
3050 case IPV6_CHECKSUM:
3051 /*
3052 * For ICMPv6 sockets, no modification allowed for checksum
3053 * offset, permit "no change" values to help existing apps.
3054 *
3055 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
3056 * for an ICMPv6 socket will fail."
3057 * The current behavior does not meet RFC3542.
3058 */
3059 switch (op) {
3060 case SOPT_SET:
3061 if (optlen != sizeof(int)) {
3062 error = EINVAL;
3063 break;
3064 }
3065 error = sooptcopyin(sopt, &optval, sizeof(optval),
3066 sizeof(optval));
3067 if (error) {
3068 break;
3069 }
3070 if ((optval % 2) != 0) {
3071 /* the API assumes even offset values */
3072 error = EINVAL;
3073 } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
3074 if (optval != icmp6off) {
3075 error = EINVAL;
3076 }
3077 } else {
3078 in6p->in6p_cksum = optval;
3079 }
3080 break;
3081
3082 case SOPT_GET:
3083 if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
3084 optval = icmp6off;
3085 } else {
3086 optval = in6p->in6p_cksum;
3087 }
3088
3089 error = sooptcopyout(sopt, &optval, sizeof(optval));
3090 break;
3091
3092 default:
3093 error = EINVAL;
3094 break;
3095 }
3096 break;
3097
3098 default:
3099 error = ENOPROTOOPT;
3100 break;
3101 }
3102
3103 return error;
3104 }
3105
3106 /*
3107 * Set up IP6 options in pcb for insertion in output packets or
3108 * specifying behavior of outgoing packets.
3109 */
3110 static int
3111 ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
3112 struct sockopt *sopt)
3113 {
3114 #pragma unused(sopt)
3115 struct ip6_pktopts *opt = *pktopt;
3116 int error = 0;
3117
3118 /* turn off any old options. */
3119 if (opt != NULL) {
3120 #if DIAGNOSTIC
3121 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
3122 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
3123 opt->ip6po_rhinfo.ip6po_rhi_rthdr) {
3124 printf("%s: all specified options are cleared.\n",
3125 __func__);
3126 }
3127 #endif
3128 ip6_clearpktopts(opt, -1);
3129 } else {
3130 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
3131 if (opt == NULL) {
3132 return ENOBUFS;
3133 }
3134 }
3135 *pktopt = NULL;
3136
3137 if (m == NULL || m->m_len == 0) {
3138 /*
3139 * Only turning off any previous options, regardless of
3140 * whether the opt is just created or given.
3141 */
3142 if (opt != NULL) {
3143 FREE(opt, M_IP6OPT);
3144 }
3145 return 0;
3146 }
3147
3148 /* set options specified by user. */
3149 if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) {
3150 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
3151 FREE(opt, M_IP6OPT);
3152 return error;
3153 }
3154 *pktopt = opt;
3155 return 0;
3156 }
3157
3158 /*
3159 * initialize ip6_pktopts. beware that there are non-zero default values in
3160 * the struct.
3161 */
3162 void
3163 ip6_initpktopts(struct ip6_pktopts *opt)
3164 {
3165 bzero(opt, sizeof(*opt));
3166 opt->ip6po_hlim = -1; /* -1 means default hop limit */
3167 opt->ip6po_tclass = -1; /* -1 means default traffic class */
3168 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
3169 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
3170 }
3171
3172 static int
3173 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
3174 int uproto)
3175 {
3176 struct ip6_pktopts *opt;
3177
3178 opt = *pktopt;
3179 if (opt == NULL) {
3180 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
3181 if (opt == NULL) {
3182 return ENOBUFS;
3183 }
3184 ip6_initpktopts(opt);
3185 *pktopt = opt;
3186 }
3187
3188 return ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto);
3189 }
3190
3191 static int
3192 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
3193 {
3194 void *optdata = NULL;
3195 int optdatalen = 0;
3196 struct ip6_ext *ip6e;
3197 struct in6_pktinfo null_pktinfo;
3198 int deftclass = 0, on;
3199 int defminmtu = IP6PO_MINMTU_MCASTONLY;
3200 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
3201
3202
3203 switch (optname) {
3204 case IPV6_PKTINFO:
3205 if (pktopt && pktopt->ip6po_pktinfo) {
3206 optdata = (void *)pktopt->ip6po_pktinfo;
3207 } else {
3208 /* XXX: we don't have to do this every time... */
3209 bzero(&null_pktinfo, sizeof(null_pktinfo));
3210 optdata = (void *)&null_pktinfo;
3211 }
3212 optdatalen = sizeof(struct in6_pktinfo);
3213 break;
3214
3215 case IPV6_TCLASS:
3216 if (pktopt && pktopt->ip6po_tclass >= 0) {
3217 optdata = (void *)&pktopt->ip6po_tclass;
3218 } else {
3219 optdata = (void *)&deftclass;
3220 }
3221 optdatalen = sizeof(int);
3222 break;
3223
3224 case IPV6_HOPOPTS:
3225 if (pktopt && pktopt->ip6po_hbh) {
3226 optdata = (void *)pktopt->ip6po_hbh;
3227 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
3228 optdatalen = (ip6e->ip6e_len + 1) << 3;
3229 }
3230 break;
3231
3232 case IPV6_RTHDR:
3233 if (pktopt && pktopt->ip6po_rthdr) {
3234 optdata = (void *)pktopt->ip6po_rthdr;
3235 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
3236 optdatalen = (ip6e->ip6e_len + 1) << 3;
3237 }
3238 break;
3239
3240 case IPV6_RTHDRDSTOPTS:
3241 if (pktopt && pktopt->ip6po_dest1) {
3242 optdata = (void *)pktopt->ip6po_dest1;
3243 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
3244 optdatalen = (ip6e->ip6e_len + 1) << 3;
3245 }
3246 break;
3247
3248 case IPV6_DSTOPTS:
3249 if (pktopt && pktopt->ip6po_dest2) {
3250 optdata = (void *)pktopt->ip6po_dest2;
3251 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
3252 optdatalen = (ip6e->ip6e_len + 1) << 3;
3253 }
3254 break;
3255
3256 case IPV6_NEXTHOP:
3257 if (pktopt && pktopt->ip6po_nexthop) {
3258 optdata = (void *)pktopt->ip6po_nexthop;
3259 optdatalen = pktopt->ip6po_nexthop->sa_len;
3260 }
3261 break;
3262
3263 case IPV6_USE_MIN_MTU:
3264 if (pktopt) {
3265 optdata = (void *)&pktopt->ip6po_minmtu;
3266 } else {
3267 optdata = (void *)&defminmtu;
3268 }
3269 optdatalen = sizeof(int);
3270 break;
3271
3272 case IPV6_DONTFRAG:
3273 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) {
3274 on = 1;
3275 } else {
3276 on = 0;
3277 }
3278 optdata = (void *)&on;
3279 optdatalen = sizeof(on);
3280 break;
3281
3282 case IPV6_PREFER_TEMPADDR:
3283 if (pktopt) {
3284 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
3285 } else {
3286 optdata = (void *)&defpreftemp;
3287 }
3288 optdatalen = sizeof(int);
3289 break;
3290
3291 default: /* should not happen */
3292 #ifdef DIAGNOSTIC
3293 panic("ip6_getpcbopt: unexpected option\n");
3294 #endif
3295 return ENOPROTOOPT;
3296 }
3297
3298 return sooptcopyout(sopt, optdata, optdatalen);
3299 }
3300
3301 void
3302 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
3303 {
3304 if (pktopt == NULL) {
3305 return;
3306 }
3307
3308 if (optname == -1 || optname == IPV6_PKTINFO) {
3309 if (pktopt->ip6po_pktinfo) {
3310 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
3311 }
3312 pktopt->ip6po_pktinfo = NULL;
3313 }
3314 if (optname == -1 || optname == IPV6_HOPLIMIT) {
3315 pktopt->ip6po_hlim = -1;
3316 }
3317 if (optname == -1 || optname == IPV6_TCLASS) {
3318 pktopt->ip6po_tclass = -1;
3319 }
3320 if (optname == -1 || optname == IPV6_NEXTHOP) {
3321 ROUTE_RELEASE(&pktopt->ip6po_nextroute);
3322 if (pktopt->ip6po_nexthop) {
3323 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
3324 }
3325 pktopt->ip6po_nexthop = NULL;
3326 }
3327 if (optname == -1 || optname == IPV6_HOPOPTS) {
3328 if (pktopt->ip6po_hbh) {
3329 FREE(pktopt->ip6po_hbh, M_IP6OPT);
3330 }
3331 pktopt->ip6po_hbh = NULL;
3332 }
3333 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
3334 if (pktopt->ip6po_dest1) {
3335 FREE(pktopt->ip6po_dest1, M_IP6OPT);
3336 }
3337 pktopt->ip6po_dest1 = NULL;
3338 }
3339 if (optname == -1 || optname == IPV6_RTHDR) {
3340 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) {
3341 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
3342 }
3343 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
3344 ROUTE_RELEASE(&pktopt->ip6po_route);
3345 }
3346 if (optname == -1 || optname == IPV6_DSTOPTS) {
3347 if (pktopt->ip6po_dest2) {
3348 FREE(pktopt->ip6po_dest2, M_IP6OPT);
3349 }
3350 pktopt->ip6po_dest2 = NULL;
3351 }
3352 }
3353
3354 #define PKTOPT_EXTHDRCPY(type) do { \
3355 if (src->type) { \
3356 int hlen = \
3357 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \
3358 dst->type = _MALLOC(hlen, M_IP6OPT, canwait); \
3359 if (dst->type == NULL && canwait == M_NOWAIT) \
3360 goto bad; \
3361 bcopy(src->type, dst->type, hlen); \
3362 } \
3363 } while (0)
3364
3365 static int
3366 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
3367 {
3368 if (dst == NULL || src == NULL) {
3369 printf("copypktopts: invalid argument\n");
3370 return EINVAL;
3371 }
3372
3373 dst->ip6po_hlim = src->ip6po_hlim;
3374 dst->ip6po_tclass = src->ip6po_tclass;
3375 dst->ip6po_flags = src->ip6po_flags;
3376 if (src->ip6po_pktinfo) {
3377 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
3378 M_IP6OPT, canwait);
3379 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) {
3380 goto bad;
3381 }
3382 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3383 }
3384 if (src->ip6po_nexthop) {
3385 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
3386 M_IP6OPT, canwait);
3387 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) {
3388 goto bad;
3389 }
3390 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
3391 src->ip6po_nexthop->sa_len);
3392 }
3393 PKTOPT_EXTHDRCPY(ip6po_hbh);
3394 PKTOPT_EXTHDRCPY(ip6po_dest1);
3395 PKTOPT_EXTHDRCPY(ip6po_dest2);
3396 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
3397 return 0;
3398
3399 bad:
3400 ip6_clearpktopts(dst, -1);
3401 return ENOBUFS;
3402 }
3403 #undef PKTOPT_EXTHDRCPY
3404
3405 struct ip6_pktopts *
3406 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
3407 {
3408 int error;
3409 struct ip6_pktopts *dst;
3410
3411 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
3412 if (dst == NULL) {
3413 return NULL;
3414 }
3415 ip6_initpktopts(dst);
3416
3417 if ((error = copypktopts(dst, src, canwait)) != 0) {
3418 FREE(dst, M_IP6OPT);
3419 return NULL;
3420 }
3421
3422 return dst;
3423 }
3424
3425 void
3426 ip6_freepcbopts(struct ip6_pktopts *pktopt)
3427 {
3428 if (pktopt == NULL) {
3429 return;
3430 }
3431
3432 ip6_clearpktopts(pktopt, -1);
3433
3434 FREE(pktopt, M_IP6OPT);
3435 }
3436
3437 void
3438 ip6_moptions_init(void)
3439 {
3440 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof(im6o_debug));
3441
3442 im6o_size = (im6o_debug == 0) ? sizeof(struct ip6_moptions) :
3443 sizeof(struct ip6_moptions_dbg);
3444
3445 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
3446 IM6O_ZONE_NAME);
3447 if (im6o_zone == NULL) {
3448 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
3449 /* NOTREACHED */
3450 }
3451 zone_change(im6o_zone, Z_EXPAND, TRUE);
3452 }
3453
3454 void
3455 im6o_addref(struct ip6_moptions *im6o, int locked)
3456 {
3457 if (!locked) {
3458 IM6O_LOCK(im6o);
3459 } else {
3460 IM6O_LOCK_ASSERT_HELD(im6o);
3461 }
3462
3463 if (++im6o->im6o_refcnt == 0) {
3464 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
3465 /* NOTREACHED */
3466 } else if (im6o->im6o_trace != NULL) {
3467 (*im6o->im6o_trace)(im6o, TRUE);
3468 }
3469
3470 if (!locked) {
3471 IM6O_UNLOCK(im6o);
3472 }
3473 }
3474
3475 void
3476 im6o_remref(struct ip6_moptions *im6o)
3477 {
3478 int i;
3479
3480 IM6O_LOCK(im6o);
3481 if (im6o->im6o_refcnt == 0) {
3482 panic("%s: im6o %p negative refcnt", __func__, im6o);
3483 /* NOTREACHED */
3484 } else if (im6o->im6o_trace != NULL) {
3485 (*im6o->im6o_trace)(im6o, FALSE);
3486 }
3487
3488 --im6o->im6o_refcnt;
3489 if (im6o->im6o_refcnt > 0) {
3490 IM6O_UNLOCK(im6o);
3491 return;
3492 }
3493
3494 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3495 struct in6_mfilter *imf;
3496
3497 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
3498 if (imf != NULL) {
3499 im6f_leave(imf);
3500 }
3501
3502 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
3503
3504 if (imf != NULL) {
3505 im6f_purge(imf);
3506 }
3507
3508 IN6M_REMREF(im6o->im6o_membership[i]);
3509 im6o->im6o_membership[i] = NULL;
3510 }
3511 im6o->im6o_num_memberships = 0;
3512 if (im6o->im6o_mfilters != NULL) {
3513 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
3514 im6o->im6o_mfilters = NULL;
3515 }
3516 if (im6o->im6o_membership != NULL) {
3517 FREE(im6o->im6o_membership, M_IP6MOPTS);
3518 im6o->im6o_membership = NULL;
3519 }
3520 IM6O_UNLOCK(im6o);
3521
3522 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
3523
3524 if (!(im6o->im6o_debug & IFD_ALLOC)) {
3525 panic("%s: im6o %p cannot be freed", __func__, im6o);
3526 /* NOTREACHED */
3527 }
3528 zfree(im6o_zone, im6o);
3529 }
3530
3531 static void
3532 im6o_trace(struct ip6_moptions *im6o, int refhold)
3533 {
3534 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3535 ctrace_t *tr;
3536 u_int32_t idx;
3537 u_int16_t *cnt;
3538
3539 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3540 panic("%s: im6o %p has no debug structure", __func__, im6o);
3541 /* NOTREACHED */
3542 }
3543 if (refhold) {
3544 cnt = &im6o_dbg->im6o_refhold_cnt;
3545 tr = im6o_dbg->im6o_refhold;
3546 } else {
3547 cnt = &im6o_dbg->im6o_refrele_cnt;
3548 tr = im6o_dbg->im6o_refrele;
3549 }
3550
3551 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3552 ctrace_record(&tr[idx]);
3553 }
3554
3555 struct ip6_moptions *
3556 ip6_allocmoptions(int how)
3557 {
3558 struct ip6_moptions *im6o;
3559
3560 im6o = (how == M_WAITOK) ?
3561 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3562 if (im6o != NULL) {
3563 bzero(im6o, im6o_size);
3564 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3565 im6o->im6o_debug |= IFD_ALLOC;
3566 if (im6o_debug != 0) {
3567 im6o->im6o_debug |= IFD_DEBUG;
3568 im6o->im6o_trace = im6o_trace;
3569 }
3570 IM6O_ADDREF(im6o);
3571 }
3572
3573 return im6o;
3574 }
3575
3576 /*
3577 * Set IPv6 outgoing packet options based on advanced API.
3578 */
3579 int
3580 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3581 struct ip6_pktopts *stickyopt, int uproto)
3582 {
3583 struct cmsghdr *cm = NULL;
3584
3585 if (control == NULL || opt == NULL) {
3586 return EINVAL;
3587 }
3588
3589 ip6_initpktopts(opt);
3590 if (stickyopt) {
3591 int error;
3592
3593 /*
3594 * If stickyopt is provided, make a local copy of the options
3595 * for this particular packet, then override them by ancillary
3596 * objects.
3597 * XXX: copypktopts() does not copy the cached route to a next
3598 * hop (if any). This is not very good in terms of efficiency,
3599 * but we can allow this since this option should be rarely
3600 * used.
3601 */
3602 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) {
3603 return error;
3604 }
3605 }
3606
3607 /*
3608 * XXX: Currently, we assume all the optional information is stored
3609 * in a single mbuf.
3610 */
3611 if (control->m_next) {
3612 return EINVAL;
3613 }
3614
3615 if (control->m_len < CMSG_LEN(0)) {
3616 return EINVAL;
3617 }
3618
3619 for (cm = M_FIRST_CMSGHDR(control);
3620 is_cmsg_valid(control, cm);
3621 cm = M_NXT_CMSGHDR(control, cm)) {
3622 int error;
3623
3624 if (cm->cmsg_level != IPPROTO_IPV6) {
3625 continue;
3626 }
3627
3628 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3629 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3630 if (error) {
3631 return error;
3632 }
3633 }
3634
3635 return 0;
3636 }
3637 /*
3638 * Set a particular packet option, as a sticky option or an ancillary data
3639 * item. "len" can be 0 only when it's a sticky option.
3640 * We have 4 cases of combination of "sticky" and "cmsg":
3641 * "sticky=0, cmsg=0": impossible
3642 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3643 * "sticky=1, cmsg=0": RFC3542 socket option
3644 * "sticky=1, cmsg=1": RFC2292 socket option
3645 */
3646 static int
3647 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3648 int sticky, int cmsg, int uproto)
3649 {
3650 int minmtupolicy, preftemp;
3651 int error;
3652 boolean_t capture_exthdrstat_out = FALSE;
3653
3654 if (!sticky && !cmsg) {
3655 #ifdef DIAGNOSTIC
3656 printf("ip6_setpktopt: impossible case\n");
3657 #endif
3658 return EINVAL;
3659 }
3660
3661 /*
3662 * Caller must have ensured that the buffer is at least
3663 * aligned on 32-bit boundary.
3664 */
3665 VERIFY(IS_P2ALIGNED(buf, sizeof(u_int32_t)));
3666
3667 /*
3668 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3669 * not be specified in the context of RFC3542. Conversely,
3670 * RFC3542 types should not be specified in the context of RFC2292.
3671 */
3672 if (!cmsg) {
3673 switch (optname) {
3674 case IPV6_2292PKTINFO:
3675 case IPV6_2292HOPLIMIT:
3676 case IPV6_2292NEXTHOP:
3677 case IPV6_2292HOPOPTS:
3678 case IPV6_2292DSTOPTS:
3679 case IPV6_2292RTHDR:
3680 case IPV6_2292PKTOPTIONS:
3681 return ENOPROTOOPT;
3682 }
3683 }
3684 if (sticky && cmsg) {
3685 switch (optname) {
3686 case IPV6_PKTINFO:
3687 case IPV6_HOPLIMIT:
3688 case IPV6_NEXTHOP:
3689 case IPV6_HOPOPTS:
3690 case IPV6_DSTOPTS:
3691 case IPV6_RTHDRDSTOPTS:
3692 case IPV6_RTHDR:
3693 case IPV6_USE_MIN_MTU:
3694 case IPV6_DONTFRAG:
3695 case IPV6_TCLASS:
3696 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3697 return ENOPROTOOPT;
3698 }
3699 }
3700
3701 switch (optname) {
3702 case IPV6_2292PKTINFO:
3703 case IPV6_PKTINFO: {
3704 struct ifnet *ifp = NULL;
3705 struct in6_pktinfo *pktinfo;
3706
3707 if (len != sizeof(struct in6_pktinfo)) {
3708 return EINVAL;
3709 }
3710
3711 pktinfo = (struct in6_pktinfo *)(void *)buf;
3712
3713 /*
3714 * An application can clear any sticky IPV6_PKTINFO option by
3715 * doing a "regular" setsockopt with ipi6_addr being
3716 * in6addr_any and ipi6_ifindex being zero.
3717 * [RFC 3542, Section 6]
3718 */
3719 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3720 pktinfo->ipi6_ifindex == 0 &&
3721 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3722 ip6_clearpktopts(opt, optname);
3723 break;
3724 }
3725
3726 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3727 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3728 return EINVAL;
3729 }
3730
3731 /* validate the interface index if specified. */
3732 ifnet_head_lock_shared();
3733
3734 if (pktinfo->ipi6_ifindex > if_index) {
3735 ifnet_head_done();
3736 return ENXIO;
3737 }
3738
3739 if (pktinfo->ipi6_ifindex) {
3740 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3741 if (ifp == NULL) {
3742 ifnet_head_done();
3743 return ENXIO;
3744 }
3745 }
3746
3747 ifnet_head_done();
3748
3749 /*
3750 * We store the address anyway, and let in6_selectsrc()
3751 * validate the specified address. This is because ipi6_addr
3752 * may not have enough information about its scope zone, and
3753 * we may need additional information (such as outgoing
3754 * interface or the scope zone of a destination address) to
3755 * disambiguate the scope.
3756 * XXX: the delay of the validation may confuse the
3757 * application when it is used as a sticky option.
3758 */
3759 if (opt->ip6po_pktinfo == NULL) {
3760 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
3761 M_IP6OPT, M_NOWAIT);
3762 if (opt->ip6po_pktinfo == NULL) {
3763 return ENOBUFS;
3764 }
3765 }
3766 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3767 break;
3768 }
3769
3770 case IPV6_2292HOPLIMIT:
3771 case IPV6_HOPLIMIT: {
3772 int *hlimp;
3773
3774 /*
3775 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3776 * to simplify the ordering among hoplimit options.
3777 */
3778 if (optname == IPV6_HOPLIMIT && sticky) {
3779 return ENOPROTOOPT;
3780 }
3781
3782 if (len != sizeof(int)) {
3783 return EINVAL;
3784 }
3785 hlimp = (int *)(void *)buf;
3786 if (*hlimp < -1 || *hlimp > 255) {
3787 return EINVAL;
3788 }
3789
3790 opt->ip6po_hlim = *hlimp;
3791 break;
3792 }
3793
3794 case IPV6_TCLASS: {
3795 int tclass;
3796
3797 if (len != sizeof(int)) {
3798 return EINVAL;
3799 }
3800 tclass = *(int *)(void *)buf;
3801 if (tclass < -1 || tclass > 255) {
3802 return EINVAL;
3803 }
3804
3805 opt->ip6po_tclass = tclass;
3806 break;
3807 }
3808
3809 case IPV6_2292NEXTHOP:
3810 case IPV6_NEXTHOP:
3811 error = suser(kauth_cred_get(), 0);
3812 if (error) {
3813 return EACCES;
3814 }
3815
3816 if (len == 0) { /* just remove the option */
3817 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3818 break;
3819 }
3820
3821 /* check if cmsg_len is large enough for sa_len */
3822 if (len < sizeof(struct sockaddr) || len < *buf) {
3823 return EINVAL;
3824 }
3825
3826 switch (SA(buf)->sa_family) {
3827 case AF_INET6: {
3828 struct sockaddr_in6 *sa6 = SIN6(buf);
3829
3830 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) {
3831 return EINVAL;
3832 }
3833
3834 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3835 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3836 return EINVAL;
3837 }
3838 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3839 != 0) {
3840 return error;
3841 }
3842 break;
3843 }
3844 case AF_LINK: /* should eventually be supported */
3845 default:
3846 return EAFNOSUPPORT;
3847 }
3848
3849 /* turn off the previous option, then set the new option. */
3850 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3851 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3852 if (opt->ip6po_nexthop == NULL) {
3853 return ENOBUFS;
3854 }
3855 bcopy(buf, opt->ip6po_nexthop, *buf);
3856 break;
3857
3858 case IPV6_2292HOPOPTS:
3859 case IPV6_HOPOPTS: {
3860 struct ip6_hbh *hbh;
3861 int hbhlen;
3862
3863 /*
3864 * XXX: We don't allow a non-privileged user to set ANY HbH
3865 * options, since per-option restriction has too much
3866 * overhead.
3867 */
3868 error = suser(kauth_cred_get(), 0);
3869 if (error) {
3870 return EACCES;
3871 }
3872
3873 if (len == 0) {
3874 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3875 break; /* just remove the option */
3876 }
3877
3878 /* message length validation */
3879 if (len < sizeof(struct ip6_hbh)) {
3880 return EINVAL;
3881 }
3882 hbh = (struct ip6_hbh *)(void *)buf;
3883 hbhlen = (hbh->ip6h_len + 1) << 3;
3884 if (len != hbhlen) {
3885 return EINVAL;
3886 }
3887
3888 /* turn off the previous option, then set the new option. */
3889 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3890 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3891 if (opt->ip6po_hbh == NULL) {
3892 return ENOBUFS;
3893 }
3894 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3895 capture_exthdrstat_out = TRUE;
3896 break;
3897 }
3898
3899 case IPV6_2292DSTOPTS:
3900 case IPV6_DSTOPTS:
3901 case IPV6_RTHDRDSTOPTS: {
3902 struct ip6_dest *dest, **newdest = NULL;
3903 int destlen;
3904
3905 error = suser(kauth_cred_get(), 0);
3906 if (error) {
3907 return EACCES;
3908 }
3909
3910 if (len == 0) {
3911 ip6_clearpktopts(opt, optname);
3912 break; /* just remove the option */
3913 }
3914
3915 /* message length validation */
3916 if (len < sizeof(struct ip6_dest)) {
3917 return EINVAL;
3918 }
3919 dest = (struct ip6_dest *)(void *)buf;
3920 destlen = (dest->ip6d_len + 1) << 3;
3921 if (len != destlen) {
3922 return EINVAL;
3923 }
3924
3925 /*
3926 * Determine the position that the destination options header
3927 * should be inserted; before or after the routing header.
3928 */
3929 switch (optname) {
3930 case IPV6_2292DSTOPTS:
3931 /*
3932 * The old advacned API is ambiguous on this point.
3933 * Our approach is to determine the position based
3934 * according to the existence of a routing header.
3935 * Note, however, that this depends on the order of the
3936 * extension headers in the ancillary data; the 1st
3937 * part of the destination options header must appear
3938 * before the routing header in the ancillary data,
3939 * too.
3940 * RFC3542 solved the ambiguity by introducing
3941 * separate ancillary data or option types.
3942 */
3943 if (opt->ip6po_rthdr == NULL) {
3944 newdest = &opt->ip6po_dest1;
3945 } else {
3946 newdest = &opt->ip6po_dest2;
3947 }
3948 break;
3949 case IPV6_RTHDRDSTOPTS:
3950 newdest = &opt->ip6po_dest1;
3951 break;
3952 case IPV6_DSTOPTS:
3953 newdest = &opt->ip6po_dest2;
3954 break;
3955 }
3956
3957 /* turn off the previous option, then set the new option. */
3958 ip6_clearpktopts(opt, optname);
3959 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3960 if (*newdest == NULL) {
3961 return ENOBUFS;
3962 }
3963 bcopy(dest, *newdest, destlen);
3964 capture_exthdrstat_out = TRUE;
3965 break;
3966 }
3967
3968 case IPV6_2292RTHDR:
3969 case IPV6_RTHDR: {
3970 struct ip6_rthdr *rth;
3971 int rthlen;
3972
3973 if (len == 0) {
3974 ip6_clearpktopts(opt, IPV6_RTHDR);
3975 break; /* just remove the option */
3976 }
3977
3978 /* message length validation */
3979 if (len < sizeof(struct ip6_rthdr)) {
3980 return EINVAL;
3981 }
3982 rth = (struct ip6_rthdr *)(void *)buf;
3983 rthlen = (rth->ip6r_len + 1) << 3;
3984 if (len != rthlen) {
3985 return EINVAL;
3986 }
3987
3988 switch (rth->ip6r_type) {
3989 case IPV6_RTHDR_TYPE_0:
3990 if (rth->ip6r_len == 0) { /* must contain one addr */
3991 return EINVAL;
3992 }
3993 if (rth->ip6r_len % 2) { /* length must be even */
3994 return EINVAL;
3995 }
3996 if (rth->ip6r_len / 2 != rth->ip6r_segleft) {
3997 return EINVAL;
3998 }
3999 break;
4000 default:
4001 return EINVAL; /* not supported */
4002 }
4003
4004 /* turn off the previous option */
4005 ip6_clearpktopts(opt, IPV6_RTHDR);
4006 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
4007 if (opt->ip6po_rthdr == NULL) {
4008 return ENOBUFS;
4009 }
4010 bcopy(rth, opt->ip6po_rthdr, rthlen);
4011 capture_exthdrstat_out = TRUE;
4012 break;
4013 }
4014
4015 case IPV6_USE_MIN_MTU:
4016 if (len != sizeof(int)) {
4017 return EINVAL;
4018 }
4019 minmtupolicy = *(int *)(void *)buf;
4020 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
4021 minmtupolicy != IP6PO_MINMTU_DISABLE &&
4022 minmtupolicy != IP6PO_MINMTU_ALL) {
4023 return EINVAL;
4024 }
4025 opt->ip6po_minmtu = minmtupolicy;
4026 break;
4027
4028 case IPV6_DONTFRAG:
4029 if (len != sizeof(int)) {
4030 return EINVAL;
4031 }
4032
4033 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
4034 /*
4035 * we ignore this option for TCP sockets.
4036 * (RFC3542 leaves this case unspecified.)
4037 */
4038 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
4039 } else {
4040 opt->ip6po_flags |= IP6PO_DONTFRAG;
4041 }
4042 break;
4043
4044 case IPV6_PREFER_TEMPADDR:
4045 if (len != sizeof(int)) {
4046 return EINVAL;
4047 }
4048 preftemp = *(int *)(void *)buf;
4049 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
4050 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
4051 preftemp != IP6PO_TEMPADDR_PREFER) {
4052 return EINVAL;
4053 }
4054 opt->ip6po_prefer_tempaddr = preftemp;
4055 break;
4056
4057 default:
4058 return ENOPROTOOPT;
4059 } /* end of switch */
4060
4061 if (capture_exthdrstat_out) {
4062 if (uproto == IPPROTO_TCP) {
4063 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_out);
4064 } else if (uproto == IPPROTO_UDP) {
4065 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_out);
4066 }
4067 }
4068
4069 return 0;
4070 }
4071
4072 /*
4073 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
4074 * packet to the input queue of a specified interface. Note that this
4075 * calls the output routine of the loopback "driver", but with an interface
4076 * pointer that might NOT be &loif -- easier than replicating that code here.
4077 */
4078 void
4079 ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
4080 struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
4081 {
4082 struct mbuf *copym;
4083 struct ip6_hdr *ip6;
4084 struct in6_addr src;
4085
4086 if (lo_ifp == NULL) {
4087 return;
4088 }
4089
4090 /*
4091 * Copy the packet header as it's needed for the checksum.
4092 * Make sure to deep-copy IPv6 header portion in case the data
4093 * is in an mbuf cluster, so that we can safely override the IPv6
4094 * header portion later.
4095 */
4096 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
4097 if (copym != NULL && ((copym->m_flags & M_EXT) ||
4098 copym->m_len < sizeof(struct ip6_hdr))) {
4099 copym = m_pullup(copym, sizeof(struct ip6_hdr));
4100 }
4101
4102 if (copym == NULL) {
4103 return;
4104 }
4105
4106 ip6 = mtod(copym, struct ip6_hdr *);
4107 src = ip6->ip6_src;
4108 /*
4109 * clear embedded scope identifiers if necessary.
4110 * in6_clearscope will touch the addresses only when necessary.
4111 */
4112 in6_clearscope(&ip6->ip6_src);
4113 in6_clearscope(&ip6->ip6_dst);
4114
4115 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
4116 in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
4117 }
4118
4119 /*
4120 * Stuff the 'real' ifp into the pkthdr, to be used in matching
4121 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
4122 * to make the loopback driver compliant with the data link
4123 * requirements.
4124 */
4125 copym->m_pkthdr.rcvif = origifp;
4126
4127 /*
4128 * Also record the source interface (which owns the source address).
4129 * This is basically a stripped down version of ifa_foraddr6().
4130 */
4131 if (srcifp == NULL) {
4132 struct in6_ifaddr *ia;
4133
4134 lck_rw_lock_shared(&in6_ifaddr_rwlock);
4135 for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) {
4136 IFA_LOCK_SPIN(&ia->ia_ifa);
4137 /* compare against src addr with embedded scope */
4138 if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) {
4139 srcifp = ia->ia_ifp;
4140 IFA_UNLOCK(&ia->ia_ifa);
4141 break;
4142 }
4143 IFA_UNLOCK(&ia->ia_ifa);
4144 }
4145 lck_rw_done(&in6_ifaddr_rwlock);
4146 }
4147 if (srcifp != NULL) {
4148 ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
4149 }
4150 ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
4151
4152 dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
4153 }
4154
4155 /*
4156 * Chop IPv6 header off from the payload.
4157 */
4158 static int
4159 ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
4160 {
4161 struct mbuf *mh;
4162 struct ip6_hdr *ip6;
4163
4164 ip6 = mtod(m, struct ip6_hdr *);
4165 if (m->m_len > sizeof(*ip6)) {
4166 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
4167 if (mh == NULL) {
4168 m_freem(m);
4169 return ENOBUFS;
4170 }
4171 M_COPY_PKTHDR(mh, m);
4172 MH_ALIGN(mh, sizeof(*ip6));
4173 m->m_flags &= ~M_PKTHDR;
4174 m->m_len -= sizeof(*ip6);
4175 m->m_data += sizeof(*ip6);
4176 mh->m_next = m;
4177 m = mh;
4178 m->m_len = sizeof(*ip6);
4179 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
4180 }
4181 exthdrs->ip6e_ip6 = m;
4182 return 0;
4183 }
4184
4185 static void
4186 ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
4187 int nxt0, uint32_t tlen, uint32_t optlen)
4188 {
4189 uint32_t sw_csum, hwcap = ifp->if_hwassist;
4190 int tso = TSO_IPV6_OK(ifp, m);
4191
4192 if (!hwcksum_tx) {
4193 /* do all in software; checksum offload is disabled */
4194 sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
4195 } else {
4196 /* do in software what the hardware cannot */
4197 sw_csum = m->m_pkthdr.csum_flags &
4198 ~IF_HWASSIST_CSUM_FLAGS(hwcap);
4199 }
4200
4201 if (optlen != 0) {
4202 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4203 m->m_pkthdr.csum_flags);
4204 } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
4205 (hwcap & CSUM_PARTIAL)) {
4206 /*
4207 * Partial checksum offload, ere), if no extension headers,
4208 * and TCP only (no UDP support, as the hardware may not be
4209 * able to convert +0 to -0 (0xffff) per RFC1122 4.1.3.4.
4210 * unless the interface supports "invert zero" capability.)
4211 */
4212 if (hwcksum_tx && !tso &&
4213 ((m->m_pkthdr.csum_flags & CSUM_TCPIPV6) ||
4214 ((hwcap & CSUM_ZERO_INVERT) &&
4215 (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
4216 tlen <= mtu) {
4217 uint16_t start = sizeof(struct ip6_hdr);
4218 uint16_t ulpoff =
4219 m->m_pkthdr.csum_data & 0xffff;
4220 m->m_pkthdr.csum_flags |=
4221 (CSUM_DATA_VALID | CSUM_PARTIAL);
4222 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
4223 m->m_pkthdr.csum_tx_start = start;
4224 sw_csum = 0;
4225 } else {
4226 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4227 m->m_pkthdr.csum_flags);
4228 }
4229 }
4230
4231 if (sw_csum & CSUM_DELAY_IPV6_DATA) {
4232 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
4233 sw_csum &= ~CSUM_DELAY_IPV6_DATA;
4234 }
4235
4236 if (hwcksum_tx) {
4237 /*
4238 * Drop off bits that aren't supported by hardware;
4239 * also make sure to preserve non-checksum related bits.
4240 */
4241 m->m_pkthdr.csum_flags =
4242 ((m->m_pkthdr.csum_flags &
4243 (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
4244 (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
4245 } else {
4246 /* drop all bits; checksum offload is disabled */
4247 m->m_pkthdr.csum_flags = 0;
4248 }
4249 }
4250
4251 /*
4252 * Compute IPv6 extension header length.
4253 */
4254 int
4255 ip6_optlen(struct in6pcb *in6p)
4256 {
4257 int len;
4258
4259 if (!in6p->in6p_outputopts) {
4260 return 0;
4261 }
4262
4263 len = 0;
4264 #define elen(x) \
4265 (((struct ip6_ext *)(x)) ? \
4266 (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
4267
4268 len += elen(in6p->in6p_outputopts->ip6po_hbh);
4269 if (in6p->in6p_outputopts->ip6po_rthdr) {
4270 /* dest1 is valid with rthdr only */
4271 len += elen(in6p->in6p_outputopts->ip6po_dest1);
4272 }
4273 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
4274 len += elen(in6p->in6p_outputopts->ip6po_dest2);
4275 return len;
4276 #undef elen
4277 }
4278
4279 static int
4280 sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
4281 {
4282 #pragma unused(arg1, arg2)
4283 int error, i;
4284
4285 i = ip6_output_measure;
4286 error = sysctl_handle_int(oidp, &i, 0, req);
4287 if (error || req->newptr == USER_ADDR_NULL) {
4288 goto done;
4289 }
4290 /* impose bounds */
4291 if (i < 0 || i > 1) {
4292 error = EINVAL;
4293 goto done;
4294 }
4295 if (ip6_output_measure != i && i == 1) {
4296 net_perf_initialize(&net_perf, ip6_output_measure_bins);
4297 }
4298 ip6_output_measure = i;
4299 done:
4300 return error;
4301 }
4302
4303 static int
4304 sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
4305 {
4306 #pragma unused(arg1, arg2)
4307 int error;
4308 uint64_t i;
4309
4310 i = ip6_output_measure_bins;
4311 error = sysctl_handle_quad(oidp, &i, 0, req);
4312 if (error || req->newptr == USER_ADDR_NULL) {
4313 goto done;
4314 }
4315 /* validate data */
4316 if (!net_perf_validate_bins(i)) {
4317 error = EINVAL;
4318 goto done;
4319 }
4320 ip6_output_measure_bins = i;
4321 done:
4322 return error;
4323 }
4324
4325 static int
4326 sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
4327 {
4328 #pragma unused(oidp, arg1, arg2)
4329 if (req->oldptr == USER_ADDR_NULL) {
4330 req->oldlen = (size_t)sizeof(struct ipstat);
4331 }
4332
4333 return SYSCTL_OUT(req, &net_perf, MIN(sizeof(net_perf), req->oldlen));
4334 }