]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet6/ip6_output.c
xnu-6153.101.6.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
CommitLineData
b0d623f7 1/*
0a7de745 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
b0d623f7
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
b0d623f7
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
b0d623f7
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
b0d623f7
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
b0d623f7
A
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
1c79356b
A
29/*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
2d21ac55
A
92/*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections. This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
1c79356b
A
98
99#include <sys/param.h>
100#include <sys/malloc.h>
101#include <sys/mbuf.h>
102#include <sys/errno.h>
103#include <sys/protosw.h>
104#include <sys/socket.h>
105#include <sys/socketvar.h>
106#include <sys/systm.h>
1c79356b 107#include <sys/kernel.h>
1c79356b 108#include <sys/proc.h>
91447636 109#include <sys/kauth.h>
6d2010ae
A
110#include <sys/mcache.h>
111#include <sys/sysctl.h>
112#include <kern/zalloc.h>
39236c6e 113#include <libkern/OSByteOrder.h>
6d2010ae
A
114
115#include <pexpert/pexpert.h>
39236c6e 116#include <mach/sdt.h>
1c79356b
A
117
118#include <net/if.h>
119#include <net/route.h>
2d21ac55 120#include <net/dlil.h>
5ba3f43e 121#include <net/net_api_stats.h>
39236c6e 122#include <net/net_osdep.h>
3e170ce0 123#include <net/net_perf.h>
1c79356b 124
d9a64523 125#include <netinet/ip.h>
1c79356b
A
126#include <netinet/in.h>
127#include <netinet/in_var.h>
55e303ae 128#include <netinet/ip_var.h>
9bccf70c 129#include <netinet6/in6_var.h>
1c79356b 130#include <netinet/ip6.h>
39236c6e 131#include <netinet/kpi_ipfilter_var.h>
39037602 132#include <netinet/in_tclass.h>
39236c6e 133
6d2010ae 134#include <netinet6/ip6protosw.h>
1c79356b
A
135#include <netinet/icmp6.h>
136#include <netinet6/ip6_var.h>
1c79356b 137#include <netinet/in_pcb.h>
1c79356b 138#include <netinet6/nd6.h>
6d2010ae 139#include <netinet6/scope6_var.h>
1c79356b
A
140#if IPSEC
141#include <netinet6/ipsec.h>
9bccf70c 142#include <netinet6/ipsec6.h>
1c79356b 143#include <netkey/key.h>
9bccf70c 144extern int ipsec_bypass;
1c79356b 145#endif /* IPSEC */
2d21ac55 146
fe8ab488
A
147#if NECP
148#include <net/necp.h>
149#endif /* NECP */
150
2d21ac55
A
151#if CONFIG_MACF_NET
152#include <security/mac.h>
39236c6e 153#endif /* CONFIG_MACF_NET */
1c79356b 154
316670eb
A
155#if DUMMYNET
156#include <netinet/ip_fw.h>
157#include <netinet/ip_dummynet.h>
158#endif /* DUMMYNET */
159
b0d623f7
A
160#if PF
161#include <net/pfvar.h>
162#endif /* PF */
163
3e170ce0
A
/*
 * Forward declarations.  Everything marked static is local to this file:
 * the three sysctl handlers come first, followed by the IPv6 output helpers.
 * udp_ctloutput() is the only extern and is defined outside this file.
 */
164static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
165static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
166static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
91447636 167static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
39236c6e 168static void ip6_out_cksum_stats(int, u_int32_t);
91447636 169static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
39236c6e
A
170static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
171 struct ip6_frag **);
172static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
cb323159 173 struct ifnet *, struct in6_addr *, u_int32_t *);
39236c6e
A
174static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
175 struct sockopt *sopt);
176static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
177static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
178static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
179static void im6o_trace(struct ip6_moptions *, int);
180static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
181 int, int);
91447636 182static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
39236c6e
A
183static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
184 int, uint32_t, uint32_t);
3e170ce0 185extern int udp_ctloutput(struct socket *, struct sockopt *);
3e170ce0
A
186static int ip6_fragment_packet(struct mbuf **m,
187 struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
cb323159 188 uint32_t mtu, uint32_t unfragpartlen,
3e170ce0
A
189 struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
190
/*
 * Performance-measurement knobs registered under the _net_inet6_ip6 sysctl
 * node.  Each OID is served by one of the handlers declared above; the
 * handler bodies are not visible in this part of the file.
 */
191SYSCTL_DECL(_net_inet6_ip6);
192
/*
 * output_perf (read/write int): when non-zero, ip6_output_list() records a
 * start timestamp with net_perf_start_time() on entry.
 */
193static int ip6_output_measure = 0;
194SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
0a7de745
A
195 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
196 &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
3e170ce0
A
197
/*
 * output_perf_bins (read/write 64-bit value).
 * NOTE(review): the OID type is CTLTYPE_QUAD and the backing variable is a
 * uint64_t, yet the format string below is "I" -- confirm whether "Q" was
 * intended.
 */
198static uint64_t ip6_output_measure_bins = 0;
199SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
0a7de745
A
200 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
201 sysctl_ip6_output_measure_bins, "I",
202 "bins for chaining performance data histogram");
3e170ce0
A
203
/*
 * output_perf_data (read-only struct): net_perf is the record handed to
 * net_perf_start_time() by ip6_output_list().  The OID passes no data
 * pointer (arg1 is 0); the value is produced by sysctl_ip6_output_getperf.
 */
204static net_perf_t net_perf;
205SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
0a7de745
A
206 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
207 0, 0, sysctl_ip6_output_getperf, "S,net_perf",
208 "IP6 output performance data (struct net_perf, net/net_perf.h)");
6d2010ae 209
/*
 * Number of caller records kept in each of the im6o_refhold[] and
 * im6o_refrele[] arrays of struct ip6_moptions_dbg.
 */
0a7de745 210#define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
6d2010ae
A
211
/*
 * Copy of IM6O_TRACE_HIST_SIZE stored in a variable.
 * NOTE(review): presumably so a debugger can read the value, since the
 * macro itself leaves no symbol -- confirm.
 */
212/* For gdb */
213__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
214
/*
 * Debug variant of struct ip6_moptions.  The plain ip6_moptions is embedded
 * as the first member, followed by reference-count bookkeeping and caller
 * traces.
 * NOTE(review): presumably allocated in place of a plain ip6_moptions when
 * im6o_debug is set -- confirm against the allocator, which is not visible
 * in this part of the file.
 */
215struct ip6_moptions_dbg {
0a7de745
A
216 struct ip6_moptions im6o; /* ip6_moptions */
217 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
218 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
6d2010ae
A
219 /*
220 * Alloc and free callers.
221 */
0a7de745
A
222 ctrace_t im6o_alloc;
223 ctrace_t im6o_free;
6d2010ae
A
224 /*
225 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
226 */
0a7de745
A
/* Each history array holds IM6O_TRACE_HIST_SIZE entries. */
227 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
228 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
6d2010ae
A
229};
230
/*
 * im6o_debug is initialised to 1 when the kernel is built with DEBUG.
 * Otherwise it has no initialiser and, being static, starts out as 0.
 */
231#if DEBUG
0a7de745 232static unsigned int im6o_debug = 1; /* debugging (enabled) */
6d2010ae 233#else
0a7de745 234static unsigned int im6o_debug; /* debugging (disabled) */
6d2010ae
A
235#endif /* !DEBUG */
236
0a7de745
A
/*
 * Zone bookkeeping for ip6_moptions allocations.  Both variables are
 * assigned elsewhere; the initialisation code is not visible in this part
 * of the file.
 */
237static unsigned int im6o_size; /* size of zone element */
238static struct zone *im6o_zone; /* zone for ip6_moptions */
6d2010ae 239
0a7de745
A
240#define IM6O_ZONE_MAX 64 /* maximum elements in zone */
241#define IM6O_ZONE_NAME "ip6_moptions" /* zone name */
1c79356b 242
/*
 * ip6_output() is a thin front end: it forwards every argument unchanged to
 * ip6_output_list() with the packetchain argument set to 0, and returns
 * whatever error code that function produces.
 */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
	int rc;

	/* packetchain == 0: ip6_output_list() skips its chain set-up for m0 */
	rc = ip6_output_list(m0, 0, opt, ro, flags, im6o, ifpp, ip6oa);

	return rc;
}
55e303ae 253
1c79356b 254/*
3e170ce0 255 * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
1c79356b
A
256 * header (with pri, len, nxt, hlim, src, dst).
257 * This function may modify ver and hlim only.
258 * The mbuf chain containing the packet will be freed.
259 * The mbuf opt, if present, will not be freed.
9bccf70c 260 *
39236c6e
A
261 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
262 * skipped and ro->ro_rt would be used. Otherwise the result of route
263 * lookup is stored in ro->ro_rt.
264 *
b0d623f7
A
265 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
266 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
9bccf70c 267 * which is rt_rmx.rmx_mtu.
1c79356b
A
268 */
269int
3e170ce0
A
270ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
271 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
272 struct ifnet **ifpp, struct ip6_out_args *ip6oa)
1c79356b 273{
39236c6e
A
274 struct ip6_hdr *ip6;
275 u_char *nexthdrp;
0a7de745 276 struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
3e170ce0 277 struct ifnet **ifpp_save = ifpp;
39236c6e 278 struct mbuf *m, *mprev;
3e170ce0
A
279 struct mbuf *sendchain = NULL, *sendchain_last = NULL;
280 struct mbuf *inputchain = NULL;
5ba3f43e 281 int nxt0 = 0;
39236c6e 282 struct route_in6 *ro_pmtu = NULL;
6d2010ae 283 struct rtentry *rt = NULL;
5ba3f43e 284 struct sockaddr_in6 *dst = NULL, src_sa, dst_sa;
1c79356b 285 int error = 0;
39236c6e 286 struct in6_ifaddr *ia = NULL, *src_ia = NULL;
5ba3f43e 287 u_int32_t mtu = 0;
1c79356b 288 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
39236c6e
A
289 struct ip6_rthdr *rh;
290 struct in6_addr finaldst;
91447636 291 ipfilter_t inject_filter_ref;
39236c6e 292 struct ipf_pktopts *ippo = NULL;
316670eb 293 struct flowadv *adv = NULL;
3e170ce0
A
294 uint32_t pktcnt = 0;
295 uint32_t packets_processed = 0;
296 struct timeval start_tv;
cb323159
A
297#if PF
298 boolean_t skip_pf = (ip6oa != NULL) &&
299 (ip6oa->ip6oa_flags & IP6OAF_SKIP_PF);
300#endif
301
316670eb
A
302#if DUMMYNET
303 struct m_tag *tag;
39236c6e 304 struct ip6_out_args saved_ip6oa;
316670eb 305 struct sockaddr_in6 dst_buf;
316670eb 306#endif /* DUMMYNET */
1c79356b 307#if IPSEC
9bccf70c 308 struct socket *so = NULL;
1c79356b 309 struct secpolicy *sp = NULL;
ebb1b9f4 310 struct route_in6 *ipsec_saved_route = NULL;
39236c6e 311 boolean_t needipsectun = FALSE;
1c79356b 312#endif /* IPSEC */
fe8ab488
A
313#if NECP
314 necp_kernel_policy_result necp_result = 0;
315 necp_kernel_policy_result_parameter necp_result_parameter;
316 necp_kernel_policy_id necp_matched_policy_id = 0;
317#endif /* NECP */
39236c6e
A
318 struct {
319 struct ipf_pktopts ipf_pktopts;
320 struct ip6_exthdrs exthdrs;
321 struct route_in6 ip6route;
322#if IPSEC
323 struct ipsec_output_state ipsec_state;
324#endif /* IPSEC */
fe8ab488
A
325#if NECP
326 struct route_in6 necp_route;
327#endif /* NECP */
39236c6e
A
328#if DUMMYNET
329 struct route_in6 saved_route;
330 struct route_in6 saved_ro_pmtu;
331 struct ip_fw_args args;
332#endif /* DUMMYNET */
333 } ip6obz;
0a7de745
A
334#define ipf_pktopts ip6obz.ipf_pktopts
335#define exthdrs ip6obz.exthdrs
336#define ip6route ip6obz.ip6route
337#define ipsec_state ip6obz.ipsec_state
338#define necp_route ip6obz.necp_route
339#define saved_route ip6obz.saved_route
340#define saved_ro_pmtu ip6obz.saved_ro_pmtu
341#define args ip6obz.args
39236c6e
A
342 union {
343 struct {
344 boolean_t select_srcif : 1;
345 boolean_t hdrsplit : 1;
3e170ce0 346 boolean_t route_selected : 1;
39236c6e
A
347 boolean_t dontfrag : 1;
348#if IPSEC
349 boolean_t needipsec : 1;
350 boolean_t noipsec : 1;
351#endif /* IPSEC */
352 };
353 uint32_t raw;
354 } ip6obf = { .raw = 0 };
1c79356b 355
0a7de745 356 if (ip6_output_measure) {
3e170ce0 357 net_perf_start_time(&net_perf, &start_tv);
0a7de745 358 }
3e170ce0 359
39236c6e 360 VERIFY(m0->m_flags & M_PKTHDR);
6d2010ae 361
39236c6e 362 /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
0a7de745 363 bzero(&ip6obz, sizeof(ip6obz));
316670eb 364
316670eb 365#if DUMMYNET
0a7de745 366 if (SLIST_EMPTY(&m0->m_pkthdr.tags)) {
39236c6e 367 goto tags_done;
0a7de745 368 }
39236c6e
A
369
370 /* Grab info from mtags prepended to the chain */
316670eb
A
371 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
372 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
0a7de745 373 struct dn_pkt_tag *dn_tag;
316670eb 374
3e170ce0
A
375 /*
376 * ip6_output_list() cannot handle chains of packets reinjected
377 * by dummynet. The same restriction applies to
378 * ip_output_list().
379 */
380 VERIFY(0 == packetchain);
381
0a7de745 382 dn_tag = (struct dn_pkt_tag *)(tag + 1);
316670eb
A
383 args.fwa_pf_rule = dn_tag->dn_pf_rule;
384
0a7de745 385 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
316670eb
A
386 dst = &dst_buf;
387 ifp = dn_tag->dn_ifp;
0a7de745 388 if (ifp != NULL) {
316670eb 389 ifnet_reference(ifp);
0a7de745 390 }
316670eb 391 flags = dn_tag->dn_flags;
39236c6e
A
392 if (dn_tag->dn_flags & IPV6_OUTARGS) {
393 saved_ip6oa = dn_tag->dn_ip6oa;
394 ip6oa = &saved_ip6oa;
395 }
316670eb
A
396
397 saved_route = dn_tag->dn_ro6;
398 ro = &saved_route;
399 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
400 ro_pmtu = &saved_ro_pmtu;
401 origifp = dn_tag->dn_origifp;
0a7de745 402 if (origifp != NULL) {
316670eb 403 ifnet_reference(origifp);
0a7de745 404 }
316670eb 405 mtu = dn_tag->dn_mtu;
316670eb
A
406 unfragpartlen = dn_tag->dn_unfragpartlen;
407
0a7de745 408 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs));
316670eb
A
409
410 m_tag_delete(m0, tag);
411 }
39236c6e
A
412
413tags_done:
316670eb
A
414#endif /* DUMMYNET */
415
39236c6e 416 m = m0;
39236c6e
A
417
418#if IPSEC
39236c6e
A
419 if (ipsec_bypass == 0) {
420 so = ipsec_getsocket(m);
fe8ab488
A
421 if (so != NULL) {
422 (void) ipsec_setsocket(m, NULL);
423 }
39236c6e
A
424 /* If packet is bound to an interface, check bound policies */
425 if ((flags & IPV6_OUTARGS) &&
0a7de745
A
426 (ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
427 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
39236c6e
A
428 /* ip6obf.noipsec is a bitfield, use temp integer */
429 int noipsec = 0;
430
431 if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
0a7de745 432 flags, ip6oa, &noipsec, &sp) != 0) {
39236c6e 433 goto bad;
0a7de745 434 }
39236c6e
A
435
436 ip6obf.noipsec = (noipsec != 0);
437 }
438 }
439#endif /* IPSEC */
39037602 440
39236c6e 441 ippo = &ipf_pktopts;
91447636 442
39037602 443 if (flags & IPV6_OUTARGS) {
39236c6e
A
444 /*
445 * In the forwarding case, only the ifscope value is used,
446 * as source interface selection doesn't take place.
447 */
448 if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
316670eb 449 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
0a7de745 450 (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF)))) {
316670eb 451 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
0a7de745 452 }
316670eb 453
39236c6e
A
454 if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
455 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
316670eb 456 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
39236c6e 457 (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
316670eb
A
458 }
459
0a7de745 460 if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR) {
316670eb 461 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
0a7de745 462 }
6d2010ae 463 } else {
39236c6e
A
464 ip6obf.select_srcif = FALSE;
465 if (flags & IPV6_OUTARGS) {
466 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
467 ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
468 IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
469 }
6d2010ae
A
470 }
471
39236c6e 472 if (flags & IPV6_OUTARGS) {
0a7de745 473 if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) {
fe8ab488 474 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
0a7de745
A
475 }
476 if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE) {
fe8ab488 477 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
0a7de745 478 }
cb323159
A
479 if (ip6oa->ip6oa_flags & IP6OAF_NO_CONSTRAINED) {
480 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_CONSTRAINED;
481 }
39236c6e
A
482 adv = &ip6oa->ip6oa_flowadv;
483 adv->code = FADV_SUCCESS;
484 ip6oa->ip6oa_retflags = 0;
485 }
486
3e170ce0
A
487 /*
488 * Clear out ifpp to be filled in after determining route. ifpp_save is
489 * used to keep old value to release reference properly and dtrace
490 * ipsec tunnel traffic properly.
491 */
0a7de745 492 if (ifpp != NULL && *ifpp != NULL) {
3e170ce0 493 *ifpp = NULL;
0a7de745 494 }
3e170ce0 495
316670eb
A
496#if DUMMYNET
497 if (args.fwa_pf_rule) {
498 ip6 = mtod(m, struct ip6_hdr *);
0a7de745 499 VERIFY(ro != NULL); /* ro == saved_route */
316670eb 500 goto check_with_pf;
6d2010ae 501 }
316670eb 502#endif /* DUMMYNET */
6d2010ae 503
3e170ce0
A
504#if NECP
505 /*
506 * Since all packets are assumed to come from same socket, necp lookup
507 * only needs to happen once per function entry.
508 */
509 necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
cb323159 510 (flags & IPV6_OUTARGS) ? ip6oa : NULL, ro ? ro->ro_rt : NULL, &necp_result,
3e170ce0
A
511 &necp_result_parameter);
512#endif /* NECP */
513
514 /*
515 * If a chain was passed in, prepare for the first iteration. For all
516 * other iterations, this work will be done at evaluateloop: label.
517 */
518 if (packetchain) {
519 /*
520 * Remove m from the chain during processing to avoid
521 * accidental frees on entire list.
522 */
523 inputchain = m->m_nextpkt;
524 m->m_nextpkt = NULL;
525 }
526
527loopit:
528 packets_processed++;
0a7de745 529 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP | PKTF_IFAINFO);
3e170ce0
A
530 ip6 = mtod(m, struct ip6_hdr *);
531 nxt0 = ip6->ip6_nxt;
532 finaldst = ip6->ip6_dst;
533 ip6obf.hdrsplit = FALSE;
534 ro_pmtu = NULL;
535
0a7de745 536 if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
3e170ce0 537 inject_filter_ref = ipf_get_inject_filter(m);
0a7de745 538 } else {
3e170ce0 539 inject_filter_ref = NULL;
0a7de745 540 }
3e170ce0 541
0a7de745
A
542#define MAKE_EXTHDR(hp, mp) do { \
543 if (hp != NULL) { \
544 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
545 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
546 ((eh)->ip6e_len + 1) << 3); \
547 if (error) \
548 goto freehdrs; \
549 } \
39236c6e 550} while (0)
316670eb 551
39236c6e 552 if (opt != NULL) {
1c79356b
A
553 /* Hop-by-Hop options header */
554 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
9bccf70c 555 /* Destination options header(1st part) */
6d2010ae
A
556 if (opt->ip6po_rthdr) {
557 /*
558 * Destination options header(1st part)
559 * This only makes sense with a routing header.
560 * See Section 9.2 of RFC 3542.
561 * Disabling this part just for MIP6 convenience is
562 * a bad idea. We need to think carefully about a
563 * way to make the advanced API coexist with MIP6
564 * options, which might automatically be inserted in
565 * the kernel.
566 */
567 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
568 }
1c79356b
A
569 /* Routing header */
570 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
571 /* Destination options header(2nd part) */
572 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
573 }
574
39236c6e
A
575#undef MAKE_EXTHDR
576
fe8ab488 577#if NECP
fe8ab488
A
578 if (necp_matched_policy_id) {
579 necp_mark_packet_from_ip(m, necp_matched_policy_id);
3e170ce0 580
fe8ab488 581 switch (necp_result) {
3e170ce0 582 case NECP_KERNEL_POLICY_RESULT_PASS:
ea3f0419
A
583 if (necp_result_parameter.pass_flags & NECP_KERNEL_POLICY_PASS_NO_SKIP_IPSEC) {
584 break;
585 }
3e170ce0
A
586 goto skip_ipsec;
587 case NECP_KERNEL_POLICY_RESULT_DROP:
cb323159
A
588 error = EHOSTUNREACH;
589 ip6stat.ip6s_necp_policy_drop++;
590 goto freehdrs;
3e170ce0
A
591 case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
592 /*
593 * Flow divert packets should be blocked at the IP
594 * layer.
595 */
596 error = EHOSTUNREACH;
5ba3f43e 597 ip6stat.ip6s_necp_policy_drop++;
3e170ce0
A
598 goto freehdrs;
599 case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
600 /*
601 * Verify that the packet is being routed to the tunnel
602 */
603 struct ifnet *policy_ifp =
604 necp_get_ifnet_from_result_parameter(
605 &necp_result_parameter);
606
607 if (policy_ifp == ifp) {
fe8ab488 608 goto skip_ipsec;
3e170ce0
A
609 } else {
610 if (necp_packet_can_rebind_to_ifnet(m,
611 policy_ifp, (struct route *)&necp_route,
612 AF_INET6)) {
613 /*
614 * Set scoped index to the tunnel
615 * interface, since it is compatible
616 * with the packet. This will only work
617 * for callers who pass IPV6_OUTARGS,
618 * but that covers all of the clients
619 * we care about today.
620 */
621 if (flags & IPV6_OUTARGS) {
622 ip6oa->ip6oa_boundif =
623 policy_ifp->if_index;
624 ip6oa->ip6oa_flags |=
625 IP6OAF_BOUND_IF;
626 }
627 if (opt != NULL
628 && opt->ip6po_pktinfo != NULL) {
629 opt->ip6po_pktinfo->
0a7de745
A
630 ipi6_ifindex =
631 policy_ifp->if_index;
3e170ce0
A
632 }
633 ro = &necp_route;
fe8ab488
A
634 goto skip_ipsec;
635 } else {
3e170ce0 636 error = ENETUNREACH;
5ba3f43e 637 ip6stat.ip6s_necp_policy_drop++;
3e170ce0 638 goto freehdrs;
fe8ab488 639 }
fe8ab488 640 }
3e170ce0
A
641 }
642 default:
643 break;
fe8ab488
A
644 }
645 }
646#endif /* NECP */
39037602 647
1c79356b 648#if IPSEC
0a7de745 649 if (ipsec_bypass != 0 || ip6obf.noipsec) {
9bccf70c 650 goto skip_ipsec;
0a7de745 651 }
316670eb 652
1c79356b 653 if (sp == NULL) {
39236c6e 654 /* get a security policy for this packet */
fe8ab488 655 if (so != NULL) {
39236c6e 656 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
0a7de745 657 so, &error);
fe8ab488
A
658 } else {
659 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
0a7de745 660 0, &error);
39236c6e
A
661 }
662 if (sp == NULL) {
663 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
664 goto freehdrs;
665 }
1c79356b
A
666 }
667
668 error = 0;
669
670 /* check policy */
671 switch (sp->policy) {
672 case IPSEC_POLICY_DISCARD:
2d21ac55 673 case IPSEC_POLICY_GENERATE:
1c79356b
A
674 /*
675 * This packet is just discarded.
676 */
2d21ac55 677 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
9bccf70c 678 goto freehdrs;
1c79356b
A
679
680 case IPSEC_POLICY_BYPASS:
681 case IPSEC_POLICY_NONE:
682 /* no need to do IPsec. */
39236c6e 683 ip6obf.needipsec = FALSE;
1c79356b 684 break;
316670eb 685
1c79356b
A
686 case IPSEC_POLICY_IPSEC:
687 if (sp->req == NULL) {
688 /* acquire a policy */
689 error = key_spdacquire(sp);
9bccf70c 690 goto freehdrs;
1c79356b 691 }
39236c6e 692 if (sp->ipsec_if) {
fe8ab488 693 goto skip_ipsec;
39236c6e
A
694 } else {
695 ip6obf.needipsec = TRUE;
696 }
1c79356b
A
697 break;
698
699 case IPSEC_POLICY_ENTRUST:
700 default:
39236c6e
A
701 printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
702 break;
1c79356b 703 }
39236c6e 704skip_ipsec:
1c79356b
A
705#endif /* IPSEC */
706
707 /*
708 * Calculate the total length of the extension header chain.
709 * Keep the length of the unfragmentable part for fragmentation.
710 */
711 optlen = 0;
0a7de745 712 if (exthdrs.ip6e_hbh != NULL) {
6d2010ae 713 optlen += exthdrs.ip6e_hbh->m_len;
0a7de745
A
714 }
715 if (exthdrs.ip6e_dest1 != NULL) {
6d2010ae 716 optlen += exthdrs.ip6e_dest1->m_len;
0a7de745
A
717 }
718 if (exthdrs.ip6e_rthdr != NULL) {
6d2010ae 719 optlen += exthdrs.ip6e_rthdr->m_len;
0a7de745
A
720 }
721 unfragpartlen = optlen + sizeof(struct ip6_hdr);
6d2010ae 722
1c79356b 723 /* NOTE: we don't add AH/ESP length here. do that later. */
0a7de745 724 if (exthdrs.ip6e_dest2 != NULL) {
6d2010ae 725 optlen += exthdrs.ip6e_dest2->m_len;
0a7de745 726 }
6d2010ae 727
1c79356b
A
728 /*
729 * If we need IPsec, or there is at least one extension header,
730 * separate IP6 header from the payload.
731 */
39236c6e
A
732 if ((
733#if IPSEC
0a7de745 734 ip6obf.needipsec ||
39236c6e 735#endif /* IPSEC */
0a7de745 736 optlen) && !ip6obf.hdrsplit) {
1c79356b
A
737 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
738 m = NULL;
739 goto freehdrs;
740 }
741 m = exthdrs.ip6e_ip6;
39236c6e 742 ip6obf.hdrsplit = TRUE;
1c79356b
A
743 }
744
745 /* adjust pointer */
746 ip6 = mtod(m, struct ip6_hdr *);
747
748 /* adjust mbuf packet header length */
749 m->m_pkthdr.len += optlen;
0a7de745 750 plen = m->m_pkthdr.len - sizeof(*ip6);
1c79356b
A
751
752 /* If this is a jumbo payload, insert a jumbo payload option. */
753 if (plen > IPV6_MAXPACKET) {
39236c6e 754 if (!ip6obf.hdrsplit) {
1c79356b
A
755 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
756 m = NULL;
757 goto freehdrs;
758 }
759 m = exthdrs.ip6e_ip6;
39236c6e 760 ip6obf.hdrsplit = TRUE;
1c79356b
A
761 }
762 /* adjust pointer */
763 ip6 = mtod(m, struct ip6_hdr *);
0a7de745 764 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) {
1c79356b 765 goto freehdrs;
0a7de745 766 }
1c79356b 767 ip6->ip6_plen = 0;
39236c6e 768 } else {
1c79356b 769 ip6->ip6_plen = htons(plen);
39236c6e 770 }
1c79356b
A
771 /*
772 * Concatenate headers and fill in next header fields.
773 * Here we have, on "m"
774 * IPv6 payload
775 * and we insert headers accordingly. Finally, we should be getting:
776 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
777 *
778 * during the header composing process, "m" points to IPv6 header.
779 * "mprev" points to an extension header prior to esp.
780 */
39236c6e
A
781 nexthdrp = &ip6->ip6_nxt;
782 mprev = m;
1c79356b 783
39236c6e
A
784 /*
785 * we treat dest2 specially. this makes IPsec processing
786 * much easier. the goal here is to make mprev point the
787 * mbuf prior to dest2.
788 *
789 * result: IPv6 dest2 payload
790 * m and mprev will point to IPv6 header.
791 */
792 if (exthdrs.ip6e_dest2 != NULL) {
793 if (!ip6obf.hdrsplit) {
794 panic("assumption failed: hdr not split");
795 /* NOTREACHED */
796 }
797 exthdrs.ip6e_dest2->m_next = m->m_next;
798 m->m_next = exthdrs.ip6e_dest2;
799 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
800 ip6->ip6_nxt = IPPROTO_DSTOPTS;
801 }
802
0a7de745
A
803#define MAKE_CHAIN(m, mp, p, i) do { \
804 if (m != NULL) { \
805 if (!ip6obf.hdrsplit) { \
806 panic("assumption failed: hdr not split"); \
807 /* NOTREACHED */ \
808 } \
809 *mtod((m), u_char *) = *(p); \
810 *(p) = (i); \
811 p = mtod((m), u_char *); \
812 (m)->m_next = (mp)->m_next; \
813 (mp)->m_next = (m); \
814 (mp) = (m); \
815 } \
39236c6e
A
816} while (0)
817 /*
818 * result: IPv6 hbh dest1 rthdr dest2 payload
819 * m will point to IPv6 header. mprev will point to the
820 * extension header prior to dest2 (rthdr in the above case).
821 */
822 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
823 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
824 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
91447636 825
3e170ce0
A
826 /* It is no longer safe to free the pointers in exthdrs. */
827 exthdrs.merged = TRUE;
828
39236c6e
A
829#undef MAKE_CHAIN
830
831#if IPSEC
0a7de745 832 if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)) {
39236c6e 833 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
0a7de745 834 }
39236c6e
A
835#endif /* IPSEC */
836
743345f9
A
837 if (!TAILQ_EMPTY(&ipv6_filters) &&
838 !((flags & IPV6_OUTARGS) &&
cb323159
A
839 (ip6oa->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED)
840#if NECP
841 && !necp_packet_should_skip_filters(m)
842#endif // NECP
843 )) {
0a7de745 844 struct ipfilter *filter;
39236c6e
A
845 int seen = (inject_filter_ref == NULL);
846 int fixscope = 0;
847
848 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
849 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
850 IM6O_LOCK(im6o);
851 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
852 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
853 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
854 IM6O_UNLOCK(im6o);
855 }
856
857 /* Hack: embed the scope_id in the destination */
858 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
859 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
860 fixscope = 1;
861 ip6->ip6_dst.s6_addr16[1] =
862 htons(ro->ro_dst.sin6_scope_id);
863 }
864
865 ipf_ref();
866 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
867 /*
868 * Don't process packet twice if we've already seen it.
869 */
870 if (seen == 0) {
871 if ((struct ipfilter *)inject_filter_ref ==
0a7de745 872 filter) {
39236c6e 873 seen = 1;
0a7de745 874 }
39236c6e
A
875 } else if (filter->ipf_filter.ipf_output != NULL) {
876 errno_t result;
877
878 result = filter->ipf_filter.ipf_output(
0a7de745
A
879 filter->ipf_filter.cookie,
880 (mbuf_t *)&m, ippo);
39236c6e
A
881 if (result == EJUSTRETURN) {
882 ipf_unref();
3e170ce0
A
883 m = NULL;
884 goto evaluateloop;
39236c6e
A
885 }
886 if (result != 0) {
887 ipf_unref();
888 goto bad;
91447636 889 }
91447636 890 }
91447636 891 }
39236c6e
A
892 ipf_unref();
893
894 ip6 = mtod(m, struct ip6_hdr *);
895 /* Hack: cleanup embedded scope_id if we put it there */
0a7de745 896 if (fixscope) {
39236c6e 897 ip6->ip6_dst.s6_addr16[1] = 0;
0a7de745 898 }
39236c6e 899 }
91447636 900
1c79356b 901#if IPSEC
39236c6e
A
902 if (ip6obf.needipsec) {
903 int segleft_org;
1c79356b
A
904
905 /*
906 * pointers after IPsec headers are not valid any more.
907 * other pointers need a great care too.
908 * (IPsec routines should not mangle mbufs prior to AH/ESP)
909 */
910 exthdrs.ip6e_dest2 = NULL;
911
39236c6e 912 if (exthdrs.ip6e_rthdr != NULL) {
1c79356b
A
913 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
914 segleft_org = rh->ip6r_segleft;
915 rh->ip6r_segleft = 0;
39236c6e
A
916 } else {
917 rh = NULL;
918 segleft_org = 0;
1c79356b
A
919 }
920
ebb1b9f4 921 ipsec_state.m = m;
39236c6e
A
922 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
923 sp, flags, &needipsectun);
ebb1b9f4 924 m = ipsec_state.m;
1c79356b
A
925 if (error) {
926 /* mbuf is already reclaimed in ipsec6_output_trans. */
927 m = NULL;
928 switch (error) {
929 case EHOSTUNREACH:
930 case ENETUNREACH:
931 case EMSGSIZE:
932 case ENOBUFS:
933 case ENOMEM:
934 break;
935 default:
39236c6e
A
936 printf("ip6_output (ipsec): error code %d\n",
937 error);
0a7de745 938 /* FALLTHRU */
1c79356b
A
939 case ENOENT:
940 /* don't show these error codes to the user */
941 error = 0;
942 break;
943 }
944 goto bad;
945 }
39236c6e 946 if (exthdrs.ip6e_rthdr != NULL) {
1c79356b
A
947 /* ah6_output doesn't modify mbuf chain */
948 rh->ip6r_segleft = segleft_org;
949 }
1c79356b 950 }
39236c6e 951#endif /* IPSEC */
1c79356b 952
0a7de745 953 /* If there is a routing header, discard the packet. */
39236c6e 954 if (exthdrs.ip6e_rthdr != NULL) {
0a7de745
A
955 error = EINVAL;
956 goto bad;
1c79356b
A
957 }
958
959 /* Source address validation */
960 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
39236c6e 961 !(flags & IPV6_UNSPECSRC)) {
1c79356b
A
962 error = EOPNOTSUPP;
963 ip6stat.ip6s_badscope++;
964 goto bad;
965 }
966 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
967 error = EOPNOTSUPP;
968 ip6stat.ip6s_badscope++;
969 goto bad;
970 }
971
972 ip6stat.ip6s_localout++;
973
974 /*
975 * Route packet.
976 */
39236c6e 977 if (ro == NULL) {
1c79356b 978 ro = &ip6route;
0a7de745 979 bzero((caddr_t)ro, sizeof(*ro));
1c79356b
A
980 }
981 ro_pmtu = ro;
0a7de745 982 if (opt != NULL && opt->ip6po_rthdr) {
1c79356b 983 ro = &opt->ip6po_route;
0a7de745 984 }
39236c6e 985 dst = SIN6(&ro->ro_dst);
6d2010ae 986
0a7de745 987 if (ro->ro_rt != NULL) {
6d2010ae 988 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
0a7de745 989 }
6d2010ae
A
990 /*
991 * if specified, try to fill in the traffic class field.
992 * do not override if a non-zero value is already set.
993 * we check the diffserv field and the ecn field separately.
994 */
39236c6e 995 if (opt != NULL && opt->ip6po_tclass >= 0) {
6d2010ae
A
996 int mask = 0;
997
0a7de745 998 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) {
6d2010ae 999 mask |= 0xfc;
0a7de745
A
1000 }
1001 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) {
6d2010ae 1002 mask |= 0x03;
0a7de745 1003 }
39236c6e
A
1004 if (mask != 0) {
1005 ip6->ip6_flow |=
1006 htonl((opt->ip6po_tclass & mask) << 20);
1007 }
6d2010ae
A
1008 }
1009
1010 /* fill in or override the hop limit field, if necessary. */
39236c6e 1011 if (opt && opt->ip6po_hlim != -1) {
6d2010ae 1012 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
39236c6e 1013 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
6d2010ae
A
1014 if (im6o != NULL) {
1015 IM6O_LOCK(im6o);
1016 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
1017 IM6O_UNLOCK(im6o);
1018 } else {
1019 ip6->ip6_hlim = ip6_defmcasthlim;
1020 }
1021 }
1022
1c79356b 1023 /*
b0d623f7
A
1024 * If there is a cached route, check that it is to the same
1025 * destination and is still up. If not, free it and try again.
1026 * Test rt_flags without holding rt_lock for performance reasons;
1027 * if the route is down it will hopefully be caught by the layer
1028 * below (since it uses this route as a hint) or during the
1029 * next transmit.
1c79356b 1030 */
39236c6e 1031 if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
0a7de745 1032 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst)) {
39236c6e 1033 ROUTE_RELEASE(ro);
0a7de745 1034 }
39236c6e 1035
b0d623f7 1036 if (ro->ro_rt == NULL) {
0a7de745 1037 bzero(dst, sizeof(*dst));
1c79356b 1038 dst->sin6_family = AF_INET6;
0a7de745 1039 dst->sin6_len = sizeof(struct sockaddr_in6);
1c79356b
A
1040 dst->sin6_addr = ip6->ip6_dst;
1041 }
1042#if IPSEC
39236c6e 1043 if (ip6obf.needipsec && needipsectun) {
6d2010ae 1044#if CONFIG_DTRACE
3e170ce0 1045 struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
39236c6e 1046#endif /* CONFIG_DTRACE */
1c79356b
A
1047 /*
1048 * All the extension headers will become inaccessible
1049 * (since they can be encrypted).
1050 * Don't panic, we need no more updates to extension headers
1051 * on inner IPv6 packet (since they are now encapsulated).
1052 *
1053 * IPv6 [ESP|AH] IPv6 [extension headers] payload
1054 */
0a7de745 1055 bzero(&exthdrs, sizeof(exthdrs));
1c79356b
A
1056 exthdrs.ip6e_ip6 = m;
1057
ebb1b9f4 1058 ipsec_state.m = m;
5c9f4661 1059 route_copyout((struct route *)&ipsec_state.ro, (struct route *)ro,
0a7de745 1060 sizeof(struct route_in6));
39236c6e 1061 ipsec_state.dst = SA(dst);
6d2010ae 1062
39236c6e 1063 /* So that we can see packets inside the tunnel */
6d2010ae 1064 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
39236c6e
A
1065 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
1066 struct ip *, NULL, struct ip6_hdr *, ip6);
6d2010ae 1067
ebb1b9f4 1068 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
39236c6e 1069 /* tunneled in IPv4? packet is gone */
3e170ce0
A
1070 if (ipsec_state.tunneled == 4) {
1071 m = NULL;
1072 goto evaluateloop;
1073 }
ebb1b9f4
A
1074 m = ipsec_state.m;
1075 ipsec_saved_route = ro;
1076 ro = (struct route_in6 *)&ipsec_state.ro;
39236c6e 1077 dst = SIN6(ipsec_state.dst);
1c79356b
A
1078 if (error) {
1079 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
1c79356b
A
1080 m = NULL;
1081 switch (error) {
1082 case EHOSTUNREACH:
1083 case ENETUNREACH:
1084 case EMSGSIZE:
1085 case ENOBUFS:
1086 case ENOMEM:
1087 break;
1088 default:
39236c6e
A
1089 printf("ip6_output (ipsec): error code %d\n",
1090 error);
0a7de745 1091 /* FALLTHRU */
1c79356b
A
1092 case ENOENT:
1093 /* don't show these error codes to the user */
1094 error = 0;
1095 break;
1096 }
1097 goto bad;
1098 }
316670eb 1099 /*
39236c6e
A
1100 * The packet has been encapsulated so the ifscope
1101 * is no longer valid since it does not apply to the
1102 * outer address: ignore the ifscope.
6d2010ae 1103 */
39236c6e
A
1104 if (flags & IPV6_OUTARGS) {
1105 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
1106 ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
1107 }
6d2010ae 1108 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
0a7de745 1109 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE) {
6d2010ae 1110 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
0a7de745 1111 }
6d2010ae 1112 }
1c79356b
A
1113 exthdrs.ip6e_ip6 = m;
1114 }
55e303ae 1115#endif /* IPSEC */
1c79356b 1116
3e170ce0
A
1117 /*
1118 * ifp should only be filled in for dummy net packets which will jump
1119 * to check_with_pf label.
1120 */
6d2010ae 1121 if (ifp != NULL) {
3e170ce0 1122 VERIFY(ip6obf.route_selected);
6d2010ae 1123 }
1c79356b 1124
6d2010ae
A
1125 /* adjust pointer */
1126 ip6 = mtod(m, struct ip6_hdr *);
1c79356b 1127
39236c6e 1128 if (ip6obf.select_srcif) {
0a7de745 1129 bzero(&src_sa, sizeof(src_sa));
6d2010ae 1130 src_sa.sin6_family = AF_INET6;
0a7de745 1131 src_sa.sin6_len = sizeof(src_sa);
6d2010ae
A
1132 src_sa.sin6_addr = ip6->ip6_src;
1133 }
0a7de745 1134 bzero(&dst_sa, sizeof(dst_sa));
6d2010ae 1135 dst_sa.sin6_family = AF_INET6;
0a7de745 1136 dst_sa.sin6_len = sizeof(dst_sa);
6d2010ae
A
1137 dst_sa.sin6_addr = ip6->ip6_dst;
1138
316670eb 1139 /*
3e170ce0
A
1140 * Only call in6_selectroute() on first iteration to avoid taking
1141 * multiple references on ifp and rt.
1142 *
316670eb
A
1143 * in6_selectroute() might return an ifp with its reference held
1144 * even in the error case, so make sure to release its reference.
39236c6e 1145 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
316670eb 1146 */
3e170ce0
A
1147 if (!ip6obf.route_selected) {
1148 error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
1149 &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
1150
1151 if (error != 0) {
1152 switch (error) {
1153 case EHOSTUNREACH:
1154 ip6stat.ip6s_noroute++;
1155 break;
1156 case EADDRNOTAVAIL:
1157 default:
1158 break; /* XXX statistics? */
1159 }
0a7de745 1160 if (ifp != NULL) {
3e170ce0 1161 in6_ifstat_inc(ifp, ifs6_out_discard);
0a7de745 1162 }
3e170ce0
A
1163 /* ifp (if non-NULL) will be released at the end */
1164 goto bad;
1c79356b 1165 }
3e170ce0 1166 ip6obf.route_selected = TRUE;
6d2010ae
A
1167 }
1168 if (rt == NULL) {
b0d623f7 1169 /*
6d2010ae
A
1170 * If in6_selectroute() does not return a route entry,
1171 * dst may not have been updated.
b0d623f7 1172 */
0a7de745 1173 *dst = dst_sa; /* XXX */
6d2010ae 1174 }
b0d623f7 1175
3e170ce0
A
1176#if NECP
1177 /* Catch-all to check if the interface is allowed */
1178 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1179 error = EHOSTUNREACH;
5ba3f43e 1180 ip6stat.ip6s_necp_policy_drop++;
3e170ce0
A
1181 goto bad;
1182 }
1183#endif /* NECP */
1184
6d2010ae
A
1185 /*
1186 * then rt (for unicast) and ifp must be non-NULL valid values.
1187 */
39236c6e 1188 if (!(flags & IPV6_FORWARDING)) {
39236c6e 1189 in6_ifstat_inc_na(ifp, ifs6_out_request);
6d2010ae
A
1190 }
1191 if (rt != NULL) {
1192 RT_LOCK(rt);
3e170ce0
A
1193 if (ia == NULL) {
1194 ia = (struct in6_ifaddr *)(rt->rt_ifa);
0a7de745 1195 if (ia != NULL) {
3e170ce0 1196 IFA_ADDREF(&ia->ia_ifa);
0a7de745 1197 }
3e170ce0 1198 }
6d2010ae
A
1199 rt->rt_use++;
1200 RT_UNLOCK(rt);
1201 }
1c79356b 1202
6d2010ae
A
1203 /*
1204 * The outgoing interface must be in the zone of source and
39236c6e
A
1205 * destination addresses (except local/loopback). We should
1206 * use ia_ifp to support the case of sending packets to an
1207 * address of our own.
6d2010ae
A
1208 */
1209 if (ia != NULL && ia->ia_ifp) {
0a7de745
A
1210 ifnet_reference(ia->ia_ifp); /* for origifp */
1211 if (origifp != NULL) {
6d2010ae 1212 ifnet_release(origifp);
0a7de745 1213 }
6d2010ae
A
1214 origifp = ia->ia_ifp;
1215 } else {
0a7de745
A
1216 if (ifp != NULL) {
1217 ifnet_reference(ifp); /* for origifp */
1218 }
1219 if (origifp != NULL) {
6d2010ae 1220 ifnet_release(origifp);
0a7de745 1221 }
6d2010ae
A
1222 origifp = ifp;
1223 }
1c79356b 1224
39236c6e
A
1225 /* skip scope enforcements for local/loopback route */
1226 if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1227 struct in6_addr src0, dst0;
1228 u_int32_t zone;
1c79356b 1229
39236c6e 1230 src0 = ip6->ip6_src;
0a7de745 1231 if (in6_setscope(&src0, origifp, &zone)) {
39236c6e 1232 goto badscope;
0a7de745
A
1233 }
1234 bzero(&src_sa, sizeof(src_sa));
39236c6e 1235 src_sa.sin6_family = AF_INET6;
0a7de745 1236 src_sa.sin6_len = sizeof(src_sa);
39236c6e
A
1237 src_sa.sin6_addr = ip6->ip6_src;
1238 if ((sa6_recoverscope(&src_sa, TRUE) ||
0a7de745 1239 zone != src_sa.sin6_scope_id)) {
39236c6e 1240 goto badscope;
0a7de745 1241 }
39236c6e
A
1242
1243 dst0 = ip6->ip6_dst;
0a7de745 1244 if ((in6_setscope(&dst0, origifp, &zone))) {
39236c6e 1245 goto badscope;
0a7de745 1246 }
39236c6e 1247 /* re-initialize to be sure */
0a7de745 1248 bzero(&dst_sa, sizeof(dst_sa));
39236c6e 1249 dst_sa.sin6_family = AF_INET6;
0a7de745 1250 dst_sa.sin6_len = sizeof(dst_sa);
39236c6e
A
1251 dst_sa.sin6_addr = ip6->ip6_dst;
1252 if ((sa6_recoverscope(&dst_sa, TRUE) ||
0a7de745 1253 zone != dst_sa.sin6_scope_id)) {
39236c6e 1254 goto badscope;
0a7de745 1255 }
39236c6e
A
1256
1257 /* scope check is done. */
1258 goto routefound;
1259
1260badscope:
1261 ip6stat.ip6s_badscope++;
1262 in6_ifstat_inc(origifp, ifs6_out_discard);
0a7de745 1263 if (error == 0) {
39236c6e 1264 error = EHOSTUNREACH; /* XXX */
0a7de745 1265 }
39236c6e
A
1266 goto bad;
1267 }
1c79356b 1268
39236c6e
A
1269routefound:
1270 if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1271 if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1c79356b 1272 /*
6d2010ae
A
1273 * The nexthop is explicitly specified by the
1274 * application. We assume the next hop is an IPv6
1275 * address.
1c79356b 1276 */
39236c6e
A
1277 dst = SIN6(opt->ip6po_nexthop);
1278 } else if ((rt->rt_flags & RTF_GATEWAY)) {
1279 dst = SIN6(rt->rt_gateway);
1280 }
1281 /*
1282 * For packets destined to local/loopback, record the
1283 * source interface (which owns the source
1284 * address), as well as the output interface. This is
1285 * needed to reconstruct the embedded zone for the
1286 * link-local address case in ip6_input().
1287 */
1288 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1289 uint32_t srcidx;
1290
0a7de745 1291 if (src_ia != NULL) {
39236c6e 1292 srcidx = src_ia->ia_ifp->if_index;
0a7de745 1293 } else if (ro->ro_srcia != NULL) {
39236c6e 1294 srcidx = ro->ro_srcia->ifa_ifp->if_index;
0a7de745 1295 } else {
39236c6e 1296 srcidx = 0;
0a7de745 1297 }
39236c6e
A
1298
1299 ip6_setsrcifaddr_info(m, srcidx, NULL);
1300 ip6_setdstifaddr_info(m, 0, ia);
b0d623f7 1301 }
6d2010ae 1302 }
b0d623f7 1303
6d2010ae
A
1304 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1305 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1306 } else {
0a7de745 1307 struct in6_multi *in6m;
1c79356b 1308
6d2010ae 1309 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
39236c6e 1310 in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1c79356b
A
1311
1312 /*
1313 * Confirm that the outgoing interface supports multicast.
1314 */
39236c6e 1315 if (!(ifp->if_flags & IFF_MULTICAST)) {
1c79356b
A
1316 ip6stat.ip6s_noroute++;
1317 in6_ifstat_inc(ifp, ifs6_out_discard);
1318 error = ENETUNREACH;
1319 goto bad;
1320 }
6d2010ae
A
1321 in6_multihead_lock_shared();
1322 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1323 in6_multihead_lock_done();
0a7de745 1324 if (im6o != NULL) {
6d2010ae 1325 IM6O_LOCK(im6o);
0a7de745 1326 }
1c79356b 1327 if (in6m != NULL &&
39236c6e 1328 (im6o == NULL || im6o->im6o_multicast_loop)) {
0a7de745 1329 if (im6o != NULL) {
6d2010ae 1330 IM6O_UNLOCK(im6o);
0a7de745 1331 }
1c79356b
A
1332 /*
1333 * If we belong to the destination multicast group
1334 * on the outgoing interface, and the caller did not
1335 * forbid loopback, loop back a copy.
1336 */
39236c6e 1337 ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
0a7de745 1338 } else if (im6o != NULL) {
fe8ab488 1339 IM6O_UNLOCK(im6o);
0a7de745
A
1340 }
1341 if (in6m != NULL) {
6d2010ae 1342 IN6M_REMREF(in6m);
0a7de745 1343 }
1c79356b
A
1344 /*
1345 * Multicasts with a hoplimit of zero may be looped back,
1346 * above, but must not be transmitted on a network.
1347 * Also, multicasts addressed to the loopback interface
1348 * are not sent -- the above call to ip6_mloopback() will
1349 * loop back a copy if this host actually belongs to the
1350 * destination group on the loopback interface.
1351 */
6d2010ae
A
1352 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1353 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
3e170ce0 1354 /* remove m from the packetchain and continue looping */
0a7de745 1355 if (m != NULL) {
3e170ce0 1356 m_freem(m);
0a7de745 1357 }
3e170ce0
A
1358 m = NULL;
1359 goto evaluateloop;
1c79356b
A
1360 }
1361 }
1362
1363 /*
1364 * Fill the outgoing interface to tell the upper layer
1365 * to increment per-interface statistics.
1366 */
3e170ce0 1367 if (ifpp != NULL && *ifpp == NULL) {
0a7de745 1368 ifnet_reference(ifp); /* for caller */
1c79356b 1369 *ifpp = ifp;
1c79356b 1370 }
b0d623f7 1371
6d2010ae 1372 /* Determine path MTU. */
cb323159 1373 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu)) != 0) {
6d2010ae 1374 goto bad;
0a7de745 1375 }
1c79356b
A
1376
1377 /*
6d2010ae
A
1378 * The caller of this function may specify to use the minimum MTU
1379 * in some cases.
1380 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1381 * setting. The logic is a bit complicated; by default, unicast
1382 * packets will follow path MTU while multicast packets will be sent at
1383 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1384 * including unicast ones will be sent at the minimum MTU. Multicast
1385 * packets will always be sent at the minimum MTU unless
1386 * IP6PO_MINMTU_DISABLE is explicitly specified.
1387 * See RFC 3542 for more details.
1c79356b 1388 */
6d2010ae 1389 if (mtu > IPV6_MMTU) {
39236c6e 1390 if ((flags & IPV6_MINMTU)) {
6d2010ae 1391 mtu = IPV6_MMTU;
39236c6e 1392 } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
6d2010ae 1393 mtu = IPV6_MMTU;
39236c6e
A
1394 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1395 (opt == NULL ||
1396 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
6d2010ae 1397 mtu = IPV6_MMTU;
b0d623f7 1398 }
1c79356b 1399 }
6d2010ae 1400
9bccf70c
A
1401 /*
1402 * clear embedded scope identifiers if necessary.
1403 * in6_clearscope will touch the addresses only when necessary.
1404 */
1405 in6_clearscope(&ip6->ip6_src);
1406 in6_clearscope(&ip6->ip6_dst);
1c79356b
A
1407 /*
1408 * If the outgoing packet contains a hop-by-hop options header,
1409 * it must be examined and processed even by the source node.
1410 * (RFC 2460, section 4.)
1411 */
39236c6e 1412 if (exthdrs.ip6e_hbh != NULL) {
9bccf70c 1413 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
6d2010ae 1414 u_int32_t dummy; /* XXX unused */
39236c6e 1415 uint32_t oplen = 0; /* for ip6_process_hopopts() */
9bccf70c 1416#if DIAGNOSTIC
0a7de745 1417 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) {
9bccf70c 1418 panic("ip6e_hbh is not continuous");
0a7de745 1419 }
9bccf70c 1420#endif
1c79356b 1421 /*
39236c6e
A
1422 * XXX: If we have to send an ICMPv6 error to the sender,
1423 * we need the M_LOOP flag since icmp6_error() expects
1424 * the IPv6 and the hop-by-hop options header are
1425 * continuous unless the flag is set.
1c79356b
A
1426 */
1427 m->m_flags |= M_LOOP;
1428 m->m_pkthdr.rcvif = ifp;
6d2010ae 1429 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
0a7de745 1430 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
39236c6e 1431 &dummy, &oplen) < 0) {
3e170ce0
A
1432 /*
1433 * m was already freed at this point. Set to NULL so it
0a7de745 1434 * is not re-freed at end of ip6_output_list.
3e170ce0
A
1435 */
1436 m = NULL;
0a7de745 1437 error = EINVAL; /* better error? */
3e170ce0 1438 goto bad;
1c79356b
A
1439 }
1440 m->m_flags &= ~M_LOOP; /* XXX */
1441 m->m_pkthdr.rcvif = NULL;
1442 }
1443
316670eb
A
1444#if DUMMYNET
1445check_with_pf:
39236c6e 1446#endif /* DUMMYNET */
b0d623f7 1447#if PF
cb323159 1448 if (PF_IS_ENABLED && !skip_pf) {
316670eb 1449#if DUMMYNET
3e170ce0 1450
316670eb 1451 /*
39236c6e
A
1452 * TODO: Need to save opt->ip6po_flags for reinjection
1453 * rdar://10434993
316670eb
A
1454 */
1455 args.fwa_m = m;
1456 args.fwa_oif = ifp;
1457 args.fwa_oflags = flags;
0a7de745 1458 if (flags & IPV6_OUTARGS) {
39236c6e 1459 args.fwa_ip6oa = ip6oa;
0a7de745 1460 }
316670eb
A
1461 args.fwa_ro6 = ro;
1462 args.fwa_dst6 = dst;
1463 args.fwa_ro6_pmtu = ro_pmtu;
1464 args.fwa_origifp = origifp;
1465 args.fwa_mtu = mtu;
316670eb
A
1466 args.fwa_unfragpartlen = unfragpartlen;
1467 args.fwa_exthdrs = &exthdrs;
6d2010ae 1468 /* Invoke outbound packet filter */
316670eb 1469 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
39236c6e 1470#else /* !DUMMYNET */
316670eb 1471 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
39236c6e 1472#endif /* !DUMMYNET */
b0d623f7 1473
316670eb 1474 if (error != 0 || m == NULL) {
6d2010ae 1475 if (m != NULL) {
39236c6e
A
1476 panic("%s: unexpected packet %p\n",
1477 __func__, m);
6d2010ae
A
1478 /* NOTREACHED */
1479 }
3e170ce0
A
1480 /* m was already freed by callee and is now NULL. */
1481 goto evaluateloop;
b0d623f7 1482 }
6d2010ae 1483 ip6 = mtod(m, struct ip6_hdr *);
b0d623f7 1484 }
b0d623f7
A
1485#endif /* PF */
1486
3e170ce0
A
1487#ifdef IPSEC
1488 /* clean ipsec history before fragmentation */
1489 ipsec_delaux(m);
1490#endif /* IPSEC */
1491
39037602
A
1492 if (ip6oa != NULL) {
1493 u_int8_t dscp;
0a7de745 1494
39037602
A
1495 dscp = (ntohl(ip6->ip6_flow) & IP6FLOW_DSCP_MASK) >> IP6FLOW_DSCP_SHIFT;
1496
1497 error = set_packet_qos(m, ifp,
1498 ip6oa->ip6oa_flags & IP6OAF_QOSMARKING_ALLOWED ? TRUE : FALSE,
1499 ip6oa->ip6oa_sotc, ip6oa->ip6oa_netsvctype, &dscp);
1500 if (error == 0) {
1501 ip6->ip6_flow &= ~htonl(IP6FLOW_DSCP_MASK);
1502 ip6->ip6_flow |= htonl((u_int32_t)dscp << IP6FLOW_DSCP_SHIFT);
1503 } else {
1504 printf("%s if_dscp_for_mbuf() error %d\n", __func__, error);
1505 error = 0;
1506 }
1507 }
1c79356b 1508 /*
3e170ce0
A
1509 * Determine whether fragmentation is necessary. If so, m is passed
1510 * back as a chain of packets and original mbuf is freed. Otherwise, m
1511 * is unchanged.
1c79356b 1512 */
3e170ce0 1513 error = ip6_fragment_packet(&m, opt,
cb323159 1514 &exthdrs, ifp, mtu, unfragpartlen, ro_pmtu, nxt0,
3e170ce0 1515 optlen);
6d2010ae 1516
0a7de745 1517 if (error) {
6d2010ae 1518 goto bad;
0a7de745 1519 }
3e170ce0
A
1520
1521/*
1522 * The evaluateloop label is where we decide whether to continue looping over
1523 * packets or call into nd code to send.
1524 */
1525evaluateloop:
1526
1527 /*
1528 * m may be NULL when we jump to the evaluateloop label from PF or
1529 * other code that can drop packets.
1530 */
1531 if (m != NULL) {
1532 /*
1533 * If we already have a chain to send, tack m onto the end.
1534 * Otherwise make m the start and end of the to-be-sent chain.
1535 */
1536 if (sendchain != NULL) {
1537 sendchain_last->m_nextpkt = m;
1538 } else {
1539 sendchain = m;
1540 }
1541
1542 /* Fragmentation may mean m is a chain. Find the last packet. */
0a7de745 1543 while (m->m_nextpkt) {
3e170ce0 1544 m = m->m_nextpkt;
0a7de745 1545 }
3e170ce0
A
1546 sendchain_last = m;
1547 pktcnt++;
1548 }
1549
1550 /* Fill in next m from inputchain as appropriate. */
1551 m = inputchain;
1552 if (m != NULL) {
1553 /* Isolate m from rest of input chain. */
1554 inputchain = m->m_nextpkt;
1555 m->m_nextpkt = NULL;
1556
1557 /*
1558 * Clear exthdrs and ipsec_state so stale contents are not
1559 * reused. Note this also clears the exthdrs.merged flag.
1560 */
1561 bzero(&exthdrs, sizeof(exthdrs));
1562 bzero(&ipsec_state, sizeof(ipsec_state));
1563
1564 /* Continue looping. */
1565 goto loopit;
1566 }
1567
1568 /*
1569 * If we get here, there's no more mbufs in inputchain, so send the
1570 * sendchain if there is one.
1571 */
1572 if (pktcnt > 0) {
1573 error = nd6_output_list(ifp, origifp, sendchain, dst,
1574 ro->ro_rt, adv);
1575 /*
1576 * Fall through to done label even in error case because
1577 * nd6_output_list frees packetchain in both success and
1578 * failure cases.
1579 */
1580 }
1581
1582done:
1583 if (ifpp_save != NULL && *ifpp_save != NULL) {
1584 ifnet_release(*ifpp_save);
1585 *ifpp_save = NULL;
1586 }
1587 ROUTE_RELEASE(&ip6route);
1588#if IPSEC
1589 ROUTE_RELEASE(&ipsec_state.ro);
0a7de745 1590 if (sp != NULL) {
3e170ce0 1591 key_freesp(sp, KEY_SADB_UNLOCKED);
0a7de745 1592 }
3e170ce0
A
1593#endif /* IPSEC */
1594#if NECP
1595 ROUTE_RELEASE(&necp_route);
1596#endif /* NECP */
1597#if DUMMYNET
1598 ROUTE_RELEASE(&saved_route);
1599 ROUTE_RELEASE(&saved_ro_pmtu);
1600#endif /* DUMMYNET */
1601
0a7de745 1602 if (ia != NULL) {
3e170ce0 1603 IFA_REMREF(&ia->ia_ifa);
0a7de745
A
1604 }
1605 if (src_ia != NULL) {
3e170ce0 1606 IFA_REMREF(&src_ia->ia_ifa);
0a7de745
A
1607 }
1608 if (ifp != NULL) {
3e170ce0 1609 ifnet_release(ifp);
0a7de745
A
1610 }
1611 if (origifp != NULL) {
3e170ce0 1612 ifnet_release(origifp);
0a7de745 1613 }
3e170ce0
A
1614 if (ip6_output_measure) {
1615 net_perf_measure_time(&net_perf, &start_tv, packets_processed);
1616 net_perf_histogram(&net_perf, packets_processed);
1617 }
0a7de745 1618 return error;
3e170ce0
A
1619
1620freehdrs:
1621 if (exthdrs.ip6e_hbh != NULL) {
0a7de745 1622 if (exthdrs.merged) {
3e170ce0 1623 panic("Double free of ip6e_hbh");
0a7de745 1624 }
3e170ce0
A
1625 m_freem(exthdrs.ip6e_hbh);
1626 }
1627 if (exthdrs.ip6e_dest1 != NULL) {
0a7de745 1628 if (exthdrs.merged) {
3e170ce0 1629 panic("Double free of ip6e_dest1");
0a7de745 1630 }
3e170ce0
A
1631 m_freem(exthdrs.ip6e_dest1);
1632 }
1633 if (exthdrs.ip6e_rthdr != NULL) {
0a7de745 1634 if (exthdrs.merged) {
3e170ce0 1635 panic("Double free of ip6e_rthdr");
0a7de745 1636 }
3e170ce0
A
1637 m_freem(exthdrs.ip6e_rthdr);
1638 }
1639 if (exthdrs.ip6e_dest2 != NULL) {
0a7de745 1640 if (exthdrs.merged) {
3e170ce0 1641 panic("Double free of ip6e_dest2");
0a7de745 1642 }
3e170ce0
A
1643 m_freem(exthdrs.ip6e_dest2);
1644 }
1645 /* FALLTHRU */
1646bad:
0a7de745 1647 if (inputchain != NULL) {
3e170ce0 1648 m_freem_list(inputchain);
0a7de745
A
1649 }
1650 if (sendchain != NULL) {
3e170ce0 1651 m_freem_list(sendchain);
0a7de745
A
1652 }
1653 if (m != NULL) {
3e170ce0 1654 m_freem(m);
0a7de745 1655 }
3e170ce0
A
1656
1657 goto done;
1658
1659#undef ipf_pktopts
1660#undef exthdrs
1661#undef ip6route
1662#undef ipsec_state
1663#undef saved_route
1664#undef saved_ro_pmtu
1665#undef args
1666}
1667
1668/* ip6_fragment_packet
1669 *
1670 * The fragmentation logic is rather complex:
cb323159 1671 * 1: normal case (dontfrag == 0)
3e170ce0
A
1672 * 1-a: send as is if tlen <= path mtu
1673 * 1-b: fragment if tlen > path mtu
1674 *
1675 * 2: if user asks us not to fragment (dontfrag == 1)
1676 * 2-a: send as is if tlen <= interface mtu
1677 * 2-b: error if tlen > interface mtu
3e170ce0
A
1678 */
1679
1680static int
1681ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
0a7de745 1682 struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
cb323159 1683 uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
0a7de745 1684 int nxt0, uint32_t optlen)
3e170ce0
A
1685{
1686 VERIFY(NULL != mptr);
1687 struct mbuf *m = *mptr;
1688 int error = 0;
1689 size_t tlen = m->m_pkthdr.len;
1690 boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
1691
5ba3f43e 1692 if (m->m_pkthdr.pkt_flags & PKTF_FORWARDED) {
39037602 1693 dontfrag = TRUE;
5ba3f43e
A
1694 /*
1695 * Discard partial sum information if this packet originated
1696 * from another interface; the packet would already have the
1697 * final checksum and we shouldn't recompute it.
1698 */
0a7de745
A
1699 if ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PARTIAL)) ==
1700 (CSUM_DATA_VALID | CSUM_PARTIAL)) {
5ba3f43e
A
1701 m->m_pkthdr.csum_flags &= ~CSUM_TX_FLAGS;
1702 m->m_pkthdr.csum_data = 0;
1703 }
1704 }
39037602 1705
316670eb 1706 /* Access without acquiring nd_ifinfo lock for performance */
0a7de745 1707 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
6d2010ae
A
1708 /*
1709 * Even if the DONTFRAG option is specified, we cannot send the
1710 * packet when the data length is larger than the MTU of the
1711 * outgoing interface.
1712 * Notify the error by sending IPV6_PATHMTU ancillary data as
1713 * well as returning an error code (the latter is not described
1714 * in the API spec.)
1715 */
1716 u_int32_t mtu32;
1717 struct ip6ctlparam ip6cp;
1718
1719 mtu32 = (u_int32_t)mtu;
0a7de745 1720 bzero(&ip6cp, sizeof(ip6cp));
6d2010ae 1721 ip6cp.ip6c_cmdarg = (void *)&mtu32;
39236c6e 1722 pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
3e170ce0 1723 return EMSGSIZE;
6d2010ae
A
1724 }
1725
1726 /*
1727 * transmit packet without fragmentation
1728 */
cb323159 1729 if (dontfrag ||
39236c6e 1730 (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
cb323159 1731 (ifp->if_hwassist & CSUM_FRAGMENT_IPV6))) {
3e170ce0
A
1732 /*
1733 * mppn not updated in this case because no new chain is formed
1734 * and inserted
1735 */
39236c6e 1736 ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
3e170ce0
A
1737 } else {
1738 /*
cb323159 1739 * time to fragment - cases 1-b is handled inside
3e170ce0
A
1740 * ip6_do_fragmentation().
1741 * mppn is passed down to be updated to point at fragment chain.
1742 */
cb323159
A
1743 u_int8_t *lexthdrsp;
1744
1745 if (exthdrsp->ip6e_rthdr != NULL) {
1746 lexthdrsp = mtod(exthdrsp->ip6e_rthdr, uint8_t *);
1747 } else if (exthdrsp->ip6e_dest1 != NULL) {
1748 lexthdrsp = mtod(exthdrsp->ip6e_dest1, uint8_t *);
1749 } else if (exthdrsp->ip6e_hbh != NULL) {
1750 lexthdrsp = mtod(exthdrsp->ip6e_hbh, uint8_t *);
1751 } else {
1752 lexthdrsp = NULL;
1753 }
3e170ce0 1754 error = ip6_do_fragmentation(mptr, optlen, ifp,
cb323159
A
1755 unfragpartlen, mtod(m, struct ip6_hdr *), lexthdrsp, mtu,
1756 nxt0, htonl(ip6_randomid()));
6d2010ae
A
1757 }
1758
3e170ce0
A
1759 return error;
1760}
1761
1762/*
1763 * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
1764 * the packet needs to be fragmented. on success, morig is freed and a chain
1765 * of fragments is linked into the packet chain where morig existed. Otherwise,
1766 * an errno is returned.
cb323159
A
1767 * optlen: total length of all extension headers (excludes the IPv6 header).
1768 * unfragpartlen: length of the per-fragment headers which consist of the IPv6
1769 * header plus any extension headers that must be processed by nodes
1770 * en route to the destination.
1771 * lexthdrsp: pointer to the last extension header in the unfragmentable part
1772 * or NULL.
1773 * nxt0: upper-layer protocol number.
1774 * id: Identification value to be used in the fragment header.
3e170ce0 1775 */
5ba3f43e 1776int
3e170ce0 1777ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
cb323159
A
1778 uint32_t unfragpartlen, struct ip6_hdr *ip6, uint8_t *lexthdrsp,
1779 uint32_t mtu, int nxt0, uint32_t id)
3e170ce0
A
1780{
1781 VERIFY(NULL != mptr);
1782 int error = 0;
1783
1784 struct mbuf *morig = *mptr;
1785 struct mbuf *first_mbufp = NULL;
1786 struct mbuf *last_mbufp = NULL;
1787
1788 size_t tlen = morig->m_pkthdr.len;
1789
cb323159 1790 /* try to fragment the packet. case 1-b */
3e170ce0 1791 if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
39236c6e 1792 /* TSO and fragment aren't compatible */
39236c6e 1793 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1794 return EMSGSIZE;
39236c6e 1795 } else if (mtu < IPV6_MMTU) {
6d2010ae 1796 /* path MTU cannot be less than IPV6_MMTU */
1c79356b 1797 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1798 return EMSGSIZE;
6d2010ae
A
1799 } else if (ip6->ip6_plen == 0) {
1800 /* jumbo payload cannot be fragmented */
1c79356b 1801 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1802 return EMSGSIZE;
1c79356b 1803 } else {
3e170ce0
A
1804 size_t hlen, len, off;
1805 struct mbuf **mnext = NULL;
6d2010ae 1806 struct ip6_frag *ip6f;
1c79356b
A
1807 u_char nextproto;
1808
1809 /*
1810 * Too large for the destination or interface;
1811 * fragment if possible.
1812 * Must be able to put at least 8 bytes per fragment.
1813 */
1814 hlen = unfragpartlen;
0a7de745 1815 if (mtu > IPV6_MAXPACKET) {
1c79356b 1816 mtu = IPV6_MAXPACKET;
0a7de745 1817 }
9bccf70c 1818
0a7de745 1819 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1c79356b 1820 if (len < 8) {
1c79356b 1821 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1822 return EMSGSIZE;
1c79356b
A
1823 }
1824
1c79356b
A
1825 /*
1826 * Change the next header field of the last header in the
1827 * unfragmentable part.
1828 */
cb323159
A
1829 if (lexthdrsp != NULL) {
1830 nextproto = *lexthdrsp;
1831 *lexthdrsp = IPPROTO_FRAGMENT;
1c79356b
A
1832 } else {
1833 nextproto = ip6->ip6_nxt;
1834 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1835 }
1836
0a7de745 1837 if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
3e170ce0 1838 in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
0a7de745 1839 }
6d2010ae 1840
1c79356b
A
1841 /*
1842 * Loop through length of segment after first fragment,
55e303ae
A
1843 * make new header and copy data of each part and link onto
1844 * chain.
1c79356b 1845 */
1c79356b 1846 for (off = hlen; off < tlen; off += len) {
3e170ce0
A
1847 struct ip6_hdr *new_mhip6;
1848 struct mbuf *new_m;
1849 struct mbuf *m_frgpart;
39236c6e 1850
0a7de745 1851 MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
3e170ce0 1852 if (new_m == NULL) {
1c79356b
A
1853 error = ENOBUFS;
1854 ip6stat.ip6s_odropped++;
3e170ce0
A
1855 break;
1856 }
1857 new_m->m_pkthdr.rcvif = NULL;
1858 new_m->m_flags = morig->m_flags & M_COPYFLAGS;
1859
1860 if (first_mbufp != NULL) {
1861 /* Every pass through loop but first */
1862 *mnext = new_m;
1863 last_mbufp = new_m;
1864 } else {
1865 /* This is the first element of the fragment chain */
1866 first_mbufp = new_m;
1867 last_mbufp = new_m;
1c79356b 1868 }
3e170ce0
A
1869 mnext = &new_m->m_nextpkt;
1870
1871 new_m->m_data += max_linkhdr;
1872 new_mhip6 = mtod(new_m, struct ip6_hdr *);
1873 *new_mhip6 = *ip6;
0a7de745 1874 new_m->m_len = sizeof(*new_mhip6);
3e170ce0
A
1875
1876 error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
39236c6e 1877 if (error) {
1c79356b 1878 ip6stat.ip6s_odropped++;
3e170ce0 1879 break;
1c79356b 1880 }
3e170ce0 1881
1c79356b 1882 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
0a7de745 1883 if (off + len >= tlen) {
1c79356b 1884 len = tlen - off;
0a7de745 1885 } else {
1c79356b 1886 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
0a7de745 1887 }
3e170ce0 1888 new_mhip6->ip6_plen = htons((u_short)(len + hlen +
0a7de745 1889 sizeof(*ip6f) - sizeof(struct ip6_hdr)));
3e170ce0
A
1890
1891 if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
1c79356b
A
1892 error = ENOBUFS;
1893 ip6stat.ip6s_odropped++;
3e170ce0 1894 break;
1c79356b 1895 }
3e170ce0 1896 m_cat(new_m, m_frgpart);
0a7de745 1897 new_m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
3e170ce0 1898 new_m->m_pkthdr.rcvif = NULL;
316670eb 1899
3e170ce0
A
1900 M_COPY_CLASSIFIER(new_m, morig);
1901 M_COPY_PFTAG(new_m, morig);
316670eb 1902
39236c6e 1903#ifdef notyet
2d21ac55 1904#if CONFIG_MACF_NET
3e170ce0 1905 mac_create_fragment(morig, new_m);
39236c6e
A
1906#endif /* CONFIG_MACF_NET */
1907#endif /* notyet */
1908
1c79356b
A
1909 ip6f->ip6f_reserved = 0;
1910 ip6f->ip6f_ident = id;
1911 ip6f->ip6f_nxt = nextproto;
1912 ip6stat.ip6s_ofragments++;
1913 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1914 }
1915
3e170ce0
A
1916 if (error) {
1917 /* free all the fragments created */
1918 if (first_mbufp != NULL) {
1919 m_freem_list(first_mbufp);
1920 first_mbufp = NULL;
1921 }
1922 last_mbufp = NULL;
39236c6e 1923 } else {
3e170ce0
A
1924 /* successful fragmenting */
1925 m_freem(morig);
1926 *mptr = first_mbufp;
1927 last_mbufp->m_nextpkt = NULL;
1928 ip6stat.ip6s_fragmented++;
1929 in6_ifstat_inc(ifp, ifs6_out_fragok);
39236c6e 1930 }
1c79356b 1931 }
3e170ce0 1932 return error;
1c79356b
A
1933}
1934
1935static int
39236c6e 1936ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1c79356b
A
1937{
1938 struct mbuf *m;
1939
0a7de745
A
1940 if (hlen > MCLBYTES) {
1941 return ENOBUFS; /* XXX */
1942 }
1c79356b 1943 MGET(m, M_DONTWAIT, MT_DATA);
0a7de745
A
1944 if (m == NULL) {
1945 return ENOBUFS;
1946 }
1c79356b
A
1947
1948 if (hlen > MLEN) {
1949 MCLGET(m, M_DONTWAIT);
39236c6e 1950 if (!(m->m_flags & M_EXT)) {
1c79356b 1951 m_free(m);
0a7de745 1952 return ENOBUFS;
1c79356b
A
1953 }
1954 }
1955 m->m_len = hlen;
0a7de745 1956 if (hdr != NULL) {
1c79356b 1957 bcopy(hdr, mtod(m, caddr_t), hlen);
0a7de745 1958 }
1c79356b
A
1959
1960 *mp = m;
0a7de745 1961 return 0;
1c79356b
A
1962}
1963
39236c6e
A
1964static void
1965ip6_out_cksum_stats(int proto, u_int32_t len)
1966{
1967 switch (proto) {
1968 case IPPROTO_TCP:
1969 tcp_out6_cksum_stats(len);
1970 break;
1971 case IPPROTO_UDP:
1972 udp_out6_cksum_stats(len);
1973 break;
1974 default:
1975 /* keep only TCP or UDP stats for now */
1976 break;
1977 }
1978}
1979
6d2010ae 1980/*
39236c6e
A
1981 * Process a delayed payload checksum calculation (outbound path.)
1982 *
1983 * hoff is the number of bytes beyond the mbuf data pointer which
1984 * points to the IPv6 header. optlen is the number of bytes, if any,
1985 * between the end of IPv6 header and the beginning of the ULP payload
1986 * header, which represents the extension headers. If optlen is less
1987 * than zero, this routine will bail when it detects extension headers.
1988 *
1989 * Returns a bitmask representing all the work done in software.
6d2010ae 1990 */
39236c6e
A
1991uint32_t
1992in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
1993 int32_t nxt0, uint32_t csum_flags)
6d2010ae 1994{
0a7de745 1995 unsigned char buf[sizeof(struct ip6_hdr)] __attribute__((aligned(8)));
39236c6e
A
1996 struct ip6_hdr *ip6;
1997 uint32_t offset, mlen, hlen, olen, sw_csum;
1998 uint16_t csum, ulpoff, plen;
1999 uint8_t nxt;
6d2010ae 2000
0a7de745 2001 _CASSERT(sizeof(csum) == sizeof(uint16_t));
39236c6e
A
2002 VERIFY(m->m_flags & M_PKTHDR);
2003
2004 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
2005
0a7de745 2006 if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0) {
39236c6e 2007 goto done;
0a7de745 2008 }
39236c6e 2009
0a7de745
A
2010 mlen = m->m_pkthdr.len; /* total mbuf len */
2011 hlen = sizeof(*ip6); /* IPv6 header len */
39236c6e
A
2012
2013 /* sanity check (need at least IPv6 header) */
2014 if (mlen < (hoff + hlen)) {
2015 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
2016 "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
2017 /* NOTREACHED */
2018 }
2019
2020 /*
2021 * In case the IPv6 header is not contiguous, or not 32-bit
2022 * aligned, copy it to a local buffer.
2023 */
2024 if ((hoff + hlen) > m->m_len ||
2025 !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
2026 m_copydata(m, hoff, hlen, (caddr_t)buf);
2027 ip6 = (struct ip6_hdr *)(void *)buf;
2028 } else {
2029 ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
2030 }
2031
2032 nxt = ip6->ip6_nxt;
2033 plen = ntohs(ip6->ip6_plen);
2034 if (plen != (mlen - (hoff + hlen))) {
2035 plen = OSSwapInt16(plen);
2036 if (plen != (mlen - (hoff + hlen))) {
2037 /* Don't complain for jumbograms */
2038 if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
2039 printf("%s: mbuf 0x%llx proto %d IPv6 "
2040 "plen %d (%x) [swapped %d (%x)] doesn't "
2041 "match actual packet length; %d is used "
2042 "instead\n", __func__,
2043 (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
2044 ip6->ip6_plen, ip6->ip6_plen, plen, plen,
2045 (mlen - (hoff + hlen)));
2046 }
2047 plen = mlen - (hoff + hlen);
2048 }
2049 }
2050
2051 if (optlen < 0) {
2052 /* next header isn't TCP/UDP and we don't know optlen, bail */
2053 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2054 sw_csum = 0;
2055 goto done;
2056 }
2057 olen = 0;
2058 } else {
2059 /* caller supplied the original transport number; use it */
0a7de745 2060 if (nxt0 >= 0) {
39236c6e 2061 nxt = nxt0;
0a7de745 2062 }
39236c6e 2063 olen = optlen;
6d2010ae
A
2064 }
2065
0a7de745 2066 offset = hoff + hlen + olen; /* ULP header */
39236c6e
A
2067
2068 /* sanity check */
2069 if (mlen < offset) {
2070 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
2071 "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
2072 /* NOTREACHED */
2073 }
2074
2075 /*
2076 * offset is added to the lower 16-bit value of csum_data,
2077 * which is expected to contain the ULP offset; therefore
2078 * CSUM_PARTIAL offset adjustment must be undone.
2079 */
0a7de745
A
2080 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL | CSUM_DATA_VALID)) ==
2081 (CSUM_PARTIAL | CSUM_DATA_VALID)) {
39236c6e
A
2082 /*
2083 * Get back the original ULP offset (this will
2084 * undo the CSUM_PARTIAL logic in ip6_output.)
2085 */
2086 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
2087 m->m_pkthdr.csum_tx_start);
2088 }
2089
0a7de745 2090 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
39236c6e 2091
0a7de745 2092 if (mlen < (ulpoff + sizeof(csum))) {
39236c6e
A
2093 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2094 "cksum offset (%u) cksum flags 0x%x\n", __func__,
2095 m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
2096 /* NOTREACHED */
2097 }
2098
2099 csum = inet6_cksum(m, 0, offset, plen - olen);
2100
2101 /* Update stats */
2102 ip6_out_cksum_stats(nxt, plen - olen);
2103
2104 /* RFC1122 4.1.3.4 */
5ba3f43e 2105 if (csum == 0 &&
0a7de745 2106 (m->m_pkthdr.csum_flags & (CSUM_UDPIPV6 | CSUM_ZERO_INVERT))) {
39236c6e 2107 csum = 0xffff;
0a7de745 2108 }
39236c6e
A
2109
2110 /* Insert the checksum in the ULP csum field */
2111 offset += ulpoff;
0a7de745
A
2112 if ((offset + sizeof(csum)) > m->m_len) {
2113 m_copyback(m, offset, sizeof(csum), &csum);
39236c6e 2114 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
316670eb 2115 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
6d2010ae 2116 } else {
0a7de745 2117 bcopy(&csum, (mtod(m, char *) + offset), sizeof(csum));
6d2010ae 2118 }
5ba3f43e
A
2119 m->m_pkthdr.csum_flags &= ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID |
2120 CSUM_PARTIAL | CSUM_ZERO_INVERT);
39236c6e
A
2121
2122done:
0a7de745 2123 return sw_csum;
6d2010ae 2124}
39236c6e 2125
1c79356b
A
2126/*
2127 * Insert jumbo payload option.
2128 */
2129static int
39236c6e 2130ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1c79356b
A
2131{
2132 struct mbuf *mopt;
2133 u_char *optbuf;
9bccf70c 2134 u_int32_t v;
1c79356b 2135
0a7de745 2136#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1c79356b
A
2137
2138 /*
2139 * If there is no hop-by-hop options header, allocate new one.
2140 * If there is one but it doesn't have enough space to store the
2141 * jumbo payload option, allocate a cluster to store the whole options.
2142 * Otherwise, use it to store the options.
2143 */
39236c6e 2144 if (exthdrs->ip6e_hbh == NULL) {
1c79356b 2145 MGET(mopt, M_DONTWAIT, MT_DATA);
0a7de745
A
2146 if (mopt == NULL) {
2147 return ENOBUFS;
2148 }
1c79356b
A
2149 mopt->m_len = JUMBOOPTLEN;
2150 optbuf = mtod(mopt, u_char *);
0a7de745 2151 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1c79356b
A
2152 exthdrs->ip6e_hbh = mopt;
2153 } else {
2154 struct ip6_hbh *hbh;
2155
2156 mopt = exthdrs->ip6e_hbh;
2157 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
9bccf70c
A
2158 /*
2159 * XXX assumption:
2160 * - exthdrs->ip6e_hbh is not referenced from places
2161 * other than exthdrs.
2162 * - exthdrs->ip6e_hbh is not an mbuf chain.
2163 */
6d2010ae 2164 u_int32_t oldoptlen = mopt->m_len;
9bccf70c 2165 struct mbuf *n;
1c79356b 2166
9bccf70c
A
2167 /*
2168 * XXX: give up if the whole (new) hbh header does
2169 * not fit even in an mbuf cluster.
2170 */
0a7de745
A
2171 if (oldoptlen + JUMBOOPTLEN > MCLBYTES) {
2172 return ENOBUFS;
2173 }
1c79356b 2174
9bccf70c
A
2175 /*
2176 * As a consequence, we must always prepare a cluster
2177 * at this point.
2178 */
2179 MGET(n, M_DONTWAIT, MT_DATA);
39236c6e 2180 if (n != NULL) {
9bccf70c 2181 MCLGET(n, M_DONTWAIT);
39236c6e 2182 if (!(n->m_flags & M_EXT)) {
9bccf70c
A
2183 m_freem(n);
2184 n = NULL;
2185 }
2186 }
0a7de745
A
2187 if (n == NULL) {
2188 return ENOBUFS;
2189 }
9bccf70c
A
2190 n->m_len = oldoptlen + JUMBOOPTLEN;
2191 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
6d2010ae
A
2192 oldoptlen);
2193 optbuf = mtod(n, u_char *) + oldoptlen;
9bccf70c
A
2194 m_freem(mopt);
2195 mopt = exthdrs->ip6e_hbh = n;
1c79356b
A
2196 } else {
2197 optbuf = mtod(mopt, u_char *) + mopt->m_len;
2198 mopt->m_len += JUMBOOPTLEN;
2199 }
2200 optbuf[0] = IP6OPT_PADN;
2201 optbuf[1] = 1;
2202
2203 /*
2204 * Adjust the header length according to the pad and
2205 * the jumbo payload option.
2206 */
2207 hbh = mtod(mopt, struct ip6_hbh *);
2208 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
2209 }
2210
2211 /* fill in the option. */
2212 optbuf[2] = IP6OPT_JUMBO;
2213 optbuf[3] = 4;
9bccf70c 2214 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
0a7de745 2215 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1c79356b
A
2216
2217 /* finally, adjust the packet header length */
2218 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
2219
0a7de745 2220 return 0;
1c79356b
A
2221#undef JUMBOOPTLEN
2222}
2223
2224/*
2225 * Insert fragment header and copy unfragmentable header portions.
2226 */
2227static int
39236c6e
A
2228ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
2229 struct ip6_frag **frghdrp)
1c79356b
A
2230{
2231 struct mbuf *n, *mlast;
2232
0a7de745
A
2233 if (hlen > sizeof(struct ip6_hdr)) {
2234 n = m_copym(m0, sizeof(struct ip6_hdr),
2235 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
2236 if (n == NULL) {
2237 return ENOBUFS;
2238 }
1c79356b 2239 m->m_next = n;
0a7de745 2240 } else {
1c79356b 2241 n = m;
0a7de745 2242 }
1c79356b
A
2243
2244 /* Search for the last mbuf of unfragmentable part. */
0a7de745 2245 for (mlast = n; mlast->m_next; mlast = mlast->m_next) {
1c79356b 2246 ;
0a7de745 2247 }
1c79356b 2248
39236c6e 2249 if (!(mlast->m_flags & M_EXT) &&
0a7de745 2250 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
39236c6e 2251 /* use the trailing space of the last mbuf for the frag hdr */
6d2010ae
A
2252 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
2253 mlast->m_len);
0a7de745
A
2254 mlast->m_len += sizeof(struct ip6_frag);
2255 m->m_pkthdr.len += sizeof(struct ip6_frag);
1c79356b
A
2256 } else {
2257 /* allocate a new mbuf for the fragment header */
2258 struct mbuf *mfrg;
2259
2260 MGET(mfrg, M_DONTWAIT, MT_DATA);
0a7de745
A
2261 if (mfrg == NULL) {
2262 return ENOBUFS;
2263 }
2264 mfrg->m_len = sizeof(struct ip6_frag);
1c79356b
A
2265 *frghdrp = mtod(mfrg, struct ip6_frag *);
2266 mlast->m_next = mfrg;
2267 }
2268
0a7de745 2269 return 0;
1c79356b
A
2270}
2271
6d2010ae
A
2272static int
2273ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
cb323159 2274 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup)
6d2010ae
A
2275{
2276 u_int32_t mtu = 0;
6d2010ae 2277 int error = 0;
5ba3f43e 2278
55e303ae 2279
6d2010ae
A
2280 if (ro_pmtu != ro) {
2281 /* The first hop and the final destination may differ. */
39236c6e
A
2282 struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
2283 if (ROUTE_UNUSABLE(ro_pmtu) ||
0a7de745 2284 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) {
39236c6e 2285 ROUTE_RELEASE(ro_pmtu);
0a7de745 2286 }
39236c6e 2287
6d2010ae 2288 if (ro_pmtu->ro_rt == NULL) {
0a7de745 2289 bzero(sa6_dst, sizeof(*sa6_dst));
6d2010ae 2290 sa6_dst->sin6_family = AF_INET6;
0a7de745 2291 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
6d2010ae
A
2292 sa6_dst->sin6_addr = *dst;
2293
2294 rtalloc_scoped((struct route *)ro_pmtu,
2295 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
2296 }
2297 }
2298
6d2010ae
A
2299 if (ro_pmtu->ro_rt != NULL) {
2300 u_int32_t ifmtu;
2301
0a7de745 2302 if (ifp == NULL) {
fe8ab488 2303 ifp = ro_pmtu->ro_rt->rt_ifp;
0a7de745 2304 }
316670eb 2305 /* Access without acquiring nd_ifinfo lock for performance */
6d2010ae 2306 ifmtu = IN6_LINKMTU(ifp);
6d2010ae 2307
39236c6e
A
2308 /*
2309 * Access rmx_mtu without holding the route entry lock,
2310 * for performance; this isn't something that changes
2311 * often, so optimize.
2312 */
6d2010ae
A
2313 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2314 if (mtu > ifmtu || mtu == 0) {
2315 /*
2316 * The MTU on the route is larger than the MTU on
2317 * the interface! This shouldn't happen, unless the
2318 * MTU of the interface has been changed after the
2319 * interface was brought up. Change the MTU in the
2320 * route to match the interface MTU (as long as the
2321 * field isn't locked).
2322 *
2323 * if MTU on the route is 0, we need to fix the MTU.
2324 * this case happens with path MTU discovery timeouts.
2325 */
39236c6e 2326 mtu = ifmtu;
0a7de745 2327 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU)) {
39236c6e 2328 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
0a7de745 2329 }
39236c6e 2330 }
6d2010ae
A
2331 } else {
2332 if (ifp) {
316670eb 2333 /* Don't hold nd_ifinfo lock for performance */
6d2010ae 2334 mtu = IN6_LINKMTU(ifp);
39236c6e 2335 } else {
6d2010ae 2336 error = EHOSTUNREACH; /* XXX */
39236c6e 2337 }
6d2010ae
A
2338 }
2339
2340 *mtup = mtu;
0a7de745 2341 return error;
6d2010ae
A
2342}
2343
2344/*
1c79356b
A
2345 * IP6 socket option processing.
2346 */
1c79356b 2347int
39236c6e 2348ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2349{
6d2010ae
A
2350 int optdatalen, uproto;
2351 void *optdata;
9bccf70c
A
2352 int privileged;
2353 struct inpcb *in6p = sotoinpcb(so);
b0d623f7 2354 int error = 0, optval = 0;
2d21ac55
A
2355 int level, op = -1, optname = 0;
2356 int optlen = 0;
b0d623f7 2357 struct proc *p;
cb323159 2358 lck_mtx_t *mutex_held = NULL;
1c79356b 2359
39236c6e
A
2360 VERIFY(sopt != NULL);
2361
b0d623f7
A
2362 level = sopt->sopt_level;
2363 op = sopt->sopt_dir;
2364 optname = sopt->sopt_name;
2365 optlen = sopt->sopt_valsize;
2366 p = sopt->sopt_p;
39236c6e 2367 uproto = (int)SOCK_PROTO(so);
1c79356b 2368
b0d623f7 2369 privileged = (proc_suser(p) == 0);
1c79356b
A
2370
2371 if (level == IPPROTO_IPV6) {
5ba3f43e 2372 boolean_t capture_exthdrstat_in = FALSE;
1c79356b 2373 switch (op) {
1c79356b 2374 case SOPT_SET:
cb323159
A
2375 mutex_held = socket_getlock(so, PR_F_WILLUNLOCK);
2376 /*
2377 * Wait if we are in the middle of ip6_output
2378 * as we unlocked the socket there and don't
2379 * want to overwrite the IP options
2380 */
2381 if (in6p->inp_sndinprog_cnt > 0) {
2382 in6p->inp_sndingprog_waiters++;
2383
2384 while (in6p->inp_sndinprog_cnt > 0) {
2385 msleep(&in6p->inp_sndinprog_cnt, mutex_held,
2386 PSOCK | PCATCH, "inp_sndinprog_cnt",
2387 NULL);
2388 }
2389 in6p->inp_sndingprog_waiters--;
2390 }
1c79356b 2391 switch (optname) {
39236c6e 2392 case IPV6_2292PKTOPTIONS: {
1c79356b
A
2393 struct mbuf *m;
2394
39236c6e 2395 error = soopt_getm(sopt, &m);
0a7de745 2396 if (error != 0) {
1c79356b 2397 break;
0a7de745 2398 }
39236c6e 2399 error = soopt_mcopyin(sopt, m);
0a7de745 2400 if (error != 0) {
1c79356b 2401 break;
0a7de745 2402 }
1c79356b 2403 error = ip6_pcbopts(&in6p->in6p_outputopts,
39236c6e
A
2404 m, so, sopt);
2405 m_freem(m);
1c79356b
A
2406 break;
2407 }
9bccf70c 2408
1c79356b
A
2409 /*
2410 * Use of some Hop-by-Hop options or some
2411 * Destination options, might require special
2412 * privilege. That is, normal applications
2413 * (without special privilege) might be forbidden
2414 * from setting certain options in outgoing packets,
2415 * and might never see certain options in received
2416 * packets. [RFC 2292 Section 6]
2417 * KAME specific note:
2418 * KAME prevents non-privileged users from sending or
2419 * receiving ANY hbh/dst options in order to avoid
2420 * overhead of parsing options in the kernel.
2421 */
6d2010ae
A
2422 case IPV6_RECVHOPOPTS:
2423 case IPV6_RECVDSTOPTS:
2424 case IPV6_RECVRTHDRDSTOPTS:
0a7de745 2425 if (!privileged) {
39236c6e 2426 break;
0a7de745
A
2427 }
2428 /* FALLTHROUGH */
1c79356b 2429 case IPV6_UNICAST_HOPS:
6d2010ae 2430 case IPV6_HOPLIMIT:
6d2010ae
A
2431 case IPV6_RECVPKTINFO:
2432 case IPV6_RECVHOPLIMIT:
2433 case IPV6_RECVRTHDR:
2434 case IPV6_RECVPATHMTU:
b0d623f7 2435 case IPV6_RECVTCLASS:
9bccf70c 2436 case IPV6_V6ONLY:
6d2010ae 2437 case IPV6_AUTOFLOWLABEL:
0a7de745 2438 if (optlen != sizeof(int)) {
1c79356b 2439 error = EINVAL;
9bccf70c
A
2440 break;
2441 }
2442 error = sooptcopyin(sopt, &optval,
0a7de745
A
2443 sizeof(optval), sizeof(optval));
2444 if (error) {
9bccf70c 2445 break;
0a7de745 2446 }
1c79356b 2447
39236c6e 2448 switch (optname) {
9bccf70c 2449 case IPV6_UNICAST_HOPS:
39236c6e 2450 if (optval < -1 || optval >= 256) {
9bccf70c 2451 error = EINVAL;
39236c6e 2452 } else {
9bccf70c
A
2453 /* -1 = kernel default */
2454 in6p->in6p_hops = optval;
39236c6e
A
2455 if (in6p->inp_vflag &
2456 INP_IPV4) {
2457 in6p->inp_ip_ttl =
2458 optval;
2459 }
9bccf70c
A
2460 }
2461 break;
0a7de745
A
2462#define OPTSET(bit) do { \
2463 if (optval) \
2464 in6p->inp_flags |= (bit); \
2465 else \
2466 in6p->inp_flags &= ~(bit); \
39236c6e
A
2467} while (0)
2468
0a7de745
A
2469#define OPTSET2292(bit) do { \
2470 in6p->inp_flags |= IN6P_RFC2292; \
2471 if (optval) \
2472 in6p->inp_flags |= (bit); \
2473 else \
2474 in6p->inp_flags &= ~(bit); \
39236c6e
A
2475} while (0)
2476
0a7de745 2477#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
6d2010ae
A
2478
2479 case IPV6_RECVPKTINFO:
2480 /* cannot mix with RFC2292 */
2481 if (OPTBIT(IN6P_RFC2292)) {
2482 error = EINVAL;
2483 break;
2484 }
2485 OPTSET(IN6P_PKTINFO);
2486 break;
2487
39236c6e 2488 case IPV6_HOPLIMIT: {
6d2010ae
A
2489 struct ip6_pktopts **optp;
2490
2491 /* cannot mix with RFC2292 */
2492 if (OPTBIT(IN6P_RFC2292)) {
2493 error = EINVAL;
2494 break;
2495 }
2496 optp = &in6p->in6p_outputopts;
2497 error = ip6_pcbopt(IPV6_HOPLIMIT,
0a7de745 2498 (u_char *)&optval, sizeof(optval),
6d2010ae
A
2499 optp, uproto);
2500 break;
2501 }
2502
2503 case IPV6_RECVHOPLIMIT:
2504 /* cannot mix with RFC2292 */
2505 if (OPTBIT(IN6P_RFC2292)) {
2506 error = EINVAL;
2507 break;
2508 }
2509 OPTSET(IN6P_HOPLIMIT);
2510 break;
2511
2512 case IPV6_RECVHOPOPTS:
2513 /* cannot mix with RFC2292 */
2514 if (OPTBIT(IN6P_RFC2292)) {
2515 error = EINVAL;
2516 break;
2517 }
2518 OPTSET(IN6P_HOPOPTS);
5ba3f43e 2519 capture_exthdrstat_in = TRUE;
6d2010ae
A
2520 break;
2521
2522 case IPV6_RECVDSTOPTS:
2523 /* cannot mix with RFC2292 */
2524 if (OPTBIT(IN6P_RFC2292)) {
2525 error = EINVAL;
2526 break;
2527 }
2528 OPTSET(IN6P_DSTOPTS);
5ba3f43e 2529 capture_exthdrstat_in = TRUE;
6d2010ae 2530 break;
1c79356b 2531
6d2010ae
A
2532 case IPV6_RECVRTHDRDSTOPTS:
2533 /* cannot mix with RFC2292 */
2534 if (OPTBIT(IN6P_RFC2292)) {
2535 error = EINVAL;
2536 break;
2537 }
2538 OPTSET(IN6P_RTHDRDSTOPTS);
5ba3f43e 2539 capture_exthdrstat_in = TRUE;
6d2010ae
A
2540 break;
2541
2542 case IPV6_RECVRTHDR:
2543 /* cannot mix with RFC2292 */
2544 if (OPTBIT(IN6P_RFC2292)) {
2545 error = EINVAL;
2546 break;
2547 }
2548 OPTSET(IN6P_RTHDR);
5ba3f43e 2549 capture_exthdrstat_in = TRUE;
9bccf70c 2550 break;
1c79356b 2551
6d2010ae
A
2552 case IPV6_RECVPATHMTU:
2553 /*
2554 * We ignore this option for TCP
2555 * sockets.
2556 * (RFC3542 leaves this case
2557 * unspecified.)
2558 */
0a7de745 2559 if (uproto != IPPROTO_TCP) {
6d2010ae 2560 OPTSET(IN6P_MTU);
0a7de745 2561 }
9bccf70c 2562 break;
1c79356b 2563
9bccf70c
A
2564 case IPV6_V6ONLY:
2565 /*
2566 * make setsockopt(IPV6_V6ONLY)
2567 * available only prior to bind(2).
2568 * see ipng mailing list, Jun 22 2001.
2569 */
6d2010ae 2570 if (in6p->inp_lport ||
39236c6e 2571 !IN6_IS_ADDR_UNSPECIFIED(
0a7de745 2572 &in6p->in6p_laddr)) {
9bccf70c 2573 error = EINVAL;
1c79356b 2574 break;
1c79356b 2575 }
9bccf70c 2576 OPTSET(IN6P_IPV6_V6ONLY);
0a7de745 2577 if (optval) {
6d2010ae 2578 in6p->inp_vflag &= ~INP_IPV4;
0a7de745 2579 } else {
6d2010ae 2580 in6p->inp_vflag |= INP_IPV4;
0a7de745 2581 }
9bccf70c 2582 break;
39236c6e 2583
b0d623f7 2584 case IPV6_RECVTCLASS:
6d2010ae 2585 /* we can mix with RFC2292 */
b0d623f7
A
2586 OPTSET(IN6P_TCLASS);
2587 break;
39236c6e 2588
6d2010ae
A
2589 case IPV6_AUTOFLOWLABEL:
2590 OPTSET(IN6P_AUTOFLOWLABEL);
2591 break;
1c79356b
A
2592 }
2593 break;
9bccf70c 2594
6d2010ae
A
2595 case IPV6_TCLASS:
2596 case IPV6_DONTFRAG:
2597 case IPV6_USE_MIN_MTU:
39236c6e
A
2598 case IPV6_PREFER_TEMPADDR: {
2599 struct ip6_pktopts **optp;
2600
0a7de745 2601 if (optlen != sizeof(optval)) {
6d2010ae
A
2602 error = EINVAL;
2603 break;
2604 }
2605 error = sooptcopyin(sopt, &optval,
0a7de745
A
2606 sizeof(optval), sizeof(optval));
2607 if (error) {
6d2010ae 2608 break;
0a7de745 2609 }
39236c6e
A
2610
2611 optp = &in6p->in6p_outputopts;
2612 error = ip6_pcbopt(optname, (u_char *)&optval,
0a7de745 2613 sizeof(optval), optp, uproto);
d9a64523
A
2614
2615 if (optname == IPV6_TCLASS) {
2616 // Add in the ECN flags
2617 u_int8_t tos = (in6p->inp_ip_tos & ~IPTOS_ECN_MASK);
2618 u_int8_t ecn = optval & IPTOS_ECN_MASK;
2619 in6p->inp_ip_tos = tos | ecn;
2620 }
39236c6e
A
2621 break;
2622 }
6d2010ae
A
2623
2624 case IPV6_2292PKTINFO:
2625 case IPV6_2292HOPLIMIT:
2626 case IPV6_2292HOPOPTS:
2627 case IPV6_2292DSTOPTS:
2628 case IPV6_2292RTHDR:
9bccf70c 2629 /* RFC 2292 */
0a7de745 2630 if (optlen != sizeof(int)) {
9bccf70c
A
2631 error = EINVAL;
2632 break;
2633 }
2634 error = sooptcopyin(sopt, &optval,
0a7de745
A
2635 sizeof(optval), sizeof(optval));
2636 if (error) {
9bccf70c 2637 break;
0a7de745 2638 }
9bccf70c 2639 switch (optname) {
6d2010ae
A
2640 case IPV6_2292PKTINFO:
2641 OPTSET2292(IN6P_PKTINFO);
9bccf70c 2642 break;
6d2010ae
A
2643 case IPV6_2292HOPLIMIT:
2644 OPTSET2292(IN6P_HOPLIMIT);
9bccf70c 2645 break;
6d2010ae 2646 case IPV6_2292HOPOPTS:
9bccf70c
A
2647 /*
2648 * Check super-user privilege.
2649 * See comments for IPV6_RECVHOPOPTS.
2650 */
0a7de745
A
2651 if (!privileged) {
2652 return EPERM;
2653 }
6d2010ae 2654 OPTSET2292(IN6P_HOPOPTS);
5ba3f43e 2655 capture_exthdrstat_in = TRUE;
9bccf70c 2656 break;
6d2010ae 2657 case IPV6_2292DSTOPTS:
0a7de745
A
2658 if (!privileged) {
2659 return EPERM;
2660 }
2661 OPTSET2292(IN6P_DSTOPTS |
39236c6e 2662 IN6P_RTHDRDSTOPTS); /* XXX */
5ba3f43e 2663 capture_exthdrstat_in = TRUE;
9bccf70c 2664 break;
6d2010ae
A
2665 case IPV6_2292RTHDR:
2666 OPTSET2292(IN6P_RTHDR);
5ba3f43e 2667 capture_exthdrstat_in = TRUE;
1c79356b 2668 break;
1c79356b
A
2669 }
2670 break;
39236c6e 2671
6d2010ae
A
2672 case IPV6_3542PKTINFO:
2673 case IPV6_3542HOPOPTS:
2674 case IPV6_3542RTHDR:
2675 case IPV6_3542DSTOPTS:
2676 case IPV6_RTHDRDSTOPTS:
39236c6e 2677 case IPV6_3542NEXTHOP: {
316670eb 2678 struct ip6_pktopts **optp;
6d2010ae
A
2679 /* new advanced API (RFC3542) */
2680 struct mbuf *m;
1c79356b 2681
6d2010ae
A
2682 /* cannot mix with RFC2292 */
2683 if (OPTBIT(IN6P_RFC2292)) {
b0d623f7
A
2684 error = EINVAL;
2685 break;
2686 }
6d2010ae 2687 error = soopt_getm(sopt, &m);
0a7de745 2688 if (error != 0) {
6d2010ae 2689 break;
0a7de745 2690 }
6d2010ae 2691 error = soopt_mcopyin(sopt, m);
0a7de745 2692 if (error != 0) {
b0d623f7 2693 break;
0a7de745 2694 }
39236c6e 2695
316670eb 2696 optp = &in6p->in6p_outputopts;
39236c6e
A
2697 error = ip6_pcbopt(optname, mtod(m, u_char *),
2698 m->m_len, optp, uproto);
6d2010ae 2699 m_freem(m);
b0d623f7 2700 break;
6d2010ae
A
2701 }
2702#undef OPTSET
1c79356b
A
2703 case IPV6_MULTICAST_IF:
2704 case IPV6_MULTICAST_HOPS:
2705 case IPV6_MULTICAST_LOOP:
2706 case IPV6_JOIN_GROUP:
2707 case IPV6_LEAVE_GROUP:
6d2010ae
A
2708 case IPV6_MSFILTER:
2709 case MCAST_BLOCK_SOURCE:
2710 case MCAST_UNBLOCK_SOURCE:
2711 case MCAST_JOIN_GROUP:
2712 case MCAST_LEAVE_GROUP:
2713 case MCAST_JOIN_SOURCE_GROUP:
2714 case MCAST_LEAVE_SOURCE_GROUP:
2715 error = ip6_setmoptions(in6p, sopt);
1c79356b
A
2716 break;
2717
9bccf70c
A
2718 case IPV6_PORTRANGE:
2719 error = sooptcopyin(sopt, &optval,
0a7de745
A
2720 sizeof(optval), sizeof(optval));
2721 if (error) {
9bccf70c 2722 break;
0a7de745 2723 }
1c79356b 2724
9bccf70c
A
2725 switch (optval) {
2726 case IPV6_PORTRANGE_DEFAULT:
6d2010ae
A
2727 in6p->inp_flags &= ~(INP_LOWPORT);
2728 in6p->inp_flags &= ~(INP_HIGHPORT);
9bccf70c 2729 break;
1c79356b 2730
9bccf70c 2731 case IPV6_PORTRANGE_HIGH:
6d2010ae
A
2732 in6p->inp_flags &= ~(INP_LOWPORT);
2733 in6p->inp_flags |= INP_HIGHPORT;
9bccf70c 2734 break;
1c79356b 2735
9bccf70c 2736 case IPV6_PORTRANGE_LOW:
6d2010ae
A
2737 in6p->inp_flags &= ~(INP_HIGHPORT);
2738 in6p->inp_flags |= INP_LOWPORT;
9bccf70c 2739 break;
1c79356b 2740
9bccf70c
A
2741 default:
2742 error = EINVAL;
2743 break;
2744 }
1c79356b 2745 break;
1c79356b 2746#if IPSEC
39236c6e 2747 case IPV6_IPSEC_POLICY: {
1c79356b
A
2748 caddr_t req = NULL;
2749 size_t len = 0;
1c79356b 2750 struct mbuf *m;
39037602 2751
0a7de745 2752 if ((error = soopt_getm(sopt, &m)) != 0) {
1c79356b 2753 break;
0a7de745
A
2754 }
2755 if ((error = soopt_mcopyin(sopt, m)) != 0) {
1c79356b 2756 break;
0a7de745 2757 }
39037602 2758
39236c6e
A
2759 req = mtod(m, caddr_t);
2760 len = m->m_len;
1c79356b 2761 error = ipsec6_set_policy(in6p, optname, req,
0a7de745 2762 len, privileged);
1c79356b 2763 m_freem(m);
1c79356b 2764 break;
39236c6e
A
2765 }
2766#endif /* IPSEC */
6d2010ae
A
2767 /*
2768 * IPv6 variant of IP_BOUND_IF; for details see
2769 * comments on IP_BOUND_IF in ip_ctloutput().
2770 */
2771 case IPV6_BOUND_IF:
2772 /* This option is settable only on IPv6 */
2773 if (!(in6p->inp_vflag & INP_IPV6)) {
2774 error = EINVAL;
2775 break;
2776 }
2777
2778 error = sooptcopyin(sopt, &optval,
0a7de745 2779 sizeof(optval), sizeof(optval));
6d2010ae 2780
0a7de745 2781 if (error) {
6d2010ae 2782 break;
0a7de745 2783 }
6d2010ae 2784
39236c6e 2785 error = inp_bindif(in6p, optval, NULL);
6d2010ae
A
2786 break;
2787
2788 case IPV6_NO_IFT_CELLULAR:
2789 /* This option is settable only for IPv6 */
2790 if (!(in6p->inp_vflag & INP_IPV6)) {
2791 error = EINVAL;
2792 break;
2793 }
2794
2795 error = sooptcopyin(sopt, &optval,
0a7de745 2796 sizeof(optval), sizeof(optval));
6d2010ae 2797
0a7de745 2798 if (error) {
6d2010ae 2799 break;
0a7de745 2800 }
6d2010ae 2801
39236c6e 2802 /* once set, it cannot be unset */
fe8ab488 2803 if (!optval && INP_NO_CELLULAR(in6p)) {
39236c6e
A
2804 error = EINVAL;
2805 break;
2806 }
2807
2808 error = so_set_restrictions(so,
2809 SO_RESTRICT_DENY_CELLULAR);
6d2010ae
A
2810 break;
2811
2812 case IPV6_OUT_IF:
2813 /* This option is not settable */
2814 error = EINVAL;
2815 break;
2816
1c79356b
A
2817 default:
2818 error = ENOPROTOOPT;
2819 break;
2820 }
5ba3f43e
A
2821 if (capture_exthdrstat_in) {
2822 if (uproto == IPPROTO_TCP) {
2823 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_in);
2824 } else if (uproto == IPPROTO_UDP) {
2825 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_in);
0a7de745
A
2826 }
2827 }
1c79356b
A
2828 break;
2829
1c79356b 2830 case SOPT_GET:
1c79356b 2831 switch (optname) {
6d2010ae
A
2832 case IPV6_2292PKTOPTIONS:
2833 /*
2834 * RFC3542 (effectively) deprecated the
2835 * semantics of the 2292-style pktoptions.
2836 * Since it was not reliable in nature (i.e.,
2837 * applications had to expect the lack of some
2838 * information after all), it would make sense
2839 * to simplify this part by always returning
2840 * empty data.
2841 */
2842 sopt->sopt_valsize = 0;
1c79356b
A
2843 break;
2844
6d2010ae
A
2845 case IPV6_RECVHOPOPTS:
2846 case IPV6_RECVDSTOPTS:
2847 case IPV6_RECVRTHDRDSTOPTS:
1c79356b 2848 case IPV6_UNICAST_HOPS:
6d2010ae
A
2849 case IPV6_RECVPKTINFO:
2850 case IPV6_RECVHOPLIMIT:
2851 case IPV6_RECVRTHDR:
2852 case IPV6_RECVPATHMTU:
9bccf70c 2853 case IPV6_V6ONLY:
1c79356b 2854 case IPV6_PORTRANGE:
b0d623f7 2855 case IPV6_RECVTCLASS:
6d2010ae 2856 case IPV6_AUTOFLOWLABEL:
1c79356b 2857 switch (optname) {
6d2010ae
A
2858 case IPV6_RECVHOPOPTS:
2859 optval = OPTBIT(IN6P_HOPOPTS);
2860 break;
2861
2862 case IPV6_RECVDSTOPTS:
2863 optval = OPTBIT(IN6P_DSTOPTS);
2864 break;
2865
2866 case IPV6_RECVRTHDRDSTOPTS:
2867 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2868 break;
2869
1c79356b 2870 case IPV6_UNICAST_HOPS:
1c79356b 2871 optval = in6p->in6p_hops;
1c79356b
A
2872 break;
2873
6d2010ae
A
2874 case IPV6_RECVPKTINFO:
2875 optval = OPTBIT(IN6P_PKTINFO);
2876 break;
2877
2878 case IPV6_RECVHOPLIMIT:
2879 optval = OPTBIT(IN6P_HOPLIMIT);
2880 break;
2881
2882 case IPV6_RECVRTHDR:
2883 optval = OPTBIT(IN6P_RTHDR);
2884 break;
2885
2886 case IPV6_RECVPATHMTU:
2887 optval = OPTBIT(IN6P_MTU);
1c79356b
A
2888 break;
2889
9bccf70c 2890 case IPV6_V6ONLY:
55e303ae 2891 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1c79356b 2892 break;
1c79356b 2893
39236c6e 2894 case IPV6_PORTRANGE: {
1c79356b 2895 int flags;
6d2010ae 2896 flags = in6p->inp_flags;
0a7de745 2897 if (flags & INP_HIGHPORT) {
1c79356b 2898 optval = IPV6_PORTRANGE_HIGH;
0a7de745 2899 } else if (flags & INP_LOWPORT) {
1c79356b 2900 optval = IPV6_PORTRANGE_LOW;
0a7de745 2901 } else {
1c79356b 2902 optval = 0;
0a7de745 2903 }
1c79356b 2904 break;
39236c6e 2905 }
b0d623f7
A
2906 case IPV6_RECVTCLASS:
2907 optval = OPTBIT(IN6P_TCLASS);
2908 break;
2909
6d2010ae
A
2910 case IPV6_AUTOFLOWLABEL:
2911 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2912 break;
1c79356b 2913 }
0a7de745 2914 if (error) {
6d2010ae 2915 break;
0a7de745 2916 }
1c79356b 2917 error = sooptcopyout(sopt, &optval,
0a7de745 2918 sizeof(optval));
1c79356b
A
2919 break;
2920
39236c6e 2921 case IPV6_PATHMTU: {
6d2010ae
A
2922 u_int32_t pmtu = 0;
2923 struct ip6_mtuinfo mtuinfo;
2924 struct route_in6 sro;
2925
0a7de745 2926 bzero(&sro, sizeof(sro));
6d2010ae 2927
0a7de745
A
2928 if (!(so->so_state & SS_ISCONNECTED)) {
2929 return ENOTCONN;
2930 }
6d2010ae
A
2931 /*
2932 * XXX: we dot not consider the case of source
2933 * routing, or optional information to specify
2934 * the outgoing interface.
2935 */
2936 error = ip6_getpmtu(&sro, NULL, NULL,
cb323159 2937 &in6p->in6p_faddr, &pmtu);
39236c6e 2938 ROUTE_RELEASE(&sro);
0a7de745 2939 if (error) {
6d2010ae 2940 break;
0a7de745
A
2941 }
2942 if (pmtu > IPV6_MAXPACKET) {
6d2010ae 2943 pmtu = IPV6_MAXPACKET;
0a7de745 2944 }
6d2010ae 2945
0a7de745 2946 bzero(&mtuinfo, sizeof(mtuinfo));
6d2010ae
A
2947 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2948 optdata = (void *)&mtuinfo;
0a7de745 2949 optdatalen = sizeof(mtuinfo);
6d2010ae
A
2950 error = sooptcopyout(sopt, optdata,
2951 optdatalen);
2952 break;
2953 }
2954
2955 case IPV6_2292PKTINFO:
2956 case IPV6_2292HOPLIMIT:
2957 case IPV6_2292HOPOPTS:
2958 case IPV6_2292RTHDR:
2959 case IPV6_2292DSTOPTS:
9bccf70c 2960 switch (optname) {
6d2010ae 2961 case IPV6_2292PKTINFO:
9bccf70c 2962 optval = OPTBIT(IN6P_PKTINFO);
1c79356b 2963 break;
6d2010ae 2964 case IPV6_2292HOPLIMIT:
1c79356b
A
2965 optval = OPTBIT(IN6P_HOPLIMIT);
2966 break;
6d2010ae 2967 case IPV6_2292HOPOPTS:
9bccf70c 2968 optval = OPTBIT(IN6P_HOPOPTS);
1c79356b 2969 break;
6d2010ae 2970 case IPV6_2292RTHDR:
9bccf70c 2971 optval = OPTBIT(IN6P_RTHDR);
1c79356b 2972 break;
6d2010ae 2973 case IPV6_2292DSTOPTS:
0a7de745 2974 optval = OPTBIT(IN6P_DSTOPTS |
39236c6e 2975 IN6P_RTHDRDSTOPTS);
1c79356b 2976 break;
1c79356b 2977 }
1c79356b 2978 error = sooptcopyout(sopt, &optval,
0a7de745 2979 sizeof(optval));
1c79356b 2980 break;
39236c6e 2981
6d2010ae
A
2982 case IPV6_PKTINFO:
2983 case IPV6_HOPOPTS:
2984 case IPV6_RTHDR:
2985 case IPV6_DSTOPTS:
2986 case IPV6_RTHDRDSTOPTS:
2987 case IPV6_NEXTHOP:
b0d623f7 2988 case IPV6_TCLASS:
6d2010ae
A
2989 case IPV6_DONTFRAG:
2990 case IPV6_USE_MIN_MTU:
2991 case IPV6_PREFER_TEMPADDR:
2992 error = ip6_getpcbopt(in6p->in6p_outputopts,
2993 optname, sopt);
b0d623f7
A
2994 break;
2995
1c79356b
A
2996 case IPV6_MULTICAST_IF:
2997 case IPV6_MULTICAST_HOPS:
2998 case IPV6_MULTICAST_LOOP:
6d2010ae
A
2999 case IPV6_MSFILTER:
3000 error = ip6_getmoptions(in6p, sopt);
1c79356b 3001 break;
1c79356b 3002#if IPSEC
39236c6e 3003 case IPV6_IPSEC_POLICY: {
fe8ab488 3004 error = 0; /* This option is no longer supported */
1c79356b 3005 break;
39236c6e
A
3006 }
3007#endif /* IPSEC */
6d2010ae 3008 case IPV6_BOUND_IF:
0a7de745 3009 if (in6p->inp_flags & INP_BOUND_IF) {
316670eb 3010 optval = in6p->inp_boundifp->if_index;
0a7de745 3011 }
6d2010ae 3012 error = sooptcopyout(sopt, &optval,
0a7de745 3013 sizeof(optval));
6d2010ae
A
3014 break;
3015
3016 case IPV6_NO_IFT_CELLULAR:
fe8ab488 3017 optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
6d2010ae 3018 error = sooptcopyout(sopt, &optval,
0a7de745 3019 sizeof(optval));
6d2010ae
A
3020 break;
3021
3022 case IPV6_OUT_IF:
316670eb
A
3023 optval = (in6p->in6p_last_outifp != NULL) ?
3024 in6p->in6p_last_outifp->if_index : 0;
6d2010ae 3025 error = sooptcopyout(sopt, &optval,
0a7de745 3026 sizeof(optval));
6d2010ae
A
3027 break;
3028
1c79356b
A
3029 default:
3030 error = ENOPROTOOPT;
3031 break;
3032 }
3033 break;
3034 }
3e170ce0
A
3035 } else if (level == IPPROTO_UDP) {
3036 error = udp_ctloutput(so, sopt);
1c79356b
A
3037 } else {
3038 error = EINVAL;
1c79356b 3039 }
0a7de745 3040 return error;
1c79356b
A
3041}
3042
6d2010ae
A
3043int
3044ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 3045{
6d2010ae
A
3046 int error = 0, optval, optlen;
3047 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
3048 struct inpcb *in6p = sotoinpcb(so);
3049 int level, op, optname;
1c79356b 3050
6d2010ae
A
3051 level = sopt->sopt_level;
3052 op = sopt->sopt_dir;
3053 optname = sopt->sopt_name;
3054 optlen = sopt->sopt_valsize;
3055
0a7de745
A
3056 if (level != IPPROTO_IPV6) {
3057 return EINVAL;
3058 }
6d2010ae
A
3059
3060 switch (optname) {
3061 case IPV6_CHECKSUM:
3062 /*
3063 * For ICMPv6 sockets, no modification allowed for checksum
3064 * offset, permit "no change" values to help existing apps.
3065 *
3066 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
3067 * for an ICMPv6 socket will fail."
3068 * The current behavior does not meet RFC3542.
3069 */
3070 switch (op) {
3071 case SOPT_SET:
0a7de745 3072 if (optlen != sizeof(int)) {
6d2010ae
A
3073 error = EINVAL;
3074 break;
3075 }
0a7de745
A
3076 error = sooptcopyin(sopt, &optval, sizeof(optval),
3077 sizeof(optval));
3078 if (error) {
6d2010ae 3079 break;
0a7de745 3080 }
6d2010ae
A
3081 if ((optval % 2) != 0) {
3082 /* the API assumes even offset values */
3083 error = EINVAL;
39236c6e 3084 } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
0a7de745 3085 if (optval != icmp6off) {
6d2010ae 3086 error = EINVAL;
0a7de745 3087 }
39236c6e 3088 } else {
6d2010ae 3089 in6p->in6p_cksum = optval;
39236c6e 3090 }
6d2010ae
A
3091 break;
3092
3093 case SOPT_GET:
0a7de745 3094 if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
6d2010ae 3095 optval = icmp6off;
0a7de745 3096 } else {
6d2010ae 3097 optval = in6p->in6p_cksum;
0a7de745 3098 }
6d2010ae 3099
0a7de745 3100 error = sooptcopyout(sopt, &optval, sizeof(optval));
6d2010ae
A
3101 break;
3102
3103 default:
3104 error = EINVAL;
3105 break;
3106 }
3107 break;
3108
3109 default:
3110 error = ENOPROTOOPT;
3111 break;
3112 }
3113
0a7de745 3114 return error;
6d2010ae
A
3115}
3116
3117/*
3118 * Set up IP6 options in pcb for insertion in output packets or
3119 * specifying behavior of outgoing packets.
3120 */
3121static int
39236c6e
A
3122ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
3123 struct sockopt *sopt)
6d2010ae 3124{
39236c6e 3125#pragma unused(sopt)
6d2010ae
A
3126 struct ip6_pktopts *opt = *pktopt;
3127 int error = 0;
3128
3129 /* turn off any old options. */
39236c6e 3130 if (opt != NULL) {
6d2010ae
A
3131#if DIAGNOSTIC
3132 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
3133 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
0a7de745 3134 opt->ip6po_rhinfo.ip6po_rhi_rthdr) {
39236c6e
A
3135 printf("%s: all specified options are cleared.\n",
3136 __func__);
0a7de745 3137 }
6d2010ae
A
3138#endif
3139 ip6_clearpktopts(opt, -1);
3140 } else {
0a7de745
A
3141 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
3142 if (opt == NULL) {
3143 return ENOBUFS;
3144 }
6d2010ae 3145 }
1c79356b
A
3146 *pktopt = NULL;
3147
39236c6e 3148 if (m == NULL || m->m_len == 0) {
1c79356b 3149 /*
55e303ae
A
3150 * Only turning off any previous options, regardless of
3151 * whether the opt is just created or given.
1c79356b 3152 */
0a7de745 3153 if (opt != NULL) {
9bccf70c 3154 FREE(opt, M_IP6OPT);
0a7de745
A
3155 }
3156 return 0;
1c79356b
A
3157 }
3158
3159 /* set options specified by user. */
39236c6e 3160 if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) {
6d2010ae 3161 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
55e303ae 3162 FREE(opt, M_IP6OPT);
0a7de745 3163 return error;
1c79356b
A
3164 }
3165 *pktopt = opt;
0a7de745 3166 return 0;
1c79356b
A
3167}
3168
6d2010ae
A
3169/*
3170 * initialize ip6_pktopts. beware that there are non-zero default values in
3171 * the struct.
3172 */
3173void
3174ip6_initpktopts(struct ip6_pktopts *opt)
3175{
0a7de745
A
3176 bzero(opt, sizeof(*opt));
3177 opt->ip6po_hlim = -1; /* -1 means default hop limit */
3178 opt->ip6po_tclass = -1; /* -1 means default traffic class */
6d2010ae
A
3179 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
3180 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
3181}
3182
b0d623f7 3183static int
316670eb
A
3184ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
3185 int uproto)
b0d623f7
A
3186{
3187 struct ip6_pktopts *opt;
3188
3189 opt = *pktopt;
3190 if (opt == NULL) {
0a7de745
A
3191 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
3192 if (opt == NULL) {
3193 return ENOBUFS;
3194 }
b0d623f7
A
3195 ip6_initpktopts(opt);
3196 *pktopt = opt;
3197 }
3198
0a7de745 3199 return ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto);
b0d623f7
A
3200}
3201
3202static int
3203ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
3204{
3205 void *optdata = NULL;
3206 int optdatalen = 0;
6d2010ae 3207 struct ip6_ext *ip6e;
6d2010ae
A
3208 struct in6_pktinfo null_pktinfo;
3209 int deftclass = 0, on;
3210 int defminmtu = IP6PO_MINMTU_MCASTONLY;
3211 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
b0d623f7 3212
316670eb 3213
b0d623f7 3214 switch (optname) {
6d2010ae 3215 case IPV6_PKTINFO:
0a7de745 3216 if (pktopt && pktopt->ip6po_pktinfo) {
6d2010ae 3217 optdata = (void *)pktopt->ip6po_pktinfo;
0a7de745 3218 } else {
6d2010ae 3219 /* XXX: we don't have to do this every time... */
0a7de745 3220 bzero(&null_pktinfo, sizeof(null_pktinfo));
6d2010ae
A
3221 optdata = (void *)&null_pktinfo;
3222 }
0a7de745 3223 optdatalen = sizeof(struct in6_pktinfo);
6d2010ae 3224 break;
39236c6e 3225
b0d623f7 3226 case IPV6_TCLASS:
0a7de745 3227 if (pktopt && pktopt->ip6po_tclass >= 0) {
6d2010ae 3228 optdata = (void *)&pktopt->ip6po_tclass;
0a7de745 3229 } else {
6d2010ae 3230 optdata = (void *)&deftclass;
0a7de745
A
3231 }
3232 optdatalen = sizeof(int);
6d2010ae 3233 break;
39236c6e 3234
6d2010ae
A
3235 case IPV6_HOPOPTS:
3236 if (pktopt && pktopt->ip6po_hbh) {
3237 optdata = (void *)pktopt->ip6po_hbh;
3238 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
3239 optdatalen = (ip6e->ip6e_len + 1) << 3;
3240 }
3241 break;
39236c6e 3242
6d2010ae
A
3243 case IPV6_RTHDR:
3244 if (pktopt && pktopt->ip6po_rthdr) {
3245 optdata = (void *)pktopt->ip6po_rthdr;
3246 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
3247 optdatalen = (ip6e->ip6e_len + 1) << 3;
3248 }
3249 break;
39236c6e 3250
6d2010ae
A
3251 case IPV6_RTHDRDSTOPTS:
3252 if (pktopt && pktopt->ip6po_dest1) {
3253 optdata = (void *)pktopt->ip6po_dest1;
3254 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
3255 optdatalen = (ip6e->ip6e_len + 1) << 3;
3256 }
3257 break;
39236c6e 3258
6d2010ae
A
3259 case IPV6_DSTOPTS:
3260 if (pktopt && pktopt->ip6po_dest2) {
3261 optdata = (void *)pktopt->ip6po_dest2;
3262 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
3263 optdatalen = (ip6e->ip6e_len + 1) << 3;
3264 }
3265 break;
39236c6e 3266
6d2010ae
A
3267 case IPV6_NEXTHOP:
3268 if (pktopt && pktopt->ip6po_nexthop) {
3269 optdata = (void *)pktopt->ip6po_nexthop;
3270 optdatalen = pktopt->ip6po_nexthop->sa_len;
3271 }
3272 break;
39236c6e 3273
6d2010ae 3274 case IPV6_USE_MIN_MTU:
0a7de745 3275 if (pktopt) {
6d2010ae 3276 optdata = (void *)&pktopt->ip6po_minmtu;
0a7de745 3277 } else {
6d2010ae 3278 optdata = (void *)&defminmtu;
0a7de745
A
3279 }
3280 optdatalen = sizeof(int);
6d2010ae 3281 break;
39236c6e 3282
6d2010ae 3283 case IPV6_DONTFRAG:
0a7de745 3284 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) {
6d2010ae 3285 on = 1;
0a7de745 3286 } else {
6d2010ae 3287 on = 0;
0a7de745 3288 }
6d2010ae 3289 optdata = (void *)&on;
0a7de745 3290 optdatalen = sizeof(on);
6d2010ae 3291 break;
39236c6e 3292
6d2010ae 3293 case IPV6_PREFER_TEMPADDR:
0a7de745 3294 if (pktopt) {
6d2010ae 3295 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
0a7de745 3296 } else {
6d2010ae 3297 optdata = (void *)&defpreftemp;
0a7de745
A
3298 }
3299 optdatalen = sizeof(int);
b0d623f7 3300 break;
39236c6e 3301
0a7de745 3302 default: /* should not happen */
b0d623f7
A
3303#ifdef DIAGNOSTIC
3304 panic("ip6_getpcbopt: unexpected option\n");
3305#endif
0a7de745 3306 return ENOPROTOOPT;
b0d623f7
A
3307 }
3308
0a7de745 3309 return sooptcopyout(sopt, optdata, optdatalen);
1c79356b
A
3310}
3311
3312void
316670eb 3313ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
1c79356b 3314{
0a7de745 3315 if (pktopt == NULL) {
1c79356b 3316 return;
0a7de745 3317 }
1c79356b 3318
6d2010ae 3319 if (optname == -1 || optname == IPV6_PKTINFO) {
0a7de745 3320 if (pktopt->ip6po_pktinfo) {
9bccf70c 3321 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
0a7de745 3322 }
1c79356b
A
3323 pktopt->ip6po_pktinfo = NULL;
3324 }
0a7de745 3325 if (optname == -1 || optname == IPV6_HOPLIMIT) {
1c79356b 3326 pktopt->ip6po_hlim = -1;
0a7de745
A
3327 }
3328 if (optname == -1 || optname == IPV6_TCLASS) {
b0d623f7 3329 pktopt->ip6po_tclass = -1;
0a7de745 3330 }
6d2010ae 3331 if (optname == -1 || optname == IPV6_NEXTHOP) {
39236c6e 3332 ROUTE_RELEASE(&pktopt->ip6po_nextroute);
0a7de745 3333 if (pktopt->ip6po_nexthop) {
9bccf70c 3334 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
0a7de745 3335 }
1c79356b
A
3336 pktopt->ip6po_nexthop = NULL;
3337 }
6d2010ae 3338 if (optname == -1 || optname == IPV6_HOPOPTS) {
0a7de745 3339 if (pktopt->ip6po_hbh) {
9bccf70c 3340 FREE(pktopt->ip6po_hbh, M_IP6OPT);
0a7de745 3341 }
1c79356b
A
3342 pktopt->ip6po_hbh = NULL;
3343 }
6d2010ae 3344 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
0a7de745 3345 if (pktopt->ip6po_dest1) {
9bccf70c 3346 FREE(pktopt->ip6po_dest1, M_IP6OPT);
0a7de745 3347 }
1c79356b
A
3348 pktopt->ip6po_dest1 = NULL;
3349 }
6d2010ae 3350 if (optname == -1 || optname == IPV6_RTHDR) {
0a7de745 3351 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) {
9bccf70c 3352 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
0a7de745 3353 }
1c79356b 3354 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
39236c6e 3355 ROUTE_RELEASE(&pktopt->ip6po_route);
1c79356b 3356 }
6d2010ae 3357 if (optname == -1 || optname == IPV6_DSTOPTS) {
0a7de745 3358 if (pktopt->ip6po_dest2) {
9bccf70c 3359 FREE(pktopt->ip6po_dest2, M_IP6OPT);
0a7de745 3360 }
1c79356b
A
3361 pktopt->ip6po_dest2 = NULL;
3362 }
3363}
3364
0a7de745
A
3365#define PKTOPT_EXTHDRCPY(type) do { \
3366 if (src->type) { \
3367 int hlen = \
3368 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \
3369 dst->type = _MALLOC(hlen, M_IP6OPT, canwait); \
3370 if (dst->type == NULL && canwait == M_NOWAIT) \
3371 goto bad; \
3372 bcopy(src->type, dst->type, hlen); \
3373 } \
9bccf70c 3374} while (0)
1c79356b 3375
6d2010ae
A
3376static int
3377copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
1c79356b 3378{
0a7de745 3379 if (dst == NULL || src == NULL) {
316670eb 3380 printf("copypktopts: invalid argument\n");
0a7de745 3381 return EINVAL;
1c79356b
A
3382 }
3383
1c79356b 3384 dst->ip6po_hlim = src->ip6po_hlim;
b0d623f7 3385 dst->ip6po_tclass = src->ip6po_tclass;
6d2010ae 3386 dst->ip6po_flags = src->ip6po_flags;
1c79356b 3387 if (src->ip6po_pktinfo) {
0a7de745 3388 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
39236c6e 3389 M_IP6OPT, canwait);
0a7de745 3390 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT) {
1c79356b 3391 goto bad;
0a7de745 3392 }
1c79356b
A
3393 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3394 }
3395 if (src->ip6po_nexthop) {
3396 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
39236c6e 3397 M_IP6OPT, canwait);
0a7de745 3398 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) {
1c79356b 3399 goto bad;
0a7de745 3400 }
1c79356b 3401 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
39236c6e 3402 src->ip6po_nexthop->sa_len);
1c79356b
A
3403 }
3404 PKTOPT_EXTHDRCPY(ip6po_hbh);
3405 PKTOPT_EXTHDRCPY(ip6po_dest1);
3406 PKTOPT_EXTHDRCPY(ip6po_dest2);
3407 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
0a7de745 3408 return 0;
1c79356b 3409
39236c6e 3410bad:
6d2010ae 3411 ip6_clearpktopts(dst, -1);
0a7de745 3412 return ENOBUFS;
1c79356b
A
3413}
3414#undef PKTOPT_EXTHDRCPY
3415
6d2010ae
A
3416struct ip6_pktopts *
3417ip6_copypktopts(struct ip6_pktopts *src, int canwait)
3418{
3419 int error;
3420 struct ip6_pktopts *dst;
3421
0a7de745
A
3422 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
3423 if (dst == NULL) {
3424 return NULL;
3425 }
6d2010ae
A
3426 ip6_initpktopts(dst);
3427
3428 if ((error = copypktopts(dst, src, canwait)) != 0) {
3429 FREE(dst, M_IP6OPT);
0a7de745 3430 return NULL;
6d2010ae
A
3431 }
3432
0a7de745 3433 return dst;
6d2010ae
A
3434}
3435
1c79356b 3436void
316670eb 3437ip6_freepcbopts(struct ip6_pktopts *pktopt)
1c79356b 3438{
0a7de745 3439 if (pktopt == NULL) {
1c79356b 3440 return;
0a7de745 3441 }
1c79356b 3442
6d2010ae 3443 ip6_clearpktopts(pktopt, -1);
1c79356b 3444
9bccf70c 3445 FREE(pktopt, M_IP6OPT);
1c79356b
A
3446}
3447
6d2010ae
A
3448void
3449ip6_moptions_init(void)
1c79356b 3450{
0a7de745 3451 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof(im6o_debug));
1c79356b 3452
0a7de745
A
3453 im6o_size = (im6o_debug == 0) ? sizeof(struct ip6_moptions) :
3454 sizeof(struct ip6_moptions_dbg);
6d2010ae
A
3455
3456 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
3457 IM6O_ZONE_NAME);
3458 if (im6o_zone == NULL) {
3459 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
3460 /* NOTREACHED */
1c79356b 3461 }
6d2010ae
A
3462 zone_change(im6o_zone, Z_EXPAND, TRUE);
3463}
1c79356b 3464
6d2010ae
A
3465void
3466im6o_addref(struct ip6_moptions *im6o, int locked)
3467{
0a7de745 3468 if (!locked) {
6d2010ae 3469 IM6O_LOCK(im6o);
0a7de745 3470 } else {
6d2010ae 3471 IM6O_LOCK_ASSERT_HELD(im6o);
0a7de745 3472 }
1c79356b 3473
6d2010ae
A
3474 if (++im6o->im6o_refcnt == 0) {
3475 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
3476 /* NOTREACHED */
3477 } else if (im6o->im6o_trace != NULL) {
3478 (*im6o->im6o_trace)(im6o, TRUE);
3479 }
3480
0a7de745 3481 if (!locked) {
6d2010ae 3482 IM6O_UNLOCK(im6o);
0a7de745 3483 }
1c79356b
A
3484}
3485
6d2010ae
A
3486void
3487im6o_remref(struct ip6_moptions *im6o)
1c79356b 3488{
6d2010ae 3489 int i;
1c79356b 3490
6d2010ae
A
3491 IM6O_LOCK(im6o);
3492 if (im6o->im6o_refcnt == 0) {
3493 panic("%s: im6o %p negative refcnt", __func__, im6o);
3494 /* NOTREACHED */
3495 } else if (im6o->im6o_trace != NULL) {
3496 (*im6o->im6o_trace)(im6o, FALSE);
3497 }
1c79356b 3498
6d2010ae
A
3499 --im6o->im6o_refcnt;
3500 if (im6o->im6o_refcnt > 0) {
3501 IM6O_UNLOCK(im6o);
3502 return;
3503 }
1c79356b 3504
6d2010ae
A
3505 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3506 struct in6_mfilter *imf;
1c79356b 3507
6d2010ae 3508 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
0a7de745 3509 if (imf != NULL) {
6d2010ae 3510 im6f_leave(imf);
0a7de745 3511 }
1c79356b 3512
6d2010ae 3513 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
1c79356b 3514
0a7de745 3515 if (imf != NULL) {
6d2010ae 3516 im6f_purge(imf);
0a7de745 3517 }
6d2010ae
A
3518
3519 IN6M_REMREF(im6o->im6o_membership[i]);
3520 im6o->im6o_membership[i] = NULL;
3521 }
3522 im6o->im6o_num_memberships = 0;
3523 if (im6o->im6o_mfilters != NULL) {
3524 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
3525 im6o->im6o_mfilters = NULL;
3526 }
3527 if (im6o->im6o_membership != NULL) {
3528 FREE(im6o->im6o_membership, M_IP6MOPTS);
3529 im6o->im6o_membership = NULL;
3530 }
3531 IM6O_UNLOCK(im6o);
3532
3533 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
3534
3535 if (!(im6o->im6o_debug & IFD_ALLOC)) {
3536 panic("%s: im6o %p cannot be freed", __func__, im6o);
3537 /* NOTREACHED */
1c79356b 3538 }
6d2010ae 3539 zfree(im6o_zone, im6o);
1c79356b
A
3540}
3541
6d2010ae
A
3542static void
3543im6o_trace(struct ip6_moptions *im6o, int refhold)
1c79356b 3544{
6d2010ae
A
3545 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3546 ctrace_t *tr;
3547 u_int32_t idx;
3548 u_int16_t *cnt;
1c79356b 3549
6d2010ae
A
3550 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3551 panic("%s: im6o %p has no debug structure", __func__, im6o);
3552 /* NOTREACHED */
3553 }
3554 if (refhold) {
3555 cnt = &im6o_dbg->im6o_refhold_cnt;
3556 tr = im6o_dbg->im6o_refhold;
3557 } else {
3558 cnt = &im6o_dbg->im6o_refrele_cnt;
3559 tr = im6o_dbg->im6o_refrele;
3560 }
3561
3562 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3563 ctrace_record(&tr[idx]);
3564}
3565
3566struct ip6_moptions *
3567ip6_allocmoptions(int how)
3568{
3569 struct ip6_moptions *im6o;
3570
3571 im6o = (how == M_WAITOK) ?
3572 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3573 if (im6o != NULL) {
3574 bzero(im6o, im6o_size);
3575 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3576 im6o->im6o_debug |= IFD_ALLOC;
3577 if (im6o_debug != 0) {
3578 im6o->im6o_debug |= IFD_DEBUG;
3579 im6o->im6o_trace = im6o_trace;
3580 }
3581 IM6O_ADDREF(im6o);
3582 }
3583
0a7de745 3584 return im6o;
1c79356b
A
3585}
3586
3587/*
3588 * Set IPv6 outgoing packet options based on advanced API.
3589 */
3590int
6d2010ae
A
3591ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3592 struct ip6_pktopts *stickyopt, int uproto)
1c79356b 3593{
39236c6e 3594 struct cmsghdr *cm = NULL;
1c79356b 3595
0a7de745
A
3596 if (control == NULL || opt == NULL) {
3597 return EINVAL;
3598 }
1c79356b 3599
b0d623f7 3600 ip6_initpktopts(opt);
6d2010ae
A
3601 if (stickyopt) {
3602 int error;
3603
3604 /*
3605 * If stickyopt is provided, make a local copy of the options
3606 * for this particular packet, then override them by ancillary
3607 * objects.
3608 * XXX: copypktopts() does not copy the cached route to a next
3609 * hop (if any). This is not very good in terms of efficiency,
3610 * but we can allow this since this option should be rarely
3611 * used.
3612 */
0a7de745
A
3613 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) {
3614 return error;
3615 }
6d2010ae 3616 }
1c79356b
A
3617
3618 /*
3619 * XXX: Currently, we assume all the optional information is stored
3620 * in a single mbuf.
3621 */
0a7de745
A
3622 if (control->m_next) {
3623 return EINVAL;
3624 }
1c79356b 3625
0a7de745
A
3626 if (control->m_len < CMSG_LEN(0)) {
3627 return EINVAL;
3628 }
6d2010ae 3629
0a7de745
A
3630 for (cm = M_FIRST_CMSGHDR(control);
3631 is_cmsg_valid(control, cm);
39236c6e 3632 cm = M_NXT_CMSGHDR(control, cm)) {
6d2010ae
A
3633 int error;
3634
0a7de745 3635 if (cm->cmsg_level != IPPROTO_IPV6) {
1c79356b 3636 continue;
0a7de745 3637 }
1c79356b 3638
6d2010ae
A
3639 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3640 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
0a7de745
A
3641 if (error) {
3642 return error;
3643 }
6d2010ae
A
3644 }
3645
0a7de745 3646 return 0;
6d2010ae
A
3647}
3648/*
3649 * Set a particular packet option, as a sticky option or an ancillary data
3650 * item. "len" can be 0 only when it's a sticky option.
3651 * We have 4 cases of combination of "sticky" and "cmsg":
3652 * "sticky=0, cmsg=0": impossible
3653 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3654 * "sticky=1, cmsg=0": RFC3542 socket option
3655 * "sticky=1, cmsg=1": RFC2292 socket option
3656 */
3657static int
3658ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3659 int sticky, int cmsg, int uproto)
3660{
3661 int minmtupolicy, preftemp;
3662 int error;
5ba3f43e 3663 boolean_t capture_exthdrstat_out = FALSE;
6d2010ae
A
3664
3665 if (!sticky && !cmsg) {
3666#ifdef DIAGNOSTIC
3667 printf("ip6_setpktopt: impossible case\n");
3668#endif
0a7de745 3669 return EINVAL;
6d2010ae
A
3670 }
3671
316670eb
A
3672 /*
3673 * Caller must have ensured that the buffer is at least
3674 * aligned on 32-bit boundary.
3675 */
0a7de745 3676 VERIFY(IS_P2ALIGNED(buf, sizeof(u_int32_t)));
316670eb 3677
6d2010ae
A
3678 /*
3679 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3680 * not be specified in the context of RFC3542. Conversely,
3681 * RFC3542 types should not be specified in the context of RFC2292.
3682 */
3683 if (!cmsg) {
3684 switch (optname) {
3685 case IPV6_2292PKTINFO:
3686 case IPV6_2292HOPLIMIT:
3687 case IPV6_2292NEXTHOP:
3688 case IPV6_2292HOPOPTS:
3689 case IPV6_2292DSTOPTS:
3690 case IPV6_2292RTHDR:
3691 case IPV6_2292PKTOPTIONS:
0a7de745 3692 return ENOPROTOOPT;
6d2010ae
A
3693 }
3694 }
3695 if (sticky && cmsg) {
3696 switch (optname) {
3697 case IPV6_PKTINFO:
3698 case IPV6_HOPLIMIT:
3699 case IPV6_NEXTHOP:
3700 case IPV6_HOPOPTS:
3701 case IPV6_DSTOPTS:
3702 case IPV6_RTHDRDSTOPTS:
3703 case IPV6_RTHDR:
3704 case IPV6_USE_MIN_MTU:
3705 case IPV6_DONTFRAG:
3706 case IPV6_TCLASS:
3707 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
0a7de745 3708 return ENOPROTOOPT;
6d2010ae
A
3709 }
3710 }
3711
3712 switch (optname) {
3713 case IPV6_2292PKTINFO:
39236c6e 3714 case IPV6_PKTINFO: {
6d2010ae
A
3715 struct ifnet *ifp = NULL;
3716 struct in6_pktinfo *pktinfo;
3717
0a7de745
A
3718 if (len != sizeof(struct in6_pktinfo)) {
3719 return EINVAL;
3720 }
6d2010ae 3721
316670eb 3722 pktinfo = (struct in6_pktinfo *)(void *)buf;
6d2010ae 3723
9bccf70c 3724 /*
6d2010ae
A
3725 * An application can clear any sticky IPV6_PKTINFO option by
3726 * doing a "regular" setsockopt with ipi6_addr being
3727 * in6addr_any and ipi6_ifindex being zero.
3728 * [RFC 3542, Section 6]
9bccf70c 3729 */
6d2010ae
A
3730 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3731 pktinfo->ipi6_ifindex == 0 &&
3732 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3733 ip6_clearpktopts(opt, optname);
3734 break;
3735 }
1c79356b 3736
6d2010ae
A
3737 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3738 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
0a7de745 3739 return EINVAL;
6d2010ae
A
3740 }
3741
3742 /* validate the interface index if specified. */
3743 ifnet_head_lock_shared();
3744
3745 if (pktinfo->ipi6_ifindex > if_index) {
3746 ifnet_head_done();
0a7de745 3747 return ENXIO;
6d2010ae 3748 }
39236c6e 3749
6d2010ae
A
3750 if (pktinfo->ipi6_ifindex) {
3751 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3752 if (ifp == NULL) {
3753 ifnet_head_done();
0a7de745 3754 return ENXIO;
1c79356b 3755 }
6d2010ae 3756 }
39236c6e 3757
6d2010ae 3758 ifnet_head_done();
1c79356b 3759
6d2010ae
A
3760 /*
3761 * We store the address anyway, and let in6_selectsrc()
3762 * validate the specified address. This is because ipi6_addr
3763 * may not have enough information about its scope zone, and
3764 * we may need additional information (such as outgoing
3765 * interface or the scope zone of a destination address) to
3766 * disambiguate the scope.
3767 * XXX: the delay of the validation may confuse the
3768 * application when it is used as a sticky option.
3769 */
3770 if (opt->ip6po_pktinfo == NULL) {
0a7de745 3771 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
6d2010ae 3772 M_IP6OPT, M_NOWAIT);
0a7de745
A
3773 if (opt->ip6po_pktinfo == NULL) {
3774 return ENOBUFS;
3775 }
6d2010ae 3776 }
0a7de745 3777 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
6d2010ae
A
3778 break;
3779 }
1c79356b 3780
6d2010ae 3781 case IPV6_2292HOPLIMIT:
39236c6e 3782 case IPV6_HOPLIMIT: {
6d2010ae 3783 int *hlimp;
1c79356b 3784
6d2010ae
A
3785 /*
3786 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3787 * to simplify the ordering among hoplimit options.
3788 */
0a7de745
A
3789 if (optname == IPV6_HOPLIMIT && sticky) {
3790 return ENOPROTOOPT;
3791 }
b0d623f7 3792
0a7de745
A
3793 if (len != sizeof(int)) {
3794 return EINVAL;
3795 }
316670eb 3796 hlimp = (int *)(void *)buf;
cb323159 3797 if (*hlimp < -1 || *hlimp > IPV6_MAXHLIM) {
0a7de745
A
3798 return EINVAL;
3799 }
b0d623f7 3800
6d2010ae
A
3801 opt->ip6po_hlim = *hlimp;
3802 break;
3803 }
3804
39236c6e 3805 case IPV6_TCLASS: {
6d2010ae
A
3806 int tclass;
3807
0a7de745
A
3808 if (len != sizeof(int)) {
3809 return EINVAL;
3810 }
316670eb 3811 tclass = *(int *)(void *)buf;
0a7de745
A
3812 if (tclass < -1 || tclass > 255) {
3813 return EINVAL;
3814 }
6d2010ae
A
3815
3816 opt->ip6po_tclass = tclass;
3817 break;
3818 }
3819
3820 case IPV6_2292NEXTHOP:
3821 case IPV6_NEXTHOP:
3822 error = suser(kauth_cred_get(), 0);
0a7de745
A
3823 if (error) {
3824 return EACCES;
3825 }
6d2010ae 3826
0a7de745 3827 if (len == 0) { /* just remove the option */
6d2010ae 3828 ip6_clearpktopts(opt, IPV6_NEXTHOP);
1c79356b 3829 break;
6d2010ae 3830 }
1c79356b 3831
6d2010ae 3832 /* check if cmsg_len is large enough for sa_len */
0a7de745
A
3833 if (len < sizeof(struct sockaddr) || len < *buf) {
3834 return EINVAL;
3835 }
6d2010ae 3836
39236c6e
A
3837 switch (SA(buf)->sa_family) {
3838 case AF_INET6: {
3839 struct sockaddr_in6 *sa6 = SIN6(buf);
6d2010ae 3840
0a7de745
A
3841 if (sa6->sin6_len != sizeof(struct sockaddr_in6)) {
3842 return EINVAL;
3843 }
6d2010ae
A
3844
3845 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3846 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
0a7de745 3847 return EINVAL;
6d2010ae
A
3848 }
3849 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3850 != 0) {
0a7de745 3851 return error;
6d2010ae 3852 }
1c79356b
A
3853 break;
3854 }
0a7de745 3855 case AF_LINK: /* should eventually be supported */
6d2010ae 3856 default:
0a7de745 3857 return EAFNOSUPPORT;
6d2010ae 3858 }
1c79356b 3859
6d2010ae
A
3860 /* turn off the previous option, then set the new option. */
3861 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3862 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
0a7de745
A
3863 if (opt->ip6po_nexthop == NULL) {
3864 return ENOBUFS;
3865 }
6d2010ae
A
3866 bcopy(buf, opt->ip6po_nexthop, *buf);
3867 break;
3868
3869 case IPV6_2292HOPOPTS:
39236c6e 3870 case IPV6_HOPOPTS: {
6d2010ae
A
3871 struct ip6_hbh *hbh;
3872 int hbhlen;
3873
3874 /*
3875 * XXX: We don't allow a non-privileged user to set ANY HbH
3876 * options, since per-option restriction has too much
3877 * overhead.
3878 */
3879 error = suser(kauth_cred_get(), 0);
0a7de745
A
3880 if (error) {
3881 return EACCES;
3882 }
6d2010ae
A
3883
3884 if (len == 0) {
3885 ip6_clearpktopts(opt, IPV6_HOPOPTS);
0a7de745 3886 break; /* just remove the option */
6d2010ae
A
3887 }
3888
3889 /* message length validation */
0a7de745
A
3890 if (len < sizeof(struct ip6_hbh)) {
3891 return EINVAL;
3892 }
316670eb 3893 hbh = (struct ip6_hbh *)(void *)buf;
6d2010ae 3894 hbhlen = (hbh->ip6h_len + 1) << 3;
0a7de745
A
3895 if (len != hbhlen) {
3896 return EINVAL;
3897 }
6d2010ae
A
3898
3899 /* turn off the previous option, then set the new option. */
3900 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3901 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
0a7de745
A
3902 if (opt->ip6po_hbh == NULL) {
3903 return ENOBUFS;
3904 }
6d2010ae 3905 bcopy(hbh, opt->ip6po_hbh, hbhlen);
5ba3f43e 3906 capture_exthdrstat_out = TRUE;
6d2010ae
A
3907 break;
3908 }
3909
3910 case IPV6_2292DSTOPTS:
3911 case IPV6_DSTOPTS:
39236c6e 3912 case IPV6_RTHDRDSTOPTS: {
6d2010ae
A
3913 struct ip6_dest *dest, **newdest = NULL;
3914 int destlen;
3915
3916 error = suser(kauth_cred_get(), 0);
0a7de745
A
3917 if (error) {
3918 return EACCES;
3919 }
6d2010ae
A
3920
3921 if (len == 0) {
3922 ip6_clearpktopts(opt, optname);
0a7de745 3923 break; /* just remove the option */
6d2010ae
A
3924 }
3925
3926 /* message length validation */
0a7de745
A
3927 if (len < sizeof(struct ip6_dest)) {
3928 return EINVAL;
3929 }
316670eb 3930 dest = (struct ip6_dest *)(void *)buf;
6d2010ae 3931 destlen = (dest->ip6d_len + 1) << 3;
0a7de745
A
3932 if (len != destlen) {
3933 return EINVAL;
3934 }
6d2010ae
A
3935
3936 /*
3937 * Determine the position that the destination options header
3938 * should be inserted; before or after the routing header.
3939 */
3940 switch (optname) {
3941 case IPV6_2292DSTOPTS:
3942 /*
3943 * The old advacned API is ambiguous on this point.
3944 * Our approach is to determine the position based
3945 * according to the existence of a routing header.
3946 * Note, however, that this depends on the order of the
3947 * extension headers in the ancillary data; the 1st
3948 * part of the destination options header must appear
3949 * before the routing header in the ancillary data,
3950 * too.
3951 * RFC3542 solved the ambiguity by introducing
3952 * separate ancillary data or option types.
1c79356b 3953 */
0a7de745 3954 if (opt->ip6po_rthdr == NULL) {
9bccf70c 3955 newdest = &opt->ip6po_dest1;
0a7de745 3956 } else {
9bccf70c 3957 newdest = &opt->ip6po_dest2;
0a7de745 3958 }
6d2010ae
A
3959 break;
3960 case IPV6_RTHDRDSTOPTS:
3961 newdest = &opt->ip6po_dest1;
3962 break;
3963 case IPV6_DSTOPTS:
3964 newdest = &opt->ip6po_dest2;
1c79356b
A
3965 break;
3966 }
3967
6d2010ae
A
3968 /* turn off the previous option, then set the new option. */
3969 ip6_clearpktopts(opt, optname);
3970 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
0a7de745
A
3971 if (*newdest == NULL) {
3972 return ENOBUFS;
3973 }
6d2010ae 3974 bcopy(dest, *newdest, destlen);
5ba3f43e 3975 capture_exthdrstat_out = TRUE;
6d2010ae
A
3976 break;
3977 }
1c79356b 3978
6d2010ae 3979 case IPV6_2292RTHDR:
39236c6e 3980 case IPV6_RTHDR: {
6d2010ae
A
3981 struct ip6_rthdr *rth;
3982 int rthlen;
3983
3984 if (len == 0) {
3985 ip6_clearpktopts(opt, IPV6_RTHDR);
0a7de745 3986 break; /* just remove the option */
1c79356b
A
3987 }
3988
6d2010ae 3989 /* message length validation */
0a7de745
A
3990 if (len < sizeof(struct ip6_rthdr)) {
3991 return EINVAL;
3992 }
316670eb 3993 rth = (struct ip6_rthdr *)(void *)buf;
6d2010ae 3994 rthlen = (rth->ip6r_len + 1) << 3;
0a7de745
A
3995 if (len != rthlen) {
3996 return EINVAL;
3997 }
6d2010ae
A
3998
3999 switch (rth->ip6r_type) {
4000 case IPV6_RTHDR_TYPE_0:
0a7de745
A
4001 if (rth->ip6r_len == 0) { /* must contain one addr */
4002 return EINVAL;
4003 }
4004 if (rth->ip6r_len % 2) { /* length must be even */
4005 return EINVAL;
4006 }
4007 if (rth->ip6r_len / 2 != rth->ip6r_segleft) {
4008 return EINVAL;
4009 }
6d2010ae 4010 break;
1c79356b 4011 default:
0a7de745 4012 return EINVAL; /* not supported */
1c79356b 4013 }
6d2010ae
A
4014
4015 /* turn off the previous option */
4016 ip6_clearpktopts(opt, IPV6_RTHDR);
4017 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
0a7de745
A
4018 if (opt->ip6po_rthdr == NULL) {
4019 return ENOBUFS;
4020 }
6d2010ae 4021 bcopy(rth, opt->ip6po_rthdr, rthlen);
5ba3f43e 4022 capture_exthdrstat_out = TRUE;
6d2010ae 4023 break;
1c79356b
A
4024 }
4025
6d2010ae 4026 case IPV6_USE_MIN_MTU:
0a7de745
A
4027 if (len != sizeof(int)) {
4028 return EINVAL;
4029 }
316670eb 4030 minmtupolicy = *(int *)(void *)buf;
6d2010ae
A
4031 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
4032 minmtupolicy != IP6PO_MINMTU_DISABLE &&
4033 minmtupolicy != IP6PO_MINMTU_ALL) {
0a7de745 4034 return EINVAL;
6d2010ae
A
4035 }
4036 opt->ip6po_minmtu = minmtupolicy;
4037 break;
4038
4039 case IPV6_DONTFRAG:
0a7de745
A
4040 if (len != sizeof(int)) {
4041 return EINVAL;
4042 }
6d2010ae 4043
316670eb 4044 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
6d2010ae
A
4045 /*
4046 * we ignore this option for TCP sockets.
4047 * (RFC3542 leaves this case unspecified.)
4048 */
4049 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
39236c6e 4050 } else {
6d2010ae 4051 opt->ip6po_flags |= IP6PO_DONTFRAG;
39236c6e 4052 }
6d2010ae
A
4053 break;
4054
4055 case IPV6_PREFER_TEMPADDR:
0a7de745
A
4056 if (len != sizeof(int)) {
4057 return EINVAL;
4058 }
316670eb 4059 preftemp = *(int *)(void *)buf;
6d2010ae
A
4060 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
4061 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
4062 preftemp != IP6PO_TEMPADDR_PREFER) {
0a7de745 4063 return EINVAL;
6d2010ae
A
4064 }
4065 opt->ip6po_prefer_tempaddr = preftemp;
4066 break;
4067
4068 default:
0a7de745 4069 return ENOPROTOOPT;
6d2010ae
A
4070 } /* end of switch */
4071
5ba3f43e
A
4072 if (capture_exthdrstat_out) {
4073 if (uproto == IPPROTO_TCP) {
4074 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_stream_exthdr_out);
4075 } else if (uproto == IPPROTO_UDP) {
4076 INC_ATOMIC_INT64_LIM(net_api_stats.nas_sock_inet6_dgram_exthdr_out);
4077 }
4078 }
4079
0a7de745 4080 return 0;
1c79356b
A
4081}
4082
4083/*
4084 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
4085 * packet to the input queue of a specified interface. Note that this
4086 * calls the output routine of the loopback "driver", but with an interface
4087 * pointer that might NOT be &loif -- easier than replicating that code here.
4088 */
4089void
39236c6e
A
4090ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
4091 struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
1c79356b 4092{
9bccf70c
A
4093 struct mbuf *copym;
4094 struct ip6_hdr *ip6;
39236c6e 4095 struct in6_addr src;
1c79356b 4096
0a7de745 4097 if (lo_ifp == NULL) {
9bccf70c 4098 return;
0a7de745 4099 }
9bccf70c
A
4100
4101 /*
39236c6e 4102 * Copy the packet header as it's needed for the checksum.
9bccf70c
A
4103 * Make sure to deep-copy IPv6 header portion in case the data
4104 * is in an mbuf cluster, so that we can safely override the IPv6
4105 * header portion later.
4106 */
39236c6e
A
4107 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
4108 if (copym != NULL && ((copym->m_flags & M_EXT) ||
0a7de745
A
4109 copym->m_len < sizeof(struct ip6_hdr))) {
4110 copym = m_pullup(copym, sizeof(struct ip6_hdr));
4111 }
9bccf70c 4112
0a7de745 4113 if (copym == NULL) {
9bccf70c 4114 return;
0a7de745 4115 }
9bccf70c
A
4116
4117 ip6 = mtod(copym, struct ip6_hdr *);
39236c6e 4118 src = ip6->ip6_src;
9bccf70c
A
4119 /*
4120 * clear embedded scope identifiers if necessary.
4121 * in6_clearscope will touch the addresses only when necessary.
4122 */
4123 in6_clearscope(&ip6->ip6_src);
4124 in6_clearscope(&ip6->ip6_dst);
9bccf70c 4125
0a7de745 4126 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) {
39236c6e 4127 in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
0a7de745 4128 }
9bccf70c 4129
39236c6e
A
4130 /*
4131 * Stuff the 'real' ifp into the pkthdr, to be used in matching
4132 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
4133 * to make the loopback driver compliant with the data link
4134 * requirements.
4135 */
4136 copym->m_pkthdr.rcvif = origifp;
9bccf70c 4137
39236c6e
A
4138 /*
4139 * Also record the source interface (which owns the source address).
4140 * This is basically a stripped down version of ifa_foraddr6().
4141 */
4142 if (srcifp == NULL) {
4143 struct in6_ifaddr *ia;
4144
4145 lck_rw_lock_shared(&in6_ifaddr_rwlock);
4146 for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) {
4147 IFA_LOCK_SPIN(&ia->ia_ifa);
4148 /* compare against src addr with embedded scope */
4149 if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) {
4150 srcifp = ia->ia_ifp;
4151 IFA_UNLOCK(&ia->ia_ifa);
4152 break;
4153 }
4154 IFA_UNLOCK(&ia->ia_ifa);
4155 }
4156 lck_rw_done(&in6_ifaddr_rwlock);
6d2010ae 4157 }
0a7de745 4158 if (srcifp != NULL) {
39236c6e 4159 ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
0a7de745 4160 }
39236c6e 4161 ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
9bccf70c 4162
39236c6e 4163 dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
1c79356b
A
4164}
4165
4166/*
4167 * Chop IPv6 header off from the payload.
4168 */
4169static int
39236c6e 4170ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
1c79356b
A
4171{
4172 struct mbuf *mh;
4173 struct ip6_hdr *ip6;
4174
4175 ip6 = mtod(m, struct ip6_hdr *);
0a7de745
A
4176 if (m->m_len > sizeof(*ip6)) {
4177 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
39236c6e 4178 if (mh == NULL) {
1c79356b 4179 m_freem(m);
0a7de745 4180 return ENOBUFS;
1c79356b
A
4181 }
4182 M_COPY_PKTHDR(mh, m);
0a7de745 4183 MH_ALIGN(mh, sizeof(*ip6));
1c79356b 4184 m->m_flags &= ~M_PKTHDR;
0a7de745
A
4185 m->m_len -= sizeof(*ip6);
4186 m->m_data += sizeof(*ip6);
1c79356b
A
4187 mh->m_next = m;
4188 m = mh;
0a7de745
A
4189 m->m_len = sizeof(*ip6);
4190 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
1c79356b
A
4191 }
4192 exthdrs->ip6e_ip6 = m;
0a7de745 4193 return 0;
39236c6e
A
4194}
4195
4196static void
4197ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
4198 int nxt0, uint32_t tlen, uint32_t optlen)
4199{
4200 uint32_t sw_csum, hwcap = ifp->if_hwassist;
4201 int tso = TSO_IPV6_OK(ifp, m);
4202
4203 if (!hwcksum_tx) {
4204 /* do all in software; checksum offload is disabled */
4205 sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
4206 } else {
4207 /* do in software what the hardware cannot */
4208 sw_csum = m->m_pkthdr.csum_flags &
4209 ~IF_HWASSIST_CSUM_FLAGS(hwcap);
4210 }
4211
4212 if (optlen != 0) {
4213 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4214 m->m_pkthdr.csum_flags);
4215 } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
4216 (hwcap & CSUM_PARTIAL)) {
4217 /*
5ba3f43e
A
4218 * Partial checksum offload, ere), if no extension headers,
4219 * and TCP only (no UDP support, as the hardware may not be
4220 * able to convert +0 to -0 (0xffff) per RFC1122 4.1.3.4.
4221 * unless the interface supports "invert zero" capability.)
39236c6e
A
4222 */
4223 if (hwcksum_tx && !tso &&
5ba3f43e
A
4224 ((m->m_pkthdr.csum_flags & CSUM_TCPIPV6) ||
4225 ((hwcap & CSUM_ZERO_INVERT) &&
4226 (m->m_pkthdr.csum_flags & CSUM_ZERO_INVERT))) &&
39236c6e 4227 tlen <= mtu) {
0a7de745 4228 uint16_t start = sizeof(struct ip6_hdr);
39236c6e
A
4229 uint16_t ulpoff =
4230 m->m_pkthdr.csum_data & 0xffff;
4231 m->m_pkthdr.csum_flags |=
4232 (CSUM_DATA_VALID | CSUM_PARTIAL);
4233 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
4234 m->m_pkthdr.csum_tx_start = start;
4235 sw_csum = 0;
4236 } else {
4237 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4238 m->m_pkthdr.csum_flags);
4239 }
4240 }
4241
4242 if (sw_csum & CSUM_DELAY_IPV6_DATA) {
4243 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
4244 sw_csum &= ~CSUM_DELAY_IPV6_DATA;
4245 }
4246
4247 if (hwcksum_tx) {
4248 /*
4249 * Drop off bits that aren't supported by hardware;
4250 * also make sure to preserve non-checksum related bits.
4251 */
4252 m->m_pkthdr.csum_flags =
4253 ((m->m_pkthdr.csum_flags &
4254 (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
4255 (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
4256 } else {
4257 /* drop all bits; checksum offload is disabled */
4258 m->m_pkthdr.csum_flags = 0;
4259 }
1c79356b
A
4260}
4261
4262/*
4263 * Compute IPv6 extension header length.
4264 */
1c79356b 4265int
39236c6e 4266ip6_optlen(struct in6pcb *in6p)
1c79356b
A
4267{
4268 int len;
4269
0a7de745
A
4270 if (!in6p->in6p_outputopts) {
4271 return 0;
4272 }
1c79356b
A
4273
4274 len = 0;
0a7de745
A
4275#define elen(x) \
4276 (((struct ip6_ext *)(x)) ? \
39236c6e 4277 (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
1c79356b
A
4278
4279 len += elen(in6p->in6p_outputopts->ip6po_hbh);
39236c6e 4280 if (in6p->in6p_outputopts->ip6po_rthdr) {
1c79356b
A
4281 /* dest1 is valid with rthdr only */
4282 len += elen(in6p->in6p_outputopts->ip6po_dest1);
39236c6e 4283 }
1c79356b
A
4284 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
4285 len += elen(in6p->in6p_outputopts->ip6po_dest2);
0a7de745 4286 return len;
1c79356b
A
4287#undef elen
4288}
3e170ce0
A
4289
4290static int
4291sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
4292{
4293#pragma unused(arg1, arg2)
4294 int error, i;
4295
4296 i = ip6_output_measure;
4297 error = sysctl_handle_int(oidp, &i, 0, req);
0a7de745 4298 if (error || req->newptr == USER_ADDR_NULL) {
3e170ce0 4299 goto done;
0a7de745 4300 }
3e170ce0
A
4301 /* impose bounds */
4302 if (i < 0 || i > 1) {
4303 error = EINVAL;
4304 goto done;
4305 }
4306 if (ip6_output_measure != i && i == 1) {
4307 net_perf_initialize(&net_perf, ip6_output_measure_bins);
4308 }
4309 ip6_output_measure = i;
4310done:
0a7de745 4311 return error;
3e170ce0
A
4312}
4313
4314static int
4315sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
4316{
4317#pragma unused(arg1, arg2)
4318 int error;
4319 uint64_t i;
4320
4321 i = ip6_output_measure_bins;
4322 error = sysctl_handle_quad(oidp, &i, 0, req);
0a7de745 4323 if (error || req->newptr == USER_ADDR_NULL) {
3e170ce0 4324 goto done;
0a7de745 4325 }
3e170ce0
A
4326 /* validate data */
4327 if (!net_perf_validate_bins(i)) {
4328 error = EINVAL;
4329 goto done;
4330 }
4331 ip6_output_measure_bins = i;
4332done:
0a7de745 4333 return error;
3e170ce0
A
4334}
4335
4336static int
4337sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
4338{
4339#pragma unused(oidp, arg1, arg2)
0a7de745
A
4340 if (req->oldptr == USER_ADDR_NULL) {
4341 req->oldlen = (size_t)sizeof(struct ipstat);
4342 }
3e170ce0 4343
0a7de745 4344 return SYSCTL_OUT(req, &net_perf, MIN(sizeof(net_perf), req->oldlen));
3e170ce0 4345}