]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet6/ip6_output.c
xnu-3248.20.55.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
CommitLineData
b0d623f7 1/*
3e170ce0 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
b0d623f7
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
b0d623f7
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
b0d623f7
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
b0d623f7
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
b0d623f7
A
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
1c79356b
A
29/*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * Copyright (c) 1982, 1986, 1988, 1990, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
91 */
2d21ac55
A
92/*
93 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
94 * support for mandatory and extensible security protections. This notice
95 * is included in support of clause 2.2 (b) of the Apple Public License,
96 * Version 2.0.
97 */
1c79356b
A
98
99#include <sys/param.h>
100#include <sys/malloc.h>
101#include <sys/mbuf.h>
102#include <sys/errno.h>
103#include <sys/protosw.h>
104#include <sys/socket.h>
105#include <sys/socketvar.h>
106#include <sys/systm.h>
1c79356b 107#include <sys/kernel.h>
1c79356b 108#include <sys/proc.h>
91447636 109#include <sys/kauth.h>
6d2010ae
A
110#include <sys/mcache.h>
111#include <sys/sysctl.h>
112#include <kern/zalloc.h>
39236c6e 113#include <libkern/OSByteOrder.h>
6d2010ae
A
114
115#include <pexpert/pexpert.h>
39236c6e 116#include <mach/sdt.h>
1c79356b
A
117
118#include <net/if.h>
119#include <net/route.h>
2d21ac55 120#include <net/dlil.h>
39236c6e 121#include <net/net_osdep.h>
3e170ce0 122#include <net/net_perf.h>
1c79356b
A
123
124#include <netinet/in.h>
125#include <netinet/in_var.h>
55e303ae 126#include <netinet/ip_var.h>
9bccf70c 127#include <netinet6/in6_var.h>
1c79356b 128#include <netinet/ip6.h>
39236c6e
A
129#include <netinet/kpi_ipfilter_var.h>
130
6d2010ae 131#include <netinet6/ip6protosw.h>
1c79356b
A
132#include <netinet/icmp6.h>
133#include <netinet6/ip6_var.h>
1c79356b 134#include <netinet/in_pcb.h>
1c79356b 135#include <netinet6/nd6.h>
6d2010ae 136#include <netinet6/scope6_var.h>
1c79356b
A
137#if IPSEC
138#include <netinet6/ipsec.h>
9bccf70c 139#include <netinet6/ipsec6.h>
1c79356b 140#include <netkey/key.h>
9bccf70c 141extern int ipsec_bypass;
1c79356b 142#endif /* IPSEC */
2d21ac55 143
fe8ab488
A
144#if NECP
145#include <net/necp.h>
146#endif /* NECP */
147
2d21ac55
A
148#if CONFIG_MACF_NET
149#include <security/mac.h>
39236c6e 150#endif /* CONFIG_MACF_NET */
1c79356b 151
316670eb 152#if DUMMYNET
39236c6e 153#include <netinet6/ip6_fw.h>
316670eb
A
154#include <netinet/ip_fw.h>
155#include <netinet/ip_dummynet.h>
156#endif /* DUMMYNET */
157
b0d623f7
A
158#if PF
159#include <net/pfvar.h>
160#endif /* PF */
161
3e170ce0
A
162static int sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS;
163static int sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS;
164static int sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS;
91447636 165static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
39236c6e 166static void ip6_out_cksum_stats(int, u_int32_t);
91447636 167static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
39236c6e
A
168static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
169 struct ip6_frag **);
170static int ip6_getpmtu(struct route_in6 *, struct route_in6 *,
171 struct ifnet *, struct in6_addr *, u_int32_t *, boolean_t *);
172static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *,
173 struct sockopt *sopt);
174static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, int);
175static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
176static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
177static void im6o_trace(struct ip6_moptions *, int);
178static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, int,
179 int, int);
91447636 180static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
39236c6e
A
181static void ip6_output_checksum(struct ifnet *, uint32_t, struct mbuf *,
182 int, uint32_t, uint32_t);
3e170ce0
A
183extern int udp_ctloutput(struct socket *, struct sockopt *);
184static int ip6_do_fragmentation(struct mbuf **morig,
185 uint32_t optlen, struct ifnet *ifp, uint32_t unfragpartlen,
186 struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp, uint32_t mtu,
187 int nxt0);
188static int ip6_fragment_packet(struct mbuf **m,
189 struct ip6_pktopts *opt, struct ip6_exthdrs *exthdrsp, struct ifnet *ifp,
190 uint32_t mtu, boolean_t alwaysfrag, uint32_t unfragpartlen,
191 struct route_in6 *ro_pmtu, int nxt0, uint32_t optlen);
192
193SYSCTL_DECL(_net_inet6_ip6);
194
195static int ip6_output_measure = 0;
196SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf,
197 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
198 &ip6_output_measure, 0, sysctl_reset_ip6_output_stats, "I", "Do time measurement");
199
200static uint64_t ip6_output_measure_bins = 0;
201SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_bins,
202 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_output_measure_bins, 0,
203 sysctl_ip6_output_measure_bins, "I",
204 "bins for chaining performance data histogram");
205
206static net_perf_t net_perf;
207SYSCTL_PROC(_net_inet6_ip6, OID_AUTO, output_perf_data,
208 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,
209 0, 0, sysctl_ip6_output_getperf, "S,net_perf",
210 "IP6 output performance data (struct net_perf, net/net_perf.h)");
6d2010ae
A
211
212#define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
213
214/* For gdb */
215__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
216
217struct ip6_moptions_dbg {
218 struct ip6_moptions im6o; /* ip6_moptions */
219 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
220 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
221 /*
222 * Alloc and free callers.
223 */
224 ctrace_t im6o_alloc;
225 ctrace_t im6o_free;
226 /*
227 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
228 */
229 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
230 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
231};
232
/* im6o debugging switch: on by default for DEBUG builds, off otherwise. */
#if DEBUG
static unsigned int im6o_debug = 1;
#else /* !DEBUG */
static unsigned int im6o_debug;
#endif /* !DEBUG */

/* Zone used for ip6_moptions and the size of one of its elements. */
static struct zone *im6o_zone;
static unsigned int im6o_size;

#define	IM6O_ZONE_NAME	"ip6_moptions"	/* zone name */
#define	IM6O_ZONE_MAX	64		/* maximum elements in zone */
1c79356b 244
316670eb 245/*
3e170ce0 246 * ip6_output() calls ip6_output_list() to do the work
316670eb
A
247 */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
    struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
    struct ifnet **ifpp, struct ip6_out_args *ip6oa)
{
	/* Passed as ip6_output_list()'s packetchain argument. */
	const int no_packetchain = 0;

	/* All other arguments, and the result, pass through unchanged. */
	return (ip6_output_list(m0, no_packetchain, opt, ro, flags, im6o,
	    ifpp, ip6oa));
}
55e303ae 255
1c79356b 256/*
3e170ce0 257 * IP6 output. Each packet in mbuf chain m contains a skeletal IP6
1c79356b
A
258 * header (with pri, len, nxt, hlim, src, dst).
259 * This function may modify ver and hlim only.
260 * The mbuf chain containing the packet will be freed.
261 * The mbuf opt, if present, will not be freed.
9bccf70c 262 *
39236c6e
A
263 * If ro is non-NULL and has valid ro->ro_rt, route lookup would be
264 * skipped and ro->ro_rt would be used. Otherwise the result of route
265 * lookup is stored in ro->ro_rt.
266 *
b0d623f7
A
267 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
268 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
9bccf70c 269 * which is rt_rmx.rmx_mtu.
1c79356b
A
270 */
271int
3e170ce0
A
272ip6_output_list(struct mbuf *m0, int packetchain, struct ip6_pktopts *opt,
273 struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
274 struct ifnet **ifpp, struct ip6_out_args *ip6oa)
1c79356b 275{
39236c6e
A
276 struct ip6_hdr *ip6;
277 u_char *nexthdrp;
278 struct ifnet *ifp = NULL, *origifp = NULL; /* refcnt'd */
3e170ce0 279 struct ifnet **ifpp_save = ifpp;
39236c6e 280 struct mbuf *m, *mprev;
3e170ce0
A
281 struct mbuf *sendchain = NULL, *sendchain_last = NULL;
282 struct mbuf *inputchain = NULL;
283 int nxt0;
39236c6e 284 struct route_in6 *ro_pmtu = NULL;
6d2010ae
A
285 struct rtentry *rt = NULL;
286 struct sockaddr_in6 *dst, src_sa, dst_sa;
1c79356b 287 int error = 0;
39236c6e 288 struct in6_ifaddr *ia = NULL, *src_ia = NULL;
b0d623f7 289 u_int32_t mtu;
39236c6e 290 boolean_t alwaysfrag = FALSE;
1c79356b 291 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
39236c6e
A
292 struct ip6_rthdr *rh;
293 struct in6_addr finaldst;
91447636 294 ipfilter_t inject_filter_ref;
39236c6e 295 struct ipf_pktopts *ippo = NULL;
316670eb 296 struct flowadv *adv = NULL;
3e170ce0
A
297 uint32_t pktcnt = 0;
298 uint32_t packets_processed = 0;
299 struct timeval start_tv;
316670eb
A
300#if DUMMYNET
301 struct m_tag *tag;
39236c6e 302 struct ip6_out_args saved_ip6oa;
316670eb 303 struct sockaddr_in6 dst_buf;
316670eb 304#endif /* DUMMYNET */
1c79356b 305#if IPSEC
9bccf70c 306 struct socket *so = NULL;
1c79356b 307 struct secpolicy *sp = NULL;
ebb1b9f4 308 struct route_in6 *ipsec_saved_route = NULL;
39236c6e 309 boolean_t needipsectun = FALSE;
1c79356b 310#endif /* IPSEC */
fe8ab488
A
311#if NECP
312 necp_kernel_policy_result necp_result = 0;
313 necp_kernel_policy_result_parameter necp_result_parameter;
314 necp_kernel_policy_id necp_matched_policy_id = 0;
315#endif /* NECP */
39236c6e
A
316 struct {
317 struct ipf_pktopts ipf_pktopts;
318 struct ip6_exthdrs exthdrs;
319 struct route_in6 ip6route;
320#if IPSEC
321 struct ipsec_output_state ipsec_state;
322#endif /* IPSEC */
fe8ab488
A
323#if NECP
324 struct route_in6 necp_route;
325#endif /* NECP */
39236c6e
A
326#if DUMMYNET
327 struct route_in6 saved_route;
328 struct route_in6 saved_ro_pmtu;
329 struct ip_fw_args args;
330#endif /* DUMMYNET */
331 } ip6obz;
332#define ipf_pktopts ip6obz.ipf_pktopts
333#define exthdrs ip6obz.exthdrs
334#define ip6route ip6obz.ip6route
335#define ipsec_state ip6obz.ipsec_state
fe8ab488 336#define necp_route ip6obz.necp_route
39236c6e
A
337#define saved_route ip6obz.saved_route
338#define saved_ro_pmtu ip6obz.saved_ro_pmtu
339#define args ip6obz.args
340 union {
341 struct {
342 boolean_t select_srcif : 1;
343 boolean_t hdrsplit : 1;
3e170ce0 344 boolean_t route_selected : 1;
39236c6e
A
345 boolean_t dontfrag : 1;
346#if IPSEC
347 boolean_t needipsec : 1;
348 boolean_t noipsec : 1;
349#endif /* IPSEC */
350 };
351 uint32_t raw;
352 } ip6obf = { .raw = 0 };
1c79356b 353
3e170ce0
A
354 if (ip6_output_measure)
355 net_perf_start_time(&net_perf, &start_tv);
356
39236c6e 357 VERIFY(m0->m_flags & M_PKTHDR);
6d2010ae 358
39236c6e
A
359 /* zero out {saved_route, saved_ro_pmtu, ip6route, exthdrs, args} */
360 bzero(&ip6obz, sizeof (ip6obz));
316670eb 361
316670eb 362#if DUMMYNET
39236c6e
A
363 if (SLIST_EMPTY(&m0->m_pkthdr.tags))
364 goto tags_done;
365
366 /* Grab info from mtags prepended to the chain */
316670eb
A
367 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
368 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
369 struct dn_pkt_tag *dn_tag;
370
3e170ce0
A
371 /*
372 * ip6_output_list() cannot handle chains of packets reinjected
373 * by dummynet. The same restriction applies to
374 * ip_output_list().
375 */
376 VERIFY(0 == packetchain);
377
316670eb
A
378 dn_tag = (struct dn_pkt_tag *)(tag+1);
379 args.fwa_pf_rule = dn_tag->dn_pf_rule;
380
39236c6e 381 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof (dst_buf));
316670eb
A
382 dst = &dst_buf;
383 ifp = dn_tag->dn_ifp;
39236c6e 384 if (ifp != NULL)
316670eb
A
385 ifnet_reference(ifp);
386 flags = dn_tag->dn_flags;
39236c6e
A
387 if (dn_tag->dn_flags & IPV6_OUTARGS) {
388 saved_ip6oa = dn_tag->dn_ip6oa;
389 ip6oa = &saved_ip6oa;
390 }
316670eb
A
391
392 saved_route = dn_tag->dn_ro6;
393 ro = &saved_route;
394 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
395 ro_pmtu = &saved_ro_pmtu;
396 origifp = dn_tag->dn_origifp;
39236c6e 397 if (origifp != NULL)
316670eb
A
398 ifnet_reference(origifp);
399 mtu = dn_tag->dn_mtu;
39236c6e 400 alwaysfrag = (dn_tag->dn_alwaysfrag != 0);
316670eb
A
401 unfragpartlen = dn_tag->dn_unfragpartlen;
402
39236c6e 403 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof (exthdrs));
316670eb
A
404
405 m_tag_delete(m0, tag);
406 }
39236c6e
A
407
408tags_done:
316670eb
A
409#endif /* DUMMYNET */
410
39236c6e 411 m = m0;
39236c6e
A
412
413#if IPSEC
39236c6e
A
414 if (ipsec_bypass == 0) {
415 so = ipsec_getsocket(m);
fe8ab488
A
416 if (so != NULL) {
417 (void) ipsec_setsocket(m, NULL);
418 }
39236c6e
A
419 /* If packet is bound to an interface, check bound policies */
420 if ((flags & IPV6_OUTARGS) &&
fe8ab488
A
421 (ip6oa->ip6oa_flags & IPOAF_BOUND_IF) &&
422 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
39236c6e
A
423 /* ip6obf.noipsec is a bitfield, use temp integer */
424 int noipsec = 0;
425
426 if (ipsec6_getpolicybyinterface(m, IPSEC_DIR_OUTBOUND,
fe8ab488 427 flags, ip6oa, &noipsec, &sp) != 0)
39236c6e
A
428 goto bad;
429
430 ip6obf.noipsec = (noipsec != 0);
431 }
432 }
433#endif /* IPSEC */
fe8ab488 434
39236c6e 435 ippo = &ipf_pktopts;
91447636 436
6d2010ae 437 if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
39236c6e
A
438 /*
439 * In the forwarding case, only the ifscope value is used,
440 * as source interface selection doesn't take place.
441 */
442 if ((ip6obf.select_srcif = (!(flags & (IPV6_FORWARDING |
316670eb 443 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
39236c6e 444 (ip6oa->ip6oa_flags & IP6OAF_SELECT_SRCIF))))
316670eb
A
445 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
446
39236c6e
A
447 if ((ip6oa->ip6oa_flags & IP6OAF_BOUND_IF) &&
448 ip6oa->ip6oa_boundif != IFSCOPE_NONE) {
316670eb 449 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
39236c6e 450 (ip6oa->ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
316670eb
A
451 }
452
39236c6e 453 if (ip6oa->ip6oa_flags & IP6OAF_BOUND_SRCADDR)
316670eb 454 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
6d2010ae 455 } else {
39236c6e
A
456 ip6obf.select_srcif = FALSE;
457 if (flags & IPV6_OUTARGS) {
458 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
459 ip6oa->ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF |
460 IP6OAF_BOUND_IF | IP6OAF_BOUND_SRCADDR);
461 }
6d2010ae
A
462 }
463
39236c6e 464 if (flags & IPV6_OUTARGS) {
fe8ab488
A
465 if (ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR)
466 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
467 if (ip6oa->ip6oa_flags & IP6OAF_NO_EXPENSIVE)
468 ipf_pktopts.ippo_flags |= IPPOF_NO_IFF_EXPENSIVE;
39236c6e
A
469 adv = &ip6oa->ip6oa_flowadv;
470 adv->code = FADV_SUCCESS;
471 ip6oa->ip6oa_retflags = 0;
472 }
473
3e170ce0
A
474 /*
475 * Clear out ifpp to be filled in after determining route. ifpp_save is
476 * used to keep old value to release reference properly and dtrace
477 * ipsec tunnel traffic properly.
478 */
479 if (ifpp != NULL && *ifpp != NULL)
480 *ifpp = NULL;
481
316670eb
A
482#if DUMMYNET
483 if (args.fwa_pf_rule) {
484 ip6 = mtod(m, struct ip6_hdr *);
39236c6e 485 VERIFY(ro != NULL); /* ro == saved_route */
316670eb 486 goto check_with_pf;
6d2010ae 487 }
316670eb 488#endif /* DUMMYNET */
6d2010ae 489
3e170ce0
A
490#if NECP
491 /*
492 * Since all packets are assumed to come from same socket, necp lookup
493 * only needs to happen once per function entry.
494 */
495 necp_matched_policy_id = necp_ip6_output_find_policy_match(m, flags,
496 (flags & IPV6_OUTARGS) ? ip6oa : NULL, &necp_result,
497 &necp_result_parameter);
498#endif /* NECP */
499
500 /*
501 * If a chain was passed in, prepare for the first iteration. For all
502 * other iterations, this work will be done at evaluateloop: label.
503 */
504 if (packetchain) {
505 /*
506 * Remove m from the chain during processing to avoid
507 * accidental frees on entire list.
508 */
509 inputchain = m->m_nextpkt;
510 m->m_nextpkt = NULL;
511 }
512
513loopit:
514 packets_processed++;
515 m->m_pkthdr.pkt_flags &= ~(PKTF_LOOP|PKTF_IFAINFO);
516 ip6 = mtod(m, struct ip6_hdr *);
517 nxt0 = ip6->ip6_nxt;
518 finaldst = ip6->ip6_dst;
519 ip6obf.hdrsplit = FALSE;
520 ro_pmtu = NULL;
521
522 if (!SLIST_EMPTY(&m->m_pkthdr.tags))
523 inject_filter_ref = ipf_get_inject_filter(m);
524 else
525 inject_filter_ref = NULL;
526
39236c6e
A
527#define MAKE_EXTHDR(hp, mp) do { \
528 if (hp != NULL) { \
1c79356b 529 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
39236c6e
A
530 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
531 ((eh)->ip6e_len + 1) << 3); \
1c79356b
A
532 if (error) \
533 goto freehdrs; \
534 } \
39236c6e 535} while (0)
316670eb 536
39236c6e 537 if (opt != NULL) {
1c79356b
A
538 /* Hop-by-Hop options header */
539 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
9bccf70c 540 /* Destination options header(1st part) */
6d2010ae
A
541 if (opt->ip6po_rthdr) {
542 /*
543 * Destination options header(1st part)
544 * This only makes sense with a routing header.
545 * See Section 9.2 of RFC 3542.
546 * Disabling this part just for MIP6 convenience is
547 * a bad idea. We need to think carefully about a
548 * way to make the advanced API coexist with MIP6
549 * options, which might automatically be inserted in
550 * the kernel.
551 */
552 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
553 }
1c79356b
A
554 /* Routing header */
555 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
556 /* Destination options header(2nd part) */
557 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
558 }
559
39236c6e
A
560#undef MAKE_EXTHDR
561
fe8ab488 562#if NECP
fe8ab488
A
563 if (necp_matched_policy_id) {
564 necp_mark_packet_from_ip(m, necp_matched_policy_id);
3e170ce0 565
fe8ab488 566 switch (necp_result) {
3e170ce0
A
567 case NECP_KERNEL_POLICY_RESULT_PASS:
568 goto skip_ipsec;
569 case NECP_KERNEL_POLICY_RESULT_DROP:
570 case NECP_KERNEL_POLICY_RESULT_SOCKET_DIVERT:
571 /*
572 * Flow divert packets should be blocked at the IP
573 * layer.
574 */
575 error = EHOSTUNREACH;
576 goto freehdrs;
577 case NECP_KERNEL_POLICY_RESULT_IP_TUNNEL: {
578 /*
579 * Verify that the packet is being routed to the tunnel
580 */
581 struct ifnet *policy_ifp =
582 necp_get_ifnet_from_result_parameter(
583 &necp_result_parameter);
584
585 if (policy_ifp == ifp) {
fe8ab488 586 goto skip_ipsec;
3e170ce0
A
587 } else {
588 if (necp_packet_can_rebind_to_ifnet(m,
589 policy_ifp, (struct route *)&necp_route,
590 AF_INET6)) {
591 /*
592 * Set scoped index to the tunnel
593 * interface, since it is compatible
594 * with the packet. This will only work
595 * for callers who pass IPV6_OUTARGS,
596 * but that covers all of the clients
597 * we care about today.
598 */
599 if (flags & IPV6_OUTARGS) {
600 ip6oa->ip6oa_boundif =
601 policy_ifp->if_index;
602 ip6oa->ip6oa_flags |=
603 IP6OAF_BOUND_IF;
604 }
605 if (opt != NULL
606 && opt->ip6po_pktinfo != NULL) {
607 opt->ip6po_pktinfo->
608 ipi6_ifindex =
609 policy_ifp->if_index;
610 }
611 ro = &necp_route;
fe8ab488
A
612 goto skip_ipsec;
613 } else {
3e170ce0
A
614 error = ENETUNREACH;
615 goto freehdrs;
fe8ab488 616 }
fe8ab488 617 }
3e170ce0
A
618 break;
619 }
620 default:
621 break;
fe8ab488
A
622 }
623 }
624#endif /* NECP */
625
1c79356b 626#if IPSEC
39236c6e 627 if (ipsec_bypass != 0 || ip6obf.noipsec)
9bccf70c 628 goto skip_ipsec;
316670eb 629
1c79356b 630 if (sp == NULL) {
39236c6e 631 /* get a security policy for this packet */
fe8ab488 632 if (so != NULL) {
39236c6e 633 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND,
fe8ab488
A
634 so, &error);
635 } else {
636 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND,
637 0, &error);
39236c6e
A
638 }
639 if (sp == NULL) {
640 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
641 goto freehdrs;
642 }
1c79356b
A
643 }
644
645 error = 0;
646
647 /* check policy */
648 switch (sp->policy) {
649 case IPSEC_POLICY_DISCARD:
2d21ac55 650 case IPSEC_POLICY_GENERATE:
1c79356b
A
651 /*
652 * This packet is just discarded.
653 */
2d21ac55 654 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
9bccf70c 655 goto freehdrs;
1c79356b
A
656
657 case IPSEC_POLICY_BYPASS:
658 case IPSEC_POLICY_NONE:
659 /* no need to do IPsec. */
39236c6e 660 ip6obf.needipsec = FALSE;
1c79356b 661 break;
316670eb 662
1c79356b
A
663 case IPSEC_POLICY_IPSEC:
664 if (sp->req == NULL) {
665 /* acquire a policy */
666 error = key_spdacquire(sp);
9bccf70c 667 goto freehdrs;
1c79356b 668 }
39236c6e 669 if (sp->ipsec_if) {
fe8ab488 670 goto skip_ipsec;
39236c6e
A
671 } else {
672 ip6obf.needipsec = TRUE;
673 }
1c79356b
A
674 break;
675
676 case IPSEC_POLICY_ENTRUST:
677 default:
39236c6e
A
678 printf("%s: Invalid policy found: %d\n", __func__, sp->policy);
679 break;
1c79356b 680 }
39236c6e 681skip_ipsec:
1c79356b
A
682#endif /* IPSEC */
683
684 /*
685 * Calculate the total length of the extension header chain.
686 * Keep the length of the unfragmentable part for fragmentation.
687 */
688 optlen = 0;
39236c6e 689 if (exthdrs.ip6e_hbh != NULL)
6d2010ae 690 optlen += exthdrs.ip6e_hbh->m_len;
39236c6e 691 if (exthdrs.ip6e_dest1 != NULL)
6d2010ae 692 optlen += exthdrs.ip6e_dest1->m_len;
39236c6e 693 if (exthdrs.ip6e_rthdr != NULL)
6d2010ae 694 optlen += exthdrs.ip6e_rthdr->m_len;
39236c6e 695 unfragpartlen = optlen + sizeof (struct ip6_hdr);
6d2010ae 696
1c79356b 697 /* NOTE: we don't add AH/ESP length here. do that later. */
39236c6e 698 if (exthdrs.ip6e_dest2 != NULL)
6d2010ae
A
699 optlen += exthdrs.ip6e_dest2->m_len;
700
1c79356b
A
701 /*
702 * If we need IPsec, or there is at least one extension header,
703 * separate IP6 header from the payload.
704 */
39236c6e
A
705 if ((
706#if IPSEC
707 ip6obf.needipsec ||
708#endif /* IPSEC */
709 optlen) && !ip6obf.hdrsplit) {
1c79356b
A
710 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
711 m = NULL;
712 goto freehdrs;
713 }
714 m = exthdrs.ip6e_ip6;
39236c6e 715 ip6obf.hdrsplit = TRUE;
1c79356b
A
716 }
717
718 /* adjust pointer */
719 ip6 = mtod(m, struct ip6_hdr *);
720
721 /* adjust mbuf packet header length */
722 m->m_pkthdr.len += optlen;
39236c6e 723 plen = m->m_pkthdr.len - sizeof (*ip6);
1c79356b
A
724
725 /* If this is a jumbo payload, insert a jumbo payload option. */
726 if (plen > IPV6_MAXPACKET) {
39236c6e 727 if (!ip6obf.hdrsplit) {
1c79356b
A
728 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
729 m = NULL;
730 goto freehdrs;
731 }
732 m = exthdrs.ip6e_ip6;
39236c6e 733 ip6obf.hdrsplit = TRUE;
1c79356b
A
734 }
735 /* adjust pointer */
736 ip6 = mtod(m, struct ip6_hdr *);
737 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
738 goto freehdrs;
739 ip6->ip6_plen = 0;
39236c6e 740 } else {
1c79356b 741 ip6->ip6_plen = htons(plen);
39236c6e 742 }
1c79356b
A
743 /*
744 * Concatenate headers and fill in next header fields.
745 * Here we have, on "m"
746 * IPv6 payload
747 * and we insert headers accordingly. Finally, we should be getting:
748 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
749 *
750 * during the header composing process, "m" points to IPv6 header.
751 * "mprev" points to an extension header prior to esp.
752 */
39236c6e
A
753 nexthdrp = &ip6->ip6_nxt;
754 mprev = m;
1c79356b 755
39236c6e
A
756 /*
757 * we treat dest2 specially. this makes IPsec processing
758 * much easier. the goal here is to make mprev point to the
759 * mbuf prior to dest2.
760 *
761 * result: IPv6 dest2 payload
762 * m and mprev will point to IPv6 header.
763 */
764 if (exthdrs.ip6e_dest2 != NULL) {
765 if (!ip6obf.hdrsplit) {
766 panic("assumption failed: hdr not split");
767 /* NOTREACHED */
768 }
769 exthdrs.ip6e_dest2->m_next = m->m_next;
770 m->m_next = exthdrs.ip6e_dest2;
771 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
772 ip6->ip6_nxt = IPPROTO_DSTOPTS;
773 }
774
775#define MAKE_CHAIN(m, mp, p, i) do { \
776 if (m != NULL) { \
777 if (!ip6obf.hdrsplit) { \
778 panic("assumption failed: hdr not split"); \
779 /* NOTREACHED */ \
780 } \
781 *mtod((m), u_char *) = *(p); \
782 *(p) = (i); \
783 p = mtod((m), u_char *); \
784 (m)->m_next = (mp)->m_next; \
785 (mp)->m_next = (m); \
786 (mp) = (m); \
787 } \
788} while (0)
789 /*
790 * result: IPv6 hbh dest1 rthdr dest2 payload
791 * m will point to IPv6 header. mprev will point to the
792 * extension header prior to dest2 (rthdr in the above case).
793 */
794 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
795 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS);
796 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING);
91447636 797
3e170ce0
A
798 /* It is no longer safe to free the pointers in exthdrs. */
799 exthdrs.merged = TRUE;
800
39236c6e
A
801#undef MAKE_CHAIN
802
803#if IPSEC
804 if (ip6obf.needipsec && (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA))
805 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
806#endif /* IPSEC */
807
808 if (!TAILQ_EMPTY(&ipv6_filters)) {
809 struct ipfilter *filter;
810 int seen = (inject_filter_ref == NULL);
811 int fixscope = 0;
812
813 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
814 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
815 IM6O_LOCK(im6o);
816 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
817 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
818 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
819 IM6O_UNLOCK(im6o);
820 }
821
822 /* Hack: embed the scope_id in the destination */
823 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
824 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
825 fixscope = 1;
826 ip6->ip6_dst.s6_addr16[1] =
827 htons(ro->ro_dst.sin6_scope_id);
828 }
829
830 ipf_ref();
831 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
832 /*
833 * Don't process packet twice if we've already seen it.
834 */
835 if (seen == 0) {
836 if ((struct ipfilter *)inject_filter_ref ==
837 filter)
838 seen = 1;
839 } else if (filter->ipf_filter.ipf_output != NULL) {
840 errno_t result;
841
842 result = filter->ipf_filter.ipf_output(
843 filter->ipf_filter.cookie,
844 (mbuf_t *)&m, ippo);
845 if (result == EJUSTRETURN) {
846 ipf_unref();
3e170ce0
A
847 m = NULL;
848 goto evaluateloop;
39236c6e
A
849 }
850 if (result != 0) {
851 ipf_unref();
852 goto bad;
91447636 853 }
91447636 854 }
91447636 855 }
39236c6e
A
856 ipf_unref();
857
858 ip6 = mtod(m, struct ip6_hdr *);
859 /* Hack: cleanup embedded scope_id if we put it there */
860 if (fixscope)
861 ip6->ip6_dst.s6_addr16[1] = 0;
862 }
91447636 863
1c79356b 864#if IPSEC
39236c6e
A
865 if (ip6obf.needipsec) {
866 int segleft_org;
1c79356b
A
867
868 /*
869 * pointers after IPsec headers are not valid any more.
870 * other pointers need great care too.
871 * (IPsec routines should not mangle mbufs prior to AH/ESP)
872 */
873 exthdrs.ip6e_dest2 = NULL;
874
39236c6e 875 if (exthdrs.ip6e_rthdr != NULL) {
1c79356b
A
876 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
877 segleft_org = rh->ip6r_segleft;
878 rh->ip6r_segleft = 0;
39236c6e
A
879 } else {
880 rh = NULL;
881 segleft_org = 0;
1c79356b
A
882 }
883
ebb1b9f4 884 ipsec_state.m = m;
39236c6e
A
885 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev,
886 sp, flags, &needipsectun);
ebb1b9f4 887 m = ipsec_state.m;
1c79356b
A
888 if (error) {
889 /* mbuf is already reclaimed in ipsec6_output_trans. */
890 m = NULL;
891 switch (error) {
892 case EHOSTUNREACH:
893 case ENETUNREACH:
894 case EMSGSIZE:
895 case ENOBUFS:
896 case ENOMEM:
897 break;
898 default:
39236c6e
A
899 printf("ip6_output (ipsec): error code %d\n",
900 error);
901 /* FALLTHRU */
1c79356b
A
902 case ENOENT:
903 /* don't show these error codes to the user */
904 error = 0;
905 break;
906 }
907 goto bad;
908 }
39236c6e 909 if (exthdrs.ip6e_rthdr != NULL) {
1c79356b
A
910 /* ah6_output doesn't modify mbuf chain */
911 rh->ip6r_segleft = segleft_org;
912 }
1c79356b 913 }
39236c6e 914#endif /* IPSEC */
1c79356b
A
915
916 /*
6d2010ae 917 * If there is a routing header, replace the destination address field
1c79356b
A
918 * with the first hop of the routing header.
919 */
39236c6e 920 if (exthdrs.ip6e_rthdr != NULL) {
1c79356b 921 struct ip6_rthdr0 *rh0;
6d2010ae
A
922 struct in6_addr *addr;
923 struct sockaddr_in6 sa;
1c79356b 924
39236c6e
A
925 rh = (struct ip6_rthdr *)
926 (mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *));
9bccf70c 927 switch (rh->ip6r_type) {
1c79356b 928 case IPV6_RTHDR_TYPE_0:
39236c6e
A
929 rh0 = (struct ip6_rthdr0 *)rh;
930 addr = (struct in6_addr *)(void *)(rh0 + 1);
931
932 /*
933 * construct a sockaddr_in6 form of
934 * the first hop.
935 *
936 * XXX: we may not have enough
937 * information about its scope zone;
938 * there is no standard API to pass
939 * the information from the
940 * application.
941 */
942 bzero(&sa, sizeof (sa));
943 sa.sin6_family = AF_INET6;
944 sa.sin6_len = sizeof (sa);
945 sa.sin6_addr = addr[0];
946 if ((error = sa6_embedscope(&sa,
947 ip6_use_defzone)) != 0) {
948 goto bad;
949 }
950 ip6->ip6_dst = sa.sin6_addr;
951 bcopy(&addr[1], &addr[0], sizeof (struct in6_addr) *
952 (rh0->ip6r0_segleft - 1));
953 addr[rh0->ip6r0_segleft - 1] = finaldst;
954 /* XXX */
955 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
956 break;
1c79356b 957 default: /* is it possible? */
39236c6e
A
958 error = EINVAL;
959 goto bad;
1c79356b
A
960 }
961 }
962
963 /* Source address validation */
964 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
39236c6e 965 !(flags & IPV6_UNSPECSRC)) {
1c79356b
A
966 error = EOPNOTSUPP;
967 ip6stat.ip6s_badscope++;
968 goto bad;
969 }
970 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
971 error = EOPNOTSUPP;
972 ip6stat.ip6s_badscope++;
973 goto bad;
974 }
975
976 ip6stat.ip6s_localout++;
977
978 /*
979 * Route packet.
980 */
39236c6e 981 if (ro == NULL) {
1c79356b 982 ro = &ip6route;
39236c6e 983 bzero((caddr_t)ro, sizeof (*ro));
1c79356b
A
984 }
985 ro_pmtu = ro;
39236c6e 986 if (opt != NULL && opt->ip6po_rthdr)
1c79356b 987 ro = &opt->ip6po_route;
39236c6e 988 dst = SIN6(&ro->ro_dst);
6d2010ae 989
39236c6e 990 if (ro->ro_rt != NULL)
6d2010ae
A
991 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
992 /*
993 * if specified, try to fill in the traffic class field.
994 * do not override if a non-zero value is already set.
995 * we check the diffserv field and the ecn field separately.
996 */
39236c6e 997 if (opt != NULL && opt->ip6po_tclass >= 0) {
6d2010ae
A
998 int mask = 0;
999
1000 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
1001 mask |= 0xfc;
1002 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
1003 mask |= 0x03;
39236c6e
A
1004 if (mask != 0) {
1005 ip6->ip6_flow |=
1006 htonl((opt->ip6po_tclass & mask) << 20);
1007 }
6d2010ae
A
1008 }
1009
1010 /* fill in or override the hop limit field, if necessary. */
39236c6e 1011 if (opt && opt->ip6po_hlim != -1) {
6d2010ae 1012 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
39236c6e 1013 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
6d2010ae
A
1014 if (im6o != NULL) {
1015 IM6O_LOCK(im6o);
1016 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
1017 IM6O_UNLOCK(im6o);
1018 } else {
1019 ip6->ip6_hlim = ip6_defmcasthlim;
1020 }
1021 }
1022
1c79356b 1023 /*
b0d623f7
A
1024 * If there is a cached route, check that it is to the same
1025 * destination and is still up. If not, free it and try again.
1026 * Test rt_flags without holding rt_lock for performance reasons;
1027 * if the route is down it will hopefully be caught by the layer
1028 * below (since it uses this route as a hint) or during the
1029 * next transmit.
1c79356b 1030 */
39236c6e
A
1031 if (ROUTE_UNUSABLE(ro) || dst->sin6_family != AF_INET6 ||
1032 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))
1033 ROUTE_RELEASE(ro);
1034
b0d623f7 1035 if (ro->ro_rt == NULL) {
39236c6e 1036 bzero(dst, sizeof (*dst));
1c79356b 1037 dst->sin6_family = AF_INET6;
39236c6e 1038 dst->sin6_len = sizeof (struct sockaddr_in6);
1c79356b
A
1039 dst->sin6_addr = ip6->ip6_dst;
1040 }
1041#if IPSEC
39236c6e 1042 if (ip6obf.needipsec && needipsectun) {
6d2010ae 1043#if CONFIG_DTRACE
3e170ce0 1044 struct ifnet *trace_ifp = (ifpp_save != NULL) ? (*ifpp_save) : NULL;
39236c6e 1045#endif /* CONFIG_DTRACE */
1c79356b
A
1046 /*
1047 * All the extension headers will become inaccessible
1048 * (since they can be encrypted).
1049 * Don't panic, we need no more updates to extension headers
1050 * on inner IPv6 packet (since they are now encapsulated).
1051 *
1052 * IPv6 [ESP|AH] IPv6 [extension headers] payload
1053 */
39236c6e 1054 bzero(&exthdrs, sizeof (exthdrs));
1c79356b
A
1055 exthdrs.ip6e_ip6 = m;
1056
ebb1b9f4 1057 ipsec_state.m = m;
39236c6e
A
1058 route_copyout(&ipsec_state.ro, (struct route *)ro,
1059 sizeof (ipsec_state.ro));
1060 ipsec_state.dst = SA(dst);
6d2010ae 1061
39236c6e 1062 /* So that we can see packets inside the tunnel */
6d2010ae 1063 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
39236c6e
A
1064 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
1065 struct ip *, NULL, struct ip6_hdr *, ip6);
6d2010ae 1066
ebb1b9f4 1067 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
39236c6e 1068 /* tunneled in IPv4? packet is gone */
3e170ce0
A
1069 if (ipsec_state.tunneled == 4) {
1070 m = NULL;
1071 goto evaluateloop;
1072 }
ebb1b9f4
A
1073 m = ipsec_state.m;
1074 ipsec_saved_route = ro;
1075 ro = (struct route_in6 *)&ipsec_state.ro;
39236c6e 1076 dst = SIN6(ipsec_state.dst);
1c79356b
A
1077 if (error) {
1078 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
1c79356b
A
1079 m = NULL;
1080 switch (error) {
1081 case EHOSTUNREACH:
1082 case ENETUNREACH:
1083 case EMSGSIZE:
1084 case ENOBUFS:
1085 case ENOMEM:
1086 break;
1087 default:
39236c6e
A
1088 printf("ip6_output (ipsec): error code %d\n",
1089 error);
1090 /* FALLTHRU */
1c79356b
A
1091 case ENOENT:
1092 /* don't show these error codes to the user */
1093 error = 0;
1094 break;
1095 }
1096 goto bad;
1097 }
316670eb 1098 /*
39236c6e
A
1099 * The packet has been encapsulated so the ifscope
1100 * is no longer valid since it does not apply to the
1101 * outer address: ignore the ifscope.
6d2010ae 1102 */
39236c6e
A
1103 if (flags & IPV6_OUTARGS) {
1104 ip6oa->ip6oa_boundif = IFSCOPE_NONE;
1105 ip6oa->ip6oa_flags &= ~IP6OAF_BOUND_IF;
1106 }
6d2010ae
A
1107 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
1108 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
1109 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
1110 }
1c79356b
A
1111 exthdrs.ip6e_ip6 = m;
1112 }
55e303ae 1113#endif /* IPSEC */
1c79356b 1114
3e170ce0
A
1115 /*
1116 * ifp should only be filled in for dummy net packets which will jump
1117 * to check_with_pf label.
1118 */
6d2010ae 1119 if (ifp != NULL) {
3e170ce0 1120 VERIFY(ip6obf.route_selected);
6d2010ae 1121 }
1c79356b 1122
6d2010ae
A
1123 /* adjust pointer */
1124 ip6 = mtod(m, struct ip6_hdr *);
1c79356b 1125
39236c6e
A
1126 if (ip6obf.select_srcif) {
1127 bzero(&src_sa, sizeof (src_sa));
6d2010ae 1128 src_sa.sin6_family = AF_INET6;
39236c6e 1129 src_sa.sin6_len = sizeof (src_sa);
6d2010ae
A
1130 src_sa.sin6_addr = ip6->ip6_src;
1131 }
39236c6e 1132 bzero(&dst_sa, sizeof (dst_sa));
6d2010ae 1133 dst_sa.sin6_family = AF_INET6;
39236c6e 1134 dst_sa.sin6_len = sizeof (dst_sa);
6d2010ae
A
1135 dst_sa.sin6_addr = ip6->ip6_dst;
1136
316670eb 1137 /*
3e170ce0
A
1138 * Only call in6_selectroute() on first iteration to avoid taking
1139 * multiple references on ifp and rt.
1140 *
316670eb
A
1141 * in6_selectroute() might return an ifp with its reference held
1142 * even in the error case, so make sure to release its reference.
39236c6e 1143 * ip6oa may be NULL if IPV6_OUTARGS isn't set.
316670eb 1144 */
3e170ce0
A
1145 if (!ip6obf.route_selected) {
1146 error = in6_selectroute( ip6obf.select_srcif ? &src_sa : NULL,
1147 &dst_sa, opt, im6o, &src_ia, ro, &ifp, &rt, 0, ip6oa);
1148
1149 if (error != 0) {
1150 switch (error) {
1151 case EHOSTUNREACH:
1152 ip6stat.ip6s_noroute++;
1153 break;
1154 case EADDRNOTAVAIL:
1155 default:
1156 break; /* XXX statistics? */
1157 }
1158 if (ifp != NULL)
1159 in6_ifstat_inc(ifp, ifs6_out_discard);
1160 /* ifp (if non-NULL) will be released at the end */
1161 goto bad;
1c79356b 1162 }
3e170ce0 1163 ip6obf.route_selected = TRUE;
6d2010ae
A
1164 }
1165 if (rt == NULL) {
b0d623f7 1166 /*
6d2010ae
A
1167 * If in6_selectroute() does not return a route entry,
1168 * dst may not have been updated.
b0d623f7 1169 */
6d2010ae
A
1170 *dst = dst_sa; /* XXX */
1171 }
b0d623f7 1172
3e170ce0
A
1173#if NECP
1174 /* Catch-all to check if the interface is allowed */
1175 if (!necp_packet_is_allowed_over_interface(m, ifp)) {
1176 error = EHOSTUNREACH;
1177 goto bad;
1178 }
1179#endif /* NECP */
1180
6d2010ae
A
1181 /*
1182 * then rt (for unicast) and ifp must be non-NULL valid values.
1183 */
39236c6e 1184 if (!(flags & IPV6_FORWARDING)) {
39236c6e 1185 in6_ifstat_inc_na(ifp, ifs6_out_request);
6d2010ae
A
1186 }
1187 if (rt != NULL) {
1188 RT_LOCK(rt);
3e170ce0
A
1189 if (ia == NULL) {
1190 ia = (struct in6_ifaddr *)(rt->rt_ifa);
1191 if (ia != NULL)
1192 IFA_ADDREF(&ia->ia_ifa);
1193 }
6d2010ae
A
1194 rt->rt_use++;
1195 RT_UNLOCK(rt);
1196 }
1c79356b 1197
6d2010ae
A
1198 /*
1199 * The outgoing interface must be in the zone of source and
39236c6e
A
1200 * destination addresses (except local/loopback). We should
1201 * use ia_ifp to support the case of sending packets to an
1202 * address of our own.
6d2010ae
A
1203 */
1204 if (ia != NULL && ia->ia_ifp) {
316670eb 1205 ifnet_reference(ia->ia_ifp); /* for origifp */
6d2010ae
A
1206 if (origifp != NULL)
1207 ifnet_release(origifp);
1208 origifp = ia->ia_ifp;
1209 } else {
1210 if (ifp != NULL)
316670eb 1211 ifnet_reference(ifp); /* for origifp */
6d2010ae
A
1212 if (origifp != NULL)
1213 ifnet_release(origifp);
1214 origifp = ifp;
1215 }
1c79356b 1216
39236c6e
A
1217 /* skip scope enforcements for local/loopback route */
1218 if (rt == NULL || !(rt->rt_ifp->if_flags & IFF_LOOPBACK)) {
1219 struct in6_addr src0, dst0;
1220 u_int32_t zone;
1c79356b 1221
39236c6e
A
1222 src0 = ip6->ip6_src;
1223 if (in6_setscope(&src0, origifp, &zone))
1224 goto badscope;
1225 bzero(&src_sa, sizeof (src_sa));
1226 src_sa.sin6_family = AF_INET6;
1227 src_sa.sin6_len = sizeof (src_sa);
1228 src_sa.sin6_addr = ip6->ip6_src;
1229 if ((sa6_recoverscope(&src_sa, TRUE) ||
1230 zone != src_sa.sin6_scope_id))
1231 goto badscope;
1232
1233 dst0 = ip6->ip6_dst;
1234 if ((in6_setscope(&dst0, origifp, &zone)))
1235 goto badscope;
1236 /* re-initialize to be sure */
1237 bzero(&dst_sa, sizeof (dst_sa));
1238 dst_sa.sin6_family = AF_INET6;
1239 dst_sa.sin6_len = sizeof (dst_sa);
1240 dst_sa.sin6_addr = ip6->ip6_dst;
1241 if ((sa6_recoverscope(&dst_sa, TRUE) ||
1242 zone != dst_sa.sin6_scope_id))
1243 goto badscope;
1244
1245 /* scope check is done. */
1246 goto routefound;
1247
1248badscope:
1249 ip6stat.ip6s_badscope++;
1250 in6_ifstat_inc(origifp, ifs6_out_discard);
1251 if (error == 0)
1252 error = EHOSTUNREACH; /* XXX */
1253 goto bad;
1254 }
1c79356b 1255
39236c6e
A
1256routefound:
1257 if (rt != NULL && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1258 if (opt != NULL && opt->ip6po_nextroute.ro_rt) {
1c79356b 1259 /*
6d2010ae
A
1260 * The nexthop is explicitly specified by the
1261 * application. We assume the next hop is an IPv6
1262 * address.
1c79356b 1263 */
39236c6e
A
1264 dst = SIN6(opt->ip6po_nexthop);
1265 } else if ((rt->rt_flags & RTF_GATEWAY)) {
1266 dst = SIN6(rt->rt_gateway);
1267 }
1268 /*
1269 * For packets destined to local/loopback, record the
1270 * source interface (which owns the source
1271 * address), as well as the output interface. This is
1272 * needed to reconstruct the embedded zone for the
1273 * link-local address case in ip6_input().
1274 */
1275 if (ia != NULL && (ifp->if_flags & IFF_LOOPBACK)) {
1276 uint32_t srcidx;
1277
1278 if (src_ia != NULL)
1279 srcidx = src_ia->ia_ifp->if_index;
1280 else if (ro->ro_srcia != NULL)
1281 srcidx = ro->ro_srcia->ifa_ifp->if_index;
1282 else
1283 srcidx = 0;
1284
1285 ip6_setsrcifaddr_info(m, srcidx, NULL);
1286 ip6_setdstifaddr_info(m, 0, ia);
b0d623f7 1287 }
6d2010ae 1288 }
b0d623f7 1289
6d2010ae
A
1290 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1291 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1292 } else {
1293 struct in6_multi *in6m;
1c79356b 1294
6d2010ae 1295 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
39236c6e 1296 in6_ifstat_inc_na(ifp, ifs6_out_mcast);
1c79356b
A
1297
1298 /*
1299 * Confirm that the outgoing interface supports multicast.
1300 */
39236c6e 1301 if (!(ifp->if_flags & IFF_MULTICAST)) {
1c79356b
A
1302 ip6stat.ip6s_noroute++;
1303 in6_ifstat_inc(ifp, ifs6_out_discard);
1304 error = ENETUNREACH;
1305 goto bad;
1306 }
6d2010ae
A
1307 in6_multihead_lock_shared();
1308 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1309 in6_multihead_lock_done();
1310 if (im6o != NULL)
1311 IM6O_LOCK(im6o);
1c79356b 1312 if (in6m != NULL &&
39236c6e 1313 (im6o == NULL || im6o->im6o_multicast_loop)) {
6d2010ae
A
1314 if (im6o != NULL)
1315 IM6O_UNLOCK(im6o);
1c79356b
A
1316 /*
1317 * If we belong to the destination multicast group
1318 * on the outgoing interface, and the caller did not
1319 * forbid loopback, loop back a copy.
1320 */
39236c6e 1321 ip6_mloopback(NULL, ifp, m, dst, optlen, nxt0);
fe8ab488
A
1322 } else if (im6o != NULL)
1323 IM6O_UNLOCK(im6o);
6d2010ae
A
1324 if (in6m != NULL)
1325 IN6M_REMREF(in6m);
1c79356b
A
1326 /*
1327 * Multicasts with a hoplimit of zero may be looped back,
1328 * above, but must not be transmitted on a network.
1329 * Also, multicasts addressed to the loopback interface
1330 * are not sent -- the above call to ip6_mloopback() will
1331 * loop back a copy if this host actually belongs to the
1332 * destination group on the loopback interface.
1333 */
6d2010ae
A
1334 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1335 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
3e170ce0
A
1336 /* remove m from the packetchain and continue looping */
1337 if (m != NULL)
1338 m_freem(m);
1339 m = NULL;
1340 goto evaluateloop;
1c79356b
A
1341 }
1342 }
1343
1344 /*
1345 * Fill the outgoing interface to tell the upper layer
1346 * to increment per-interface statistics.
1347 */
3e170ce0 1348 if (ifpp != NULL && *ifpp == NULL) {
6d2010ae 1349 ifnet_reference(ifp); /* for caller */
1c79356b 1350 *ifpp = ifp;
1c79356b 1351 }
b0d623f7 1352
6d2010ae
A
1353 /* Determine path MTU. */
1354 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1355 &alwaysfrag)) != 0)
1356 goto bad;
1c79356b
A
1357
1358 /*
6d2010ae
A
1359 * The caller of this function may specify to use the minimum MTU
1360 * in some cases.
1361 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1362 * setting. The logic is a bit complicated; by default, unicast
1363 * packets will follow path MTU while multicast packets will be sent at
1364 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1365 * including unicast ones will be sent at the minimum MTU. Multicast
1366 * packets will always be sent at the minimum MTU unless
1367 * IP6PO_MINMTU_DISABLE is explicitly specified.
1368 * See RFC 3542 for more details.
1c79356b 1369 */
6d2010ae 1370 if (mtu > IPV6_MMTU) {
39236c6e 1371 if ((flags & IPV6_MINMTU)) {
6d2010ae 1372 mtu = IPV6_MMTU;
39236c6e 1373 } else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) {
6d2010ae 1374 mtu = IPV6_MMTU;
39236c6e
A
1375 } else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1376 (opt == NULL ||
1377 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
6d2010ae 1378 mtu = IPV6_MMTU;
b0d623f7 1379 }
1c79356b 1380 }
6d2010ae 1381
9bccf70c
A
1382 /*
1383 * clear embedded scope identifiers if necessary.
1384 * in6_clearscope will touch the addresses only when necessary.
1385 */
1386 in6_clearscope(&ip6->ip6_src);
1387 in6_clearscope(&ip6->ip6_dst);
1c79356b 1388
7e4a7d39 1389#if IPFW2
1c79356b
A
1390 /*
1391 * Check with the firewall...
1392 */
39236c6e 1393 if (ip6_fw_enable && ip6_fw_chk_ptr) {
1c79356b 1394 u_short port = 0;
55e303ae 1395 m->m_pkthdr.rcvif = NULL; /* XXX */
1c79356b 1396 /* If ipfw says divert, we have to just drop packet */
3e170ce0
A
1397 if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m) || m == NULL) {
1398 if (m != NULL) {
1399 m_freem(m);
1400 m = NULL;
1401 goto evaluateloop;
1402 } else {
1403 error = EACCES;
1404 goto bad;
1405 }
1c79356b
A
1406 }
1407 }
39236c6e 1408#endif /* IPFW2 */
1c79356b
A
1409
1410 /*
1411 * If the outgoing packet contains a hop-by-hop options header,
1412 * it must be examined and processed even by the source node.
1413 * (RFC 2460, section 4.)
1414 */
39236c6e 1415 if (exthdrs.ip6e_hbh != NULL) {
9bccf70c 1416 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
6d2010ae 1417 u_int32_t dummy; /* XXX unused */
39236c6e 1418 uint32_t oplen = 0; /* for ip6_process_hopopts() */
9bccf70c
A
1419#if DIAGNOSTIC
1420 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1421 panic("ip6e_hbh is not continuous");
1422#endif
1c79356b 1423 /*
39236c6e
A
1424 * XXX: If we have to send an ICMPv6 error to the sender,
1425 * we need the M_LOOP flag since icmp6_error() expects
1426 * the IPv6 and the hop-by-hop options header are
1427 * continuous unless the flag is set.
1c79356b
A
1428 */
1429 m->m_flags |= M_LOOP;
1430 m->m_pkthdr.rcvif = ifp;
6d2010ae 1431 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
39236c6e
A
1432 ((hbh->ip6h_len + 1) << 3) - sizeof (struct ip6_hbh),
1433 &dummy, &oplen) < 0) {
3e170ce0
A
1434 /*
1435 * m was already freed at this point. Set to NULL so it
1436 * is not re-freed at end of ip6_output_list.
1437 */
1438 m = NULL;
39236c6e 1439 error = EINVAL; /* better error? */
3e170ce0 1440 goto bad;
1c79356b
A
1441 }
1442 m->m_flags &= ~M_LOOP; /* XXX */
1443 m->m_pkthdr.rcvif = NULL;
1444 }
1445
316670eb
A
1446#if DUMMYNET
1447check_with_pf:
39236c6e 1448#endif /* DUMMYNET */
b0d623f7 1449#if PF
6d2010ae 1450 if (PF_IS_ENABLED) {
316670eb 1451#if DUMMYNET
3e170ce0 1452
316670eb 1453 /*
39236c6e
A
1454 * TODO: Need to save opt->ip6po_flags for reinjection
1455 * rdar://10434993
316670eb
A
1456 */
1457 args.fwa_m = m;
1458 args.fwa_oif = ifp;
1459 args.fwa_oflags = flags;
39236c6e
A
1460 if (flags & IPV6_OUTARGS)
1461 args.fwa_ip6oa = ip6oa;
316670eb
A
1462 args.fwa_ro6 = ro;
1463 args.fwa_dst6 = dst;
1464 args.fwa_ro6_pmtu = ro_pmtu;
1465 args.fwa_origifp = origifp;
1466 args.fwa_mtu = mtu;
1467 args.fwa_alwaysfrag = alwaysfrag;
1468 args.fwa_unfragpartlen = unfragpartlen;
1469 args.fwa_exthdrs = &exthdrs;
6d2010ae 1470 /* Invoke outbound packet filter */
316670eb 1471 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
39236c6e 1472#else /* !DUMMYNET */
316670eb 1473 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
39236c6e 1474#endif /* !DUMMYNET */
b0d623f7 1475
316670eb 1476 if (error != 0 || m == NULL) {
6d2010ae 1477 if (m != NULL) {
39236c6e
A
1478 panic("%s: unexpected packet %p\n",
1479 __func__, m);
6d2010ae
A
1480 /* NOTREACHED */
1481 }
3e170ce0
A
1482 /* m was already freed by callee and is now NULL. */
1483 goto evaluateloop;
b0d623f7 1484 }
6d2010ae 1485 ip6 = mtod(m, struct ip6_hdr *);
b0d623f7 1486 }
b0d623f7
A
1487#endif /* PF */
1488
3e170ce0
A
1489#ifdef IPSEC
1490 /* clean ipsec history before fragmentation */
1491 ipsec_delaux(m);
1492#endif /* IPSEC */
1493
1c79356b 1494 /*
3e170ce0
A
1495 * Determine whether fragmentation is necessary. If so, m is passed
1496 * back as a chain of packets and original mbuf is freed. Otherwise, m
1497 * is unchanged.
1c79356b 1498 */
3e170ce0
A
1499 error = ip6_fragment_packet(&m, opt,
1500 &exthdrs, ifp, mtu, alwaysfrag, unfragpartlen, ro_pmtu, nxt0,
1501 optlen);
6d2010ae 1502
3e170ce0 1503 if (error)
6d2010ae 1504 goto bad;
3e170ce0
A
1505
1506/*
1507 * The evaluateloop label is where we decide whether to continue looping over
1508 * packets or call into nd code to send.
1509 */
1510evaluateloop:
1511
1512 /*
1513 * m may be NULL when we jump to the evaluateloop label from PF or
1514 * other code that can drop packets.
1515 */
1516 if (m != NULL) {
1517 /*
1518 * If we already have a chain to send, tack m onto the end.
1519 * Otherwise make m the start and end of the to-be-sent chain.
1520 */
1521 if (sendchain != NULL) {
1522 sendchain_last->m_nextpkt = m;
1523 } else {
1524 sendchain = m;
1525 }
1526
1527 /* Fragmentation may mean m is a chain. Find the last packet. */
1528 while (m->m_nextpkt)
1529 m = m->m_nextpkt;
1530 sendchain_last = m;
1531 pktcnt++;
1532 }
1533
1534 /* Fill in next m from inputchain as appropriate. */
1535 m = inputchain;
1536 if (m != NULL) {
1537 /* Isolate m from rest of input chain. */
1538 inputchain = m->m_nextpkt;
1539 m->m_nextpkt = NULL;
1540
1541 /*
1542 * Clear exthdrs and ipsec_state so stale contents are not
1543 * reused. Note this also clears the exthdrs.merged flag.
1544 */
1545 bzero(&exthdrs, sizeof(exthdrs));
1546 bzero(&ipsec_state, sizeof(ipsec_state));
1547
1548 /* Continue looping. */
1549 goto loopit;
1550 }
1551
1552 /*
1553 * If we get here, there's no more mbufs in inputchain, so send the
1554 * sendchain if there is one.
1555 */
1556 if (pktcnt > 0) {
1557 error = nd6_output_list(ifp, origifp, sendchain, dst,
1558 ro->ro_rt, adv);
1559 /*
1560 * Fall through to done label even in error case because
1561 * nd6_output_list frees packetchain in both success and
1562 * failure cases.
1563 */
1564 }
1565
1566done:
1567 if (ifpp_save != NULL && *ifpp_save != NULL) {
1568 ifnet_release(*ifpp_save);
1569 *ifpp_save = NULL;
1570 }
1571 ROUTE_RELEASE(&ip6route);
1572#if IPSEC
1573 ROUTE_RELEASE(&ipsec_state.ro);
1574 if (sp != NULL)
1575 key_freesp(sp, KEY_SADB_UNLOCKED);
1576#endif /* IPSEC */
1577#if NECP
1578 ROUTE_RELEASE(&necp_route);
1579#endif /* NECP */
1580#if DUMMYNET
1581 ROUTE_RELEASE(&saved_route);
1582 ROUTE_RELEASE(&saved_ro_pmtu);
1583#endif /* DUMMYNET */
1584
1585 if (ia != NULL)
1586 IFA_REMREF(&ia->ia_ifa);
1587 if (src_ia != NULL)
1588 IFA_REMREF(&src_ia->ia_ifa);
1589 if (ifp != NULL)
1590 ifnet_release(ifp);
1591 if (origifp != NULL)
1592 ifnet_release(origifp);
1593 if (ip6_output_measure) {
1594 net_perf_measure_time(&net_perf, &start_tv, packets_processed);
1595 net_perf_histogram(&net_perf, packets_processed);
1596 }
1597 return (error);
1598
1599freehdrs:
1600 if (exthdrs.ip6e_hbh != NULL) {
1601 if (exthdrs.merged)
1602 panic("Double free of ip6e_hbh");
1603 m_freem(exthdrs.ip6e_hbh);
1604 }
1605 if (exthdrs.ip6e_dest1 != NULL) {
1606 if (exthdrs.merged)
1607 panic("Double free of ip6e_dest1");
1608 m_freem(exthdrs.ip6e_dest1);
1609 }
1610 if (exthdrs.ip6e_rthdr != NULL) {
1611 if (exthdrs.merged)
1612 panic("Double free of ip6e_rthdr");
1613 m_freem(exthdrs.ip6e_rthdr);
1614 }
1615 if (exthdrs.ip6e_dest2 != NULL) {
1616 if (exthdrs.merged)
1617 panic("Double free of ip6e_dest2");
1618 m_freem(exthdrs.ip6e_dest2);
1619 }
1620 /* FALLTHRU */
1621bad:
1622 if (inputchain != NULL)
1623 m_freem_list(inputchain);
1624 if (sendchain != NULL)
1625 m_freem_list(sendchain);
1626 if (m != NULL)
1627 m_freem(m);
1628
1629 goto done;
1630
1631#undef ipf_pktopts
1632#undef exthdrs
1633#undef ip6route
1634#undef ipsec_state
1635#undef saved_route
1636#undef saved_ro_pmtu
1637#undef args
1638}
1639
1640/* ip6_fragment_packet
1641 *
1642 * The fragmentation logic is rather complex:
1643 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1644 * 1-a: send as is if tlen <= path mtu
1645 * 1-b: fragment if tlen > path mtu
1646 *
1647 * 2: if user asks us not to fragment (dontfrag == 1)
1648 * 2-a: send as is if tlen <= interface mtu
1649 * 2-b: error if tlen > interface mtu
1650 *
1651 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1652 * always fragment
1653 *
1654 * 4: if dontfrag == 1 && alwaysfrag == 1
1655 * error, as we cannot handle this conflicting request
1656 */
1657
1658static int
1659ip6_fragment_packet(struct mbuf **mptr, struct ip6_pktopts *opt,
1660 struct ip6_exthdrs *exthdrsp, struct ifnet *ifp, uint32_t mtu,
1661 boolean_t alwaysfrag, uint32_t unfragpartlen, struct route_in6 *ro_pmtu,
1662 int nxt0, uint32_t optlen)
1663{
1664 VERIFY(NULL != mptr);
1665 struct mbuf *m = *mptr;
1666 int error = 0;
1667 size_t tlen = m->m_pkthdr.len;
1668 boolean_t dontfrag = (opt != NULL && (opt->ip6po_flags & IP6PO_DONTFRAG));
1669
1670 if (dontfrag && alwaysfrag) { /* case 4 */
1671 /* conflicting request - can't transmit */
1672 return EMSGSIZE;
6d2010ae
A
1673 }
1674
316670eb 1675 /* Access without acquiring nd_ifinfo lock for performance */
3e170ce0 1676 if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
6d2010ae
A
1677 /*
1678 * Even if the DONTFRAG option is specified, we cannot send the
1679 * packet when the data length is larger than the MTU of the
1680 * outgoing interface.
1681 * Notify the error by sending IPV6_PATHMTU ancillary data as
1682 * well as returning an error code (the latter is not described
1683 * in the API spec.)
1684 */
1685 u_int32_t mtu32;
1686 struct ip6ctlparam ip6cp;
1687
1688 mtu32 = (u_int32_t)mtu;
39236c6e 1689 bzero(&ip6cp, sizeof (ip6cp));
6d2010ae 1690 ip6cp.ip6c_cmdarg = (void *)&mtu32;
39236c6e 1691 pfctlinput2(PRC_MSGSIZE, SA(&ro_pmtu->ro_dst), (void *)&ip6cp);
3e170ce0 1692 return EMSGSIZE;
6d2010ae
A
1693 }
1694
1695 /*
1696 * transmit packet without fragmentation
1697 */
3e170ce0 1698 if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
39236c6e
A
1699 (tlen <= mtu || TSO_IPV6_OK(ifp, m) ||
1700 (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
3e170ce0
A
1701 /*
1702 * mppn not updated in this case because no new chain is formed
1703 * and inserted
1704 */
39236c6e 1705 ip6_output_checksum(ifp, mtu, m, nxt0, tlen, optlen);
3e170ce0
A
1706 } else {
1707 /*
1708 * time to fragment - cases 1-b and 3 are handled inside
1709 * ip6_do_fragmentation().
1710 * mppn is passed down to be updated to point at fragment chain.
1711 */
1712 error = ip6_do_fragmentation(mptr, optlen, ifp,
1713 unfragpartlen, mtod(m, struct ip6_hdr *), exthdrsp, mtu, nxt0);
6d2010ae
A
1714 }
1715
3e170ce0
A
1716 return error;
1717}
1718
1719/*
1720 * ip6_do_fragmentation() is called by ip6_fragment_packet() after determining
1721 * the packet needs to be fragmented. on success, morig is freed and a chain
1722 * of fragments is linked into the packet chain where morig existed. Otherwise,
1723 * an errno is returned.
1724 */
1725static int
1726ip6_do_fragmentation(struct mbuf **mptr, uint32_t optlen, struct ifnet *ifp,
1727 uint32_t unfragpartlen, struct ip6_hdr *ip6, struct ip6_exthdrs *exthdrsp,
1728 uint32_t mtu, int nxt0)
1729{
1730 VERIFY(NULL != mptr);
1731 int error = 0;
1732
1733 struct mbuf *morig = *mptr;
1734 struct mbuf *first_mbufp = NULL;
1735 struct mbuf *last_mbufp = NULL;
1736
1737 size_t tlen = morig->m_pkthdr.len;
1738
6d2010ae
A
1739 /*
1740 * try to fragment the packet. case 1-b and 3
1741 */
3e170ce0 1742 if ((morig->m_pkthdr.csum_flags & CSUM_TSO_IPV6)) {
39236c6e 1743 /* TSO and fragment aren't compatible */
39236c6e 1744 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1745 return EMSGSIZE;
39236c6e 1746 } else if (mtu < IPV6_MMTU) {
6d2010ae 1747 /* path MTU cannot be less than IPV6_MMTU */
1c79356b 1748 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1749 return EMSGSIZE;
6d2010ae
A
1750 } else if (ip6->ip6_plen == 0) {
1751 /* jumbo payload cannot be fragmented */
1c79356b 1752 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1753 return EMSGSIZE;
1c79356b 1754 } else {
3e170ce0
A
1755 size_t hlen, len, off;
1756 struct mbuf **mnext = NULL;
6d2010ae
A
1757 struct ip6_frag *ip6f;
1758 u_int32_t id = htonl(ip6_randomid());
1c79356b
A
1759 u_char nextproto;
1760
1761 /*
1762 * Too large for the destination or interface;
1763 * fragment if possible.
1764 * Must be able to put at least 8 bytes per fragment.
1765 */
1766 hlen = unfragpartlen;
1767 if (mtu > IPV6_MAXPACKET)
1768 mtu = IPV6_MAXPACKET;
9bccf70c 1769
39236c6e 1770 len = (mtu - hlen - sizeof (struct ip6_frag)) & ~7;
1c79356b 1771 if (len < 8) {
1c79356b 1772 in6_ifstat_inc(ifp, ifs6_out_fragfail);
3e170ce0 1773 return EMSGSIZE;
1c79356b
A
1774 }
1775
1c79356b
A
1776 /*
1777 * Change the next header field of the last header in the
1778 * unfragmentable part.
1779 */
3e170ce0
A
1780 if (exthdrsp->ip6e_rthdr != NULL) {
1781 nextproto = *mtod(exthdrsp->ip6e_rthdr, u_char *);
1782 *mtod(exthdrsp->ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1783 } else if (exthdrsp->ip6e_dest1 != NULL) {
1784 nextproto = *mtod(exthdrsp->ip6e_dest1, u_char *);
1785 *mtod(exthdrsp->ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1786 } else if (exthdrsp->ip6e_hbh != NULL) {
1787 nextproto = *mtod(exthdrsp->ip6e_hbh, u_char *);
1788 *mtod(exthdrsp->ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1c79356b
A
1789 } else {
1790 nextproto = ip6->ip6_nxt;
1791 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1792 }
1793
3e170ce0
A
1794 if (morig->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
1795 in6_delayed_cksum_offset(morig, 0, optlen, nxt0);
6d2010ae 1796
1c79356b
A
1797 /*
1798 * Loop through length of segment after first fragment,
55e303ae
A
1799 * make new header and copy data of each part and link onto
1800 * chain.
1c79356b 1801 */
1c79356b 1802 for (off = hlen; off < tlen; off += len) {
3e170ce0
A
1803 struct ip6_hdr *new_mhip6;
1804 struct mbuf *new_m;
1805 struct mbuf *m_frgpart;
39236c6e 1806
3e170ce0
A
1807 MGETHDR(new_m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1808 if (new_m == NULL) {
1c79356b
A
1809 error = ENOBUFS;
1810 ip6stat.ip6s_odropped++;
3e170ce0
A
1811 break;
1812 }
1813 new_m->m_pkthdr.rcvif = NULL;
1814 new_m->m_flags = morig->m_flags & M_COPYFLAGS;
1815
1816 if (first_mbufp != NULL) {
1817 /* Every pass through loop but first */
1818 *mnext = new_m;
1819 last_mbufp = new_m;
1820 } else {
1821 /* This is the first element of the fragment chain */
1822 first_mbufp = new_m;
1823 last_mbufp = new_m;
1c79356b 1824 }
3e170ce0
A
1825 mnext = &new_m->m_nextpkt;
1826
1827 new_m->m_data += max_linkhdr;
1828 new_mhip6 = mtod(new_m, struct ip6_hdr *);
1829 *new_mhip6 = *ip6;
1830 new_m->m_len = sizeof (*new_mhip6);
1831
1832 error = ip6_insertfraghdr(morig, new_m, hlen, &ip6f);
39236c6e 1833 if (error) {
1c79356b 1834 ip6stat.ip6s_odropped++;
3e170ce0 1835 break;
1c79356b 1836 }
3e170ce0 1837
1c79356b
A
1838 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1839 if (off + len >= tlen)
1840 len = tlen - off;
1841 else
1842 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
3e170ce0 1843 new_mhip6->ip6_plen = htons((u_short)(len + hlen +
39236c6e 1844 sizeof (*ip6f) - sizeof (struct ip6_hdr)));
3e170ce0
A
1845
1846 if ((m_frgpart = m_copy(morig, off, len)) == NULL) {
1c79356b
A
1847 error = ENOBUFS;
1848 ip6stat.ip6s_odropped++;
3e170ce0 1849 break;
1c79356b 1850 }
3e170ce0
A
1851 m_cat(new_m, m_frgpart);
1852 new_m->m_pkthdr.len = len + hlen + sizeof (*ip6f);
1853 new_m->m_pkthdr.rcvif = NULL;
316670eb 1854
3e170ce0
A
1855 M_COPY_CLASSIFIER(new_m, morig);
1856 M_COPY_PFTAG(new_m, morig);
316670eb 1857
39236c6e 1858#ifdef notyet
2d21ac55 1859#if CONFIG_MACF_NET
3e170ce0 1860 mac_create_fragment(morig, new_m);
39236c6e
A
1861#endif /* CONFIG_MACF_NET */
1862#endif /* notyet */
1863
1c79356b
A
1864 ip6f->ip6f_reserved = 0;
1865 ip6f->ip6f_ident = id;
1866 ip6f->ip6f_nxt = nextproto;
1867 ip6stat.ip6s_ofragments++;
1868 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1869 }
1870
3e170ce0
A
1871 if (error) {
1872 /* free all the fragments created */
1873 if (first_mbufp != NULL) {
1874 m_freem_list(first_mbufp);
1875 first_mbufp = NULL;
1876 }
1877 last_mbufp = NULL;
39236c6e 1878 } else {
3e170ce0
A
1879 /* successful fragmenting */
1880 m_freem(morig);
1881 *mptr = first_mbufp;
1882 last_mbufp->m_nextpkt = NULL;
1883 ip6stat.ip6s_fragmented++;
1884 in6_ifstat_inc(ifp, ifs6_out_fragok);
39236c6e 1885 }
1c79356b 1886 }
3e170ce0 1887 return error;
1c79356b
A
1888}
1889
1890static int
39236c6e 1891ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
1c79356b
A
1892{
1893 struct mbuf *m;
1894
1895 if (hlen > MCLBYTES)
39236c6e 1896 return (ENOBUFS); /* XXX */
1c79356b
A
1897
1898 MGET(m, M_DONTWAIT, MT_DATA);
39236c6e
A
1899 if (m == NULL)
1900 return (ENOBUFS);
1c79356b
A
1901
1902 if (hlen > MLEN) {
1903 MCLGET(m, M_DONTWAIT);
39236c6e 1904 if (!(m->m_flags & M_EXT)) {
1c79356b 1905 m_free(m);
6d2010ae 1906 return (ENOBUFS);
1c79356b
A
1907 }
1908 }
1909 m->m_len = hlen;
39236c6e 1910 if (hdr != NULL)
1c79356b
A
1911 bcopy(hdr, mtod(m, caddr_t), hlen);
1912
1913 *mp = m;
6d2010ae 1914 return (0);
1c79356b
A
1915}
1916
39236c6e
A
/*
 * Pass len on to the TCP or UDP output checksum statistics routine,
 * selected by proto.  Any other protocol is silently ignored; only
 * TCP and UDP stats are kept for now.
 */
static void
ip6_out_cksum_stats(int proto, u_int32_t len)
{
	if (proto == IPPROTO_TCP) {
		tcp_out6_cksum_stats(len);
	} else if (proto == IPPROTO_UDP) {
		udp_out6_cksum_stats(len);
	}
}
1932
6d2010ae 1933/*
39236c6e
A
1934 * Process a delayed payload checksum calculation (outbound path.)
1935 *
1936 * hoff is the number of bytes beyond the mbuf data pointer which
1937 * points to the IPv6 header. optlen is the number of bytes, if any,
1938 * between the end of IPv6 header and the beginning of the ULP payload
1939 * header, which represents the extension headers. If optlen is less
1940 * than zero, this routine will bail when it detects extension headers.
1941 *
1942 * Returns a bitmask representing all the work done in software.
6d2010ae 1943 */
39236c6e
A
1944uint32_t
1945in6_finalize_cksum(struct mbuf *m, uint32_t hoff, int32_t optlen,
1946 int32_t nxt0, uint32_t csum_flags)
6d2010ae 1947{
39236c6e
A
1948 unsigned char buf[sizeof (struct ip6_hdr)] __attribute__((aligned(8)));
1949 struct ip6_hdr *ip6;
1950 uint32_t offset, mlen, hlen, olen, sw_csum;
1951 uint16_t csum, ulpoff, plen;
1952 uint8_t nxt;
6d2010ae 1953
39236c6e
A
1954 _CASSERT(sizeof (csum) == sizeof (uint16_t));
1955 VERIFY(m->m_flags & M_PKTHDR);
1956
1957 sw_csum = (csum_flags & m->m_pkthdr.csum_flags);
1958
1959 if ((sw_csum &= CSUM_DELAY_IPV6_DATA) == 0)
1960 goto done;
1961
1962 mlen = m->m_pkthdr.len; /* total mbuf len */
1963 hlen = sizeof (*ip6); /* IPv6 header len */
1964
1965 /* sanity check (need at least IPv6 header) */
1966 if (mlen < (hoff + hlen)) {
1967 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr "
1968 "(%u+%u)\n", __func__, m, mlen, hoff, hlen);
1969 /* NOTREACHED */
1970 }
1971
1972 /*
1973 * In case the IPv6 header is not contiguous, or not 32-bit
1974 * aligned, copy it to a local buffer.
1975 */
1976 if ((hoff + hlen) > m->m_len ||
1977 !IP6_HDR_ALIGNED_P(mtod(m, caddr_t) + hoff)) {
1978 m_copydata(m, hoff, hlen, (caddr_t)buf);
1979 ip6 = (struct ip6_hdr *)(void *)buf;
1980 } else {
1981 ip6 = (struct ip6_hdr *)(void *)(m->m_data + hoff);
1982 }
1983
1984 nxt = ip6->ip6_nxt;
1985 plen = ntohs(ip6->ip6_plen);
1986 if (plen != (mlen - (hoff + hlen))) {
1987 plen = OSSwapInt16(plen);
1988 if (plen != (mlen - (hoff + hlen))) {
1989 /* Don't complain for jumbograms */
1990 if (plen != 0 || nxt != IPPROTO_HOPOPTS) {
1991 printf("%s: mbuf 0x%llx proto %d IPv6 "
1992 "plen %d (%x) [swapped %d (%x)] doesn't "
1993 "match actual packet length; %d is used "
1994 "instead\n", __func__,
1995 (uint64_t)VM_KERNEL_ADDRPERM(m), nxt,
1996 ip6->ip6_plen, ip6->ip6_plen, plen, plen,
1997 (mlen - (hoff + hlen)));
1998 }
1999 plen = mlen - (hoff + hlen);
2000 }
2001 }
2002
2003 if (optlen < 0) {
2004 /* next header isn't TCP/UDP and we don't know optlen, bail */
2005 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
2006 sw_csum = 0;
2007 goto done;
2008 }
2009 olen = 0;
2010 } else {
2011 /* caller supplied the original transport number; use it */
2012 if (nxt0 >= 0)
2013 nxt = nxt0;
2014 olen = optlen;
6d2010ae
A
2015 }
2016
39236c6e
A
2017 offset = hoff + hlen + olen; /* ULP header */
2018
2019 /* sanity check */
2020 if (mlen < offset) {
2021 panic("%s: mbuf %p pkt len (%u) < hoff+ip6_hdr+ext_hdr "
2022 "(%u+%u+%u)\n", __func__, m, mlen, hoff, hlen, olen);
2023 /* NOTREACHED */
2024 }
2025
2026 /*
2027 * offset is added to the lower 16-bit value of csum_data,
2028 * which is expected to contain the ULP offset; therefore
2029 * CSUM_PARTIAL offset adjustment must be undone.
2030 */
2031 if ((m->m_pkthdr.csum_flags & (CSUM_PARTIAL|CSUM_DATA_VALID)) ==
2032 (CSUM_PARTIAL|CSUM_DATA_VALID)) {
2033 /*
2034 * Get back the original ULP offset (this will
2035 * undo the CSUM_PARTIAL logic in ip6_output.)
2036 */
2037 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_tx_stuff -
2038 m->m_pkthdr.csum_tx_start);
2039 }
2040
2041 ulpoff = (m->m_pkthdr.csum_data & 0xffff); /* ULP csum offset */
2042
2043 if (mlen < (ulpoff + sizeof (csum))) {
2044 panic("%s: mbuf %p pkt len (%u) proto %d invalid ULP "
2045 "cksum offset (%u) cksum flags 0x%x\n", __func__,
2046 m, mlen, nxt, ulpoff, m->m_pkthdr.csum_flags);
2047 /* NOTREACHED */
2048 }
2049
2050 csum = inet6_cksum(m, 0, offset, plen - olen);
2051
2052 /* Update stats */
2053 ip6_out_cksum_stats(nxt, plen - olen);
2054
2055 /* RFC1122 4.1.3.4 */
2056 if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6))
2057 csum = 0xffff;
2058
2059 /* Insert the checksum in the ULP csum field */
2060 offset += ulpoff;
2061 if ((offset + sizeof (csum)) > m->m_len) {
2062 m_copyback(m, offset, sizeof (csum), &csum);
2063 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *) + hoff)) {
316670eb 2064 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
6d2010ae 2065 } else {
316670eb 2066 bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
6d2010ae 2067 }
39236c6e
A
2068 m->m_pkthdr.csum_flags &=
2069 ~(CSUM_DELAY_IPV6_DATA | CSUM_DATA_VALID | CSUM_PARTIAL);
2070
2071done:
2072 return (sw_csum);
6d2010ae 2073}
39236c6e 2074
1c79356b
A
2075/*
2076 * Insert jumbo payload option.
2077 */
2078static int
39236c6e 2079ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
1c79356b
A
2080{
2081 struct mbuf *mopt;
2082 u_char *optbuf;
9bccf70c 2083 u_int32_t v;
1c79356b 2084
39236c6e 2085#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1c79356b
A
2086
2087 /*
2088 * If there is no hop-by-hop options header, allocate new one.
2089 * If there is one but it doesn't have enough space to store the
2090 * jumbo payload option, allocate a cluster to store the whole options.
2091 * Otherwise, use it to store the options.
2092 */
39236c6e 2093 if (exthdrs->ip6e_hbh == NULL) {
1c79356b 2094 MGET(mopt, M_DONTWAIT, MT_DATA);
39236c6e 2095 if (mopt == NULL)
6d2010ae 2096 return (ENOBUFS);
1c79356b
A
2097 mopt->m_len = JUMBOOPTLEN;
2098 optbuf = mtod(mopt, u_char *);
2099 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
2100 exthdrs->ip6e_hbh = mopt;
2101 } else {
2102 struct ip6_hbh *hbh;
2103
2104 mopt = exthdrs->ip6e_hbh;
2105 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
9bccf70c
A
2106 /*
2107 * XXX assumption:
2108 * - exthdrs->ip6e_hbh is not referenced from places
2109 * other than exthdrs.
2110 * - exthdrs->ip6e_hbh is not an mbuf chain.
2111 */
6d2010ae 2112 u_int32_t oldoptlen = mopt->m_len;
9bccf70c 2113 struct mbuf *n;
1c79356b 2114
9bccf70c
A
2115 /*
2116 * XXX: give up if the whole (new) hbh header does
2117 * not fit even in an mbuf cluster.
2118 */
2119 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
6d2010ae 2120 return (ENOBUFS);
1c79356b 2121
9bccf70c
A
2122 /*
2123 * As a consequence, we must always prepare a cluster
2124 * at this point.
2125 */
2126 MGET(n, M_DONTWAIT, MT_DATA);
39236c6e 2127 if (n != NULL) {
9bccf70c 2128 MCLGET(n, M_DONTWAIT);
39236c6e 2129 if (!(n->m_flags & M_EXT)) {
9bccf70c
A
2130 m_freem(n);
2131 n = NULL;
2132 }
2133 }
39236c6e 2134 if (n == NULL)
6d2010ae 2135 return (ENOBUFS);
9bccf70c
A
2136 n->m_len = oldoptlen + JUMBOOPTLEN;
2137 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
6d2010ae
A
2138 oldoptlen);
2139 optbuf = mtod(n, u_char *) + oldoptlen;
9bccf70c
A
2140 m_freem(mopt);
2141 mopt = exthdrs->ip6e_hbh = n;
1c79356b
A
2142 } else {
2143 optbuf = mtod(mopt, u_char *) + mopt->m_len;
2144 mopt->m_len += JUMBOOPTLEN;
2145 }
2146 optbuf[0] = IP6OPT_PADN;
2147 optbuf[1] = 1;
2148
2149 /*
2150 * Adjust the header length according to the pad and
2151 * the jumbo payload option.
2152 */
2153 hbh = mtod(mopt, struct ip6_hbh *);
2154 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
2155 }
2156
2157 /* fill in the option. */
2158 optbuf[2] = IP6OPT_JUMBO;
2159 optbuf[3] = 4;
9bccf70c 2160 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
39236c6e 2161 bcopy(&v, &optbuf[4], sizeof (u_int32_t));
1c79356b
A
2162
2163 /* finally, adjust the packet header length */
2164 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
2165
6d2010ae 2166 return (0);
1c79356b
A
2167#undef JUMBOOPTLEN
2168}
2169
2170/*
2171 * Insert fragment header and copy unfragmentable header portions.
2172 */
2173static int
39236c6e
A
2174ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
2175 struct ip6_frag **frghdrp)
1c79356b
A
2176{
2177 struct mbuf *n, *mlast;
2178
39236c6e
A
2179 if (hlen > sizeof (struct ip6_hdr)) {
2180 n = m_copym(m0, sizeof (struct ip6_hdr),
2181 hlen - sizeof (struct ip6_hdr), M_DONTWAIT);
2182 if (n == NULL)
6d2010ae 2183 return (ENOBUFS);
1c79356b
A
2184 m->m_next = n;
2185 } else
2186 n = m;
2187
2188 /* Search for the last mbuf of unfragmentable part. */
2189 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
2190 ;
2191
39236c6e
A
2192 if (!(mlast->m_flags & M_EXT) &&
2193 M_TRAILINGSPACE(mlast) >= sizeof (struct ip6_frag)) {
2194 /* use the trailing space of the last mbuf for the frag hdr */
6d2010ae
A
2195 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
2196 mlast->m_len);
39236c6e
A
2197 mlast->m_len += sizeof (struct ip6_frag);
2198 m->m_pkthdr.len += sizeof (struct ip6_frag);
1c79356b
A
2199 } else {
2200 /* allocate a new mbuf for the fragment header */
2201 struct mbuf *mfrg;
2202
2203 MGET(mfrg, M_DONTWAIT, MT_DATA);
39236c6e 2204 if (mfrg == NULL)
6d2010ae 2205 return (ENOBUFS);
39236c6e 2206 mfrg->m_len = sizeof (struct ip6_frag);
1c79356b
A
2207 *frghdrp = mtod(mfrg, struct ip6_frag *);
2208 mlast->m_next = mfrg;
2209 }
2210
6d2010ae 2211 return (0);
1c79356b
A
2212}
2213
6d2010ae
A
2214static int
2215ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
2216 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
39236c6e 2217 boolean_t *alwaysfragp)
6d2010ae
A
2218{
2219 u_int32_t mtu = 0;
39236c6e 2220 boolean_t alwaysfrag = FALSE;
6d2010ae 2221 int error = 0;
55e303ae 2222
6d2010ae
A
2223 if (ro_pmtu != ro) {
2224 /* The first hop and the final destination may differ. */
39236c6e
A
2225 struct sockaddr_in6 *sa6_dst = SIN6(&ro_pmtu->ro_dst);
2226 if (ROUTE_UNUSABLE(ro_pmtu) ||
2227 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))
2228 ROUTE_RELEASE(ro_pmtu);
2229
6d2010ae 2230 if (ro_pmtu->ro_rt == NULL) {
39236c6e 2231 bzero(sa6_dst, sizeof (*sa6_dst));
6d2010ae 2232 sa6_dst->sin6_family = AF_INET6;
39236c6e 2233 sa6_dst->sin6_len = sizeof (struct sockaddr_in6);
6d2010ae
A
2234 sa6_dst->sin6_addr = *dst;
2235
2236 rtalloc_scoped((struct route *)ro_pmtu,
2237 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
2238 }
2239 }
2240
6d2010ae
A
2241 if (ro_pmtu->ro_rt != NULL) {
2242 u_int32_t ifmtu;
2243
fe8ab488
A
2244 if (ifp == NULL)
2245 ifp = ro_pmtu->ro_rt->rt_ifp;
316670eb 2246 /* Access without acquiring nd_ifinfo lock for performance */
6d2010ae 2247 ifmtu = IN6_LINKMTU(ifp);
6d2010ae 2248
39236c6e
A
2249 /*
2250 * Access rmx_mtu without holding the route entry lock,
2251 * for performance; this isn't something that changes
2252 * often, so optimize.
2253 */
6d2010ae
A
2254 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
2255 if (mtu > ifmtu || mtu == 0) {
2256 /*
2257 * The MTU on the route is larger than the MTU on
2258 * the interface! This shouldn't happen, unless the
2259 * MTU of the interface has been changed after the
2260 * interface was brought up. Change the MTU in the
2261 * route to match the interface MTU (as long as the
2262 * field isn't locked).
2263 *
2264 * if MTU on the route is 0, we need to fix the MTU.
2265 * this case happens with path MTU discovery timeouts.
2266 */
39236c6e
A
2267 mtu = ifmtu;
2268 if (!(ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU))
2269 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
2270 } else if (mtu < IPV6_MMTU) {
6d2010ae
A
2271 /*
2272 * RFC2460 section 5, last paragraph:
2273 * if we record ICMPv6 too big message with
2274 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
2275 * or smaller, with framgent header attached.
2276 * (fragment header is needed regardless from the
2277 * packet size, for translators to identify packets)
2278 */
39236c6e 2279 alwaysfrag = TRUE;
6d2010ae 2280 mtu = IPV6_MMTU;
39236c6e 2281 }
6d2010ae
A
2282 } else {
2283 if (ifp) {
316670eb 2284 /* Don't hold nd_ifinfo lock for performance */
6d2010ae 2285 mtu = IN6_LINKMTU(ifp);
39236c6e 2286 } else {
6d2010ae 2287 error = EHOSTUNREACH; /* XXX */
39236c6e 2288 }
6d2010ae
A
2289 }
2290
2291 *mtup = mtu;
39236c6e 2292 if (alwaysfragp != NULL)
6d2010ae
A
2293 *alwaysfragp = alwaysfrag;
2294 return (error);
2295}
2296
2297/*
1c79356b
A
2298 * IP6 socket option processing.
2299 */
1c79356b 2300int
39236c6e 2301ip6_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2302{
6d2010ae
A
2303 int optdatalen, uproto;
2304 void *optdata;
9bccf70c
A
2305 int privileged;
2306 struct inpcb *in6p = sotoinpcb(so);
b0d623f7 2307 int error = 0, optval = 0;
2d21ac55
A
2308 int level, op = -1, optname = 0;
2309 int optlen = 0;
b0d623f7 2310 struct proc *p;
1c79356b 2311
39236c6e
A
2312 VERIFY(sopt != NULL);
2313
b0d623f7
A
2314 level = sopt->sopt_level;
2315 op = sopt->sopt_dir;
2316 optname = sopt->sopt_name;
2317 optlen = sopt->sopt_valsize;
2318 p = sopt->sopt_p;
39236c6e 2319 uproto = (int)SOCK_PROTO(so);
1c79356b 2320
b0d623f7 2321 privileged = (proc_suser(p) == 0);
1c79356b
A
2322
2323 if (level == IPPROTO_IPV6) {
2324 switch (op) {
1c79356b 2325 case SOPT_SET:
1c79356b 2326 switch (optname) {
39236c6e 2327 case IPV6_2292PKTOPTIONS: {
1c79356b
A
2328 struct mbuf *m;
2329
39236c6e 2330 error = soopt_getm(sopt, &m);
2d21ac55 2331 if (error != 0)
1c79356b 2332 break;
39236c6e 2333 error = soopt_mcopyin(sopt, m);
2d21ac55 2334 if (error != 0)
1c79356b
A
2335 break;
2336 error = ip6_pcbopts(&in6p->in6p_outputopts,
39236c6e
A
2337 m, so, sopt);
2338 m_freem(m);
1c79356b
A
2339 break;
2340 }
9bccf70c 2341
1c79356b
A
2342 /*
2343 * Use of some Hop-by-Hop options or some
2344 * Destination options, might require special
2345 * privilege. That is, normal applications
2346 * (without special privilege) might be forbidden
2347 * from setting certain options in outgoing packets,
2348 * and might never see certain options in received
2349 * packets. [RFC 2292 Section 6]
2350 * KAME specific note:
2351 * KAME prevents non-privileged users from sending or
2352 * receiving ANY hbh/dst options in order to avoid
2353 * overhead of parsing options in the kernel.
2354 */
6d2010ae
A
2355 case IPV6_RECVHOPOPTS:
2356 case IPV6_RECVDSTOPTS:
2357 case IPV6_RECVRTHDRDSTOPTS:
39236c6e
A
2358 if (!privileged)
2359 break;
6d2010ae 2360 /* FALLTHROUGH */
1c79356b 2361 case IPV6_UNICAST_HOPS:
6d2010ae 2362 case IPV6_HOPLIMIT:
6d2010ae
A
2363 case IPV6_RECVPKTINFO:
2364 case IPV6_RECVHOPLIMIT:
2365 case IPV6_RECVRTHDR:
2366 case IPV6_RECVPATHMTU:
b0d623f7 2367 case IPV6_RECVTCLASS:
9bccf70c 2368 case IPV6_V6ONLY:
6d2010ae 2369 case IPV6_AUTOFLOWLABEL:
39236c6e 2370 if (optlen != sizeof (int)) {
1c79356b 2371 error = EINVAL;
9bccf70c
A
2372 break;
2373 }
2374 error = sooptcopyin(sopt, &optval,
39236c6e 2375 sizeof (optval), sizeof (optval));
9bccf70c
A
2376 if (error)
2377 break;
1c79356b 2378
39236c6e 2379 switch (optname) {
9bccf70c 2380 case IPV6_UNICAST_HOPS:
39236c6e 2381 if (optval < -1 || optval >= 256) {
9bccf70c 2382 error = EINVAL;
39236c6e 2383 } else {
9bccf70c
A
2384 /* -1 = kernel default */
2385 in6p->in6p_hops = optval;
39236c6e
A
2386 if (in6p->inp_vflag &
2387 INP_IPV4) {
2388 in6p->inp_ip_ttl =
2389 optval;
2390 }
9bccf70c
A
2391 }
2392 break;
39236c6e
A
2393#define OPTSET(bit) do { \
2394 if (optval) \
2395 in6p->inp_flags |= (bit); \
2396 else \
2397 in6p->inp_flags &= ~(bit); \
2398} while (0)
2399
2400#define OPTSET2292(bit) do { \
2401 in6p->inp_flags |= IN6P_RFC2292; \
2402 if (optval) \
2403 in6p->inp_flags |= (bit); \
2404 else \
2405 in6p->inp_flags &= ~(bit); \
2406} while (0)
2407
2408#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
6d2010ae
A
2409
2410 case IPV6_RECVPKTINFO:
2411 /* cannot mix with RFC2292 */
2412 if (OPTBIT(IN6P_RFC2292)) {
2413 error = EINVAL;
2414 break;
2415 }
2416 OPTSET(IN6P_PKTINFO);
2417 break;
2418
39236c6e 2419 case IPV6_HOPLIMIT: {
6d2010ae
A
2420 struct ip6_pktopts **optp;
2421
2422 /* cannot mix with RFC2292 */
2423 if (OPTBIT(IN6P_RFC2292)) {
2424 error = EINVAL;
2425 break;
2426 }
2427 optp = &in6p->in6p_outputopts;
2428 error = ip6_pcbopt(IPV6_HOPLIMIT,
39236c6e 2429 (u_char *)&optval, sizeof (optval),
6d2010ae
A
2430 optp, uproto);
2431 break;
2432 }
2433
2434 case IPV6_RECVHOPLIMIT:
2435 /* cannot mix with RFC2292 */
2436 if (OPTBIT(IN6P_RFC2292)) {
2437 error = EINVAL;
2438 break;
2439 }
2440 OPTSET(IN6P_HOPLIMIT);
2441 break;
2442
2443 case IPV6_RECVHOPOPTS:
2444 /* cannot mix with RFC2292 */
2445 if (OPTBIT(IN6P_RFC2292)) {
2446 error = EINVAL;
2447 break;
2448 }
2449 OPTSET(IN6P_HOPOPTS);
2450 break;
2451
2452 case IPV6_RECVDSTOPTS:
2453 /* cannot mix with RFC2292 */
2454 if (OPTBIT(IN6P_RFC2292)) {
2455 error = EINVAL;
2456 break;
2457 }
2458 OPTSET(IN6P_DSTOPTS);
2459 break;
1c79356b 2460
6d2010ae
A
2461 case IPV6_RECVRTHDRDSTOPTS:
2462 /* cannot mix with RFC2292 */
2463 if (OPTBIT(IN6P_RFC2292)) {
2464 error = EINVAL;
2465 break;
2466 }
2467 OPTSET(IN6P_RTHDRDSTOPTS);
2468 break;
2469
2470 case IPV6_RECVRTHDR:
2471 /* cannot mix with RFC2292 */
2472 if (OPTBIT(IN6P_RFC2292)) {
2473 error = EINVAL;
2474 break;
2475 }
2476 OPTSET(IN6P_RTHDR);
9bccf70c 2477 break;
1c79356b 2478
6d2010ae
A
2479 case IPV6_RECVPATHMTU:
2480 /*
2481 * We ignore this option for TCP
2482 * sockets.
2483 * (RFC3542 leaves this case
2484 * unspecified.)
2485 */
2486 if (uproto != IPPROTO_TCP)
2487 OPTSET(IN6P_MTU);
9bccf70c 2488 break;
1c79356b 2489
9bccf70c
A
2490 case IPV6_V6ONLY:
2491 /*
2492 * make setsockopt(IPV6_V6ONLY)
2493 * available only prior to bind(2).
2494 * see ipng mailing list, Jun 22 2001.
2495 */
6d2010ae 2496 if (in6p->inp_lport ||
39236c6e
A
2497 !IN6_IS_ADDR_UNSPECIFIED(
2498 &in6p->in6p_laddr)) {
9bccf70c 2499 error = EINVAL;
1c79356b 2500 break;
1c79356b 2501 }
9bccf70c 2502 OPTSET(IN6P_IPV6_V6ONLY);
55e303ae 2503 if (optval)
6d2010ae 2504 in6p->inp_vflag &= ~INP_IPV4;
55e303ae 2505 else
6d2010ae 2506 in6p->inp_vflag |= INP_IPV4;
9bccf70c 2507 break;
39236c6e 2508
b0d623f7 2509 case IPV6_RECVTCLASS:
6d2010ae 2510 /* we can mix with RFC2292 */
b0d623f7
A
2511 OPTSET(IN6P_TCLASS);
2512 break;
39236c6e 2513
6d2010ae
A
2514 case IPV6_AUTOFLOWLABEL:
2515 OPTSET(IN6P_AUTOFLOWLABEL);
2516 break;
2517
1c79356b
A
2518 }
2519 break;
9bccf70c 2520
6d2010ae
A
2521 case IPV6_TCLASS:
2522 case IPV6_DONTFRAG:
2523 case IPV6_USE_MIN_MTU:
39236c6e
A
2524 case IPV6_PREFER_TEMPADDR: {
2525 struct ip6_pktopts **optp;
2526
2527 if (optlen != sizeof (optval)) {
6d2010ae
A
2528 error = EINVAL;
2529 break;
2530 }
2531 error = sooptcopyin(sopt, &optval,
39236c6e 2532 sizeof (optval), sizeof (optval));
6d2010ae
A
2533 if (error)
2534 break;
39236c6e
A
2535
2536 optp = &in6p->in6p_outputopts;
2537 error = ip6_pcbopt(optname, (u_char *)&optval,
2538 sizeof (optval), optp, uproto);
2539 break;
2540 }
6d2010ae
A
2541
2542 case IPV6_2292PKTINFO:
2543 case IPV6_2292HOPLIMIT:
2544 case IPV6_2292HOPOPTS:
2545 case IPV6_2292DSTOPTS:
2546 case IPV6_2292RTHDR:
9bccf70c 2547 /* RFC 2292 */
39236c6e 2548 if (optlen != sizeof (int)) {
9bccf70c
A
2549 error = EINVAL;
2550 break;
2551 }
2552 error = sooptcopyin(sopt, &optval,
39236c6e 2553 sizeof (optval), sizeof (optval));
9bccf70c
A
2554 if (error)
2555 break;
2556 switch (optname) {
6d2010ae
A
2557 case IPV6_2292PKTINFO:
2558 OPTSET2292(IN6P_PKTINFO);
9bccf70c 2559 break;
6d2010ae
A
2560 case IPV6_2292HOPLIMIT:
2561 OPTSET2292(IN6P_HOPLIMIT);
9bccf70c 2562 break;
6d2010ae 2563 case IPV6_2292HOPOPTS:
9bccf70c
A
2564 /*
2565 * Check super-user privilege.
2566 * See comments for IPV6_RECVHOPOPTS.
2567 */
2568 if (!privileged)
39236c6e 2569 return (EPERM);
6d2010ae 2570 OPTSET2292(IN6P_HOPOPTS);
9bccf70c 2571 break;
6d2010ae 2572 case IPV6_2292DSTOPTS:
9bccf70c 2573 if (!privileged)
39236c6e
A
2574 return (EPERM);
2575 OPTSET2292(IN6P_DSTOPTS|
2576 IN6P_RTHDRDSTOPTS); /* XXX */
9bccf70c 2577 break;
6d2010ae
A
2578 case IPV6_2292RTHDR:
2579 OPTSET2292(IN6P_RTHDR);
1c79356b 2580 break;
1c79356b
A
2581 }
2582 break;
39236c6e 2583
6d2010ae
A
2584 case IPV6_3542PKTINFO:
2585 case IPV6_3542HOPOPTS:
2586 case IPV6_3542RTHDR:
2587 case IPV6_3542DSTOPTS:
2588 case IPV6_RTHDRDSTOPTS:
39236c6e 2589 case IPV6_3542NEXTHOP: {
316670eb 2590 struct ip6_pktopts **optp;
6d2010ae
A
2591 /* new advanced API (RFC3542) */
2592 struct mbuf *m;
1c79356b 2593
6d2010ae
A
2594 /* cannot mix with RFC2292 */
2595 if (OPTBIT(IN6P_RFC2292)) {
b0d623f7
A
2596 error = EINVAL;
2597 break;
2598 }
6d2010ae
A
2599 error = soopt_getm(sopt, &m);
2600 if (error != 0)
2601 break;
2602 error = soopt_mcopyin(sopt, m);
39236c6e 2603 if (error != 0)
b0d623f7 2604 break;
39236c6e 2605
316670eb 2606 optp = &in6p->in6p_outputopts;
39236c6e
A
2607 error = ip6_pcbopt(optname, mtod(m, u_char *),
2608 m->m_len, optp, uproto);
6d2010ae 2609 m_freem(m);
b0d623f7 2610 break;
6d2010ae
A
2611 }
2612#undef OPTSET
1c79356b
A
2613 case IPV6_MULTICAST_IF:
2614 case IPV6_MULTICAST_HOPS:
2615 case IPV6_MULTICAST_LOOP:
2616 case IPV6_JOIN_GROUP:
2617 case IPV6_LEAVE_GROUP:
6d2010ae
A
2618 case IPV6_MSFILTER:
2619 case MCAST_BLOCK_SOURCE:
2620 case MCAST_UNBLOCK_SOURCE:
2621 case MCAST_JOIN_GROUP:
2622 case MCAST_LEAVE_GROUP:
2623 case MCAST_JOIN_SOURCE_GROUP:
2624 case MCAST_LEAVE_SOURCE_GROUP:
2625 error = ip6_setmoptions(in6p, sopt);
1c79356b
A
2626 break;
2627
9bccf70c
A
2628 case IPV6_PORTRANGE:
2629 error = sooptcopyin(sopt, &optval,
39236c6e 2630 sizeof (optval), sizeof (optval));
9bccf70c
A
2631 if (error)
2632 break;
1c79356b 2633
9bccf70c
A
2634 switch (optval) {
2635 case IPV6_PORTRANGE_DEFAULT:
6d2010ae
A
2636 in6p->inp_flags &= ~(INP_LOWPORT);
2637 in6p->inp_flags &= ~(INP_HIGHPORT);
9bccf70c 2638 break;
1c79356b 2639
9bccf70c 2640 case IPV6_PORTRANGE_HIGH:
6d2010ae
A
2641 in6p->inp_flags &= ~(INP_LOWPORT);
2642 in6p->inp_flags |= INP_HIGHPORT;
9bccf70c 2643 break;
1c79356b 2644
9bccf70c 2645 case IPV6_PORTRANGE_LOW:
6d2010ae
A
2646 in6p->inp_flags &= ~(INP_HIGHPORT);
2647 in6p->inp_flags |= INP_LOWPORT;
9bccf70c 2648 break;
1c79356b 2649
9bccf70c
A
2650 default:
2651 error = EINVAL;
2652 break;
2653 }
1c79356b 2654 break;
1c79356b 2655#if IPSEC
39236c6e 2656 case IPV6_IPSEC_POLICY: {
1c79356b
A
2657 caddr_t req = NULL;
2658 size_t len = 0;
1c79356b 2659 struct mbuf *m;
fe8ab488 2660
39236c6e 2661 if ((error = soopt_getm(sopt, &m)) != 0)
1c79356b 2662 break;
39236c6e 2663 if ((error = soopt_mcopyin(sopt, m)) != 0)
1c79356b 2664 break;
fe8ab488 2665
39236c6e
A
2666 req = mtod(m, caddr_t);
2667 len = m->m_len;
1c79356b 2668 error = ipsec6_set_policy(in6p, optname, req,
fe8ab488 2669 len, privileged);
1c79356b 2670 m_freem(m);
1c79356b 2671 break;
39236c6e
A
2672 }
2673#endif /* IPSEC */
2d21ac55 2674#if IPFIREWALL
1c79356b
A
2675 case IPV6_FW_ADD:
2676 case IPV6_FW_DEL:
2677 case IPV6_FW_FLUSH:
39236c6e 2678 case IPV6_FW_ZERO: {
060df5ea
A
2679 if (ip6_fw_ctl_ptr == NULL)
2680 load_ip6fw();
2681 if (ip6_fw_ctl_ptr != NULL)
2682 error = (*ip6_fw_ctl_ptr)(sopt);
2683 else
39236c6e 2684 error = ENOPROTOOPT;
1c79356b 2685 break;
39236c6e 2686 }
2d21ac55 2687#endif /* IPFIREWALL */
6d2010ae
A
2688 /*
2689 * IPv6 variant of IP_BOUND_IF; for details see
2690 * comments on IP_BOUND_IF in ip_ctloutput().
2691 */
2692 case IPV6_BOUND_IF:
2693 /* This option is settable only on IPv6 */
2694 if (!(in6p->inp_vflag & INP_IPV6)) {
2695 error = EINVAL;
2696 break;
2697 }
2698
2699 error = sooptcopyin(sopt, &optval,
2700 sizeof (optval), sizeof (optval));
2701
2702 if (error)
2703 break;
2704
39236c6e 2705 error = inp_bindif(in6p, optval, NULL);
6d2010ae
A
2706 break;
2707
2708 case IPV6_NO_IFT_CELLULAR:
2709 /* This option is settable only for IPv6 */
2710 if (!(in6p->inp_vflag & INP_IPV6)) {
2711 error = EINVAL;
2712 break;
2713 }
2714
2715 error = sooptcopyin(sopt, &optval,
2716 sizeof (optval), sizeof (optval));
2717
2718 if (error)
2719 break;
2720
39236c6e 2721 /* once set, it cannot be unset */
fe8ab488 2722 if (!optval && INP_NO_CELLULAR(in6p)) {
39236c6e
A
2723 error = EINVAL;
2724 break;
2725 }
2726
2727 error = so_set_restrictions(so,
2728 SO_RESTRICT_DENY_CELLULAR);
6d2010ae
A
2729 break;
2730
2731 case IPV6_OUT_IF:
2732 /* This option is not settable */
2733 error = EINVAL;
2734 break;
2735
1c79356b
A
2736 default:
2737 error = ENOPROTOOPT;
2738 break;
2739 }
1c79356b
A
2740 break;
2741
1c79356b 2742 case SOPT_GET:
1c79356b
A
2743 switch (optname) {
2744
6d2010ae
A
2745 case IPV6_2292PKTOPTIONS:
2746 /*
2747 * RFC3542 (effectively) deprecated the
2748 * semantics of the 2292-style pktoptions.
2749 * Since it was not reliable in nature (i.e.,
2750 * applications had to expect the lack of some
2751 * information after all), it would make sense
2752 * to simplify this part by always returning
2753 * empty data.
2754 */
2755 sopt->sopt_valsize = 0;
1c79356b
A
2756 break;
2757
6d2010ae
A
2758 case IPV6_RECVHOPOPTS:
2759 case IPV6_RECVDSTOPTS:
2760 case IPV6_RECVRTHDRDSTOPTS:
1c79356b 2761 case IPV6_UNICAST_HOPS:
6d2010ae
A
2762 case IPV6_RECVPKTINFO:
2763 case IPV6_RECVHOPLIMIT:
2764 case IPV6_RECVRTHDR:
2765 case IPV6_RECVPATHMTU:
9bccf70c 2766 case IPV6_V6ONLY:
1c79356b 2767 case IPV6_PORTRANGE:
b0d623f7 2768 case IPV6_RECVTCLASS:
6d2010ae 2769 case IPV6_AUTOFLOWLABEL:
1c79356b
A
2770 switch (optname) {
2771
6d2010ae
A
2772 case IPV6_RECVHOPOPTS:
2773 optval = OPTBIT(IN6P_HOPOPTS);
2774 break;
2775
2776 case IPV6_RECVDSTOPTS:
2777 optval = OPTBIT(IN6P_DSTOPTS);
2778 break;
2779
2780 case IPV6_RECVRTHDRDSTOPTS:
2781 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2782 break;
2783
1c79356b 2784 case IPV6_UNICAST_HOPS:
1c79356b 2785 optval = in6p->in6p_hops;
1c79356b
A
2786 break;
2787
6d2010ae
A
2788 case IPV6_RECVPKTINFO:
2789 optval = OPTBIT(IN6P_PKTINFO);
2790 break;
2791
2792 case IPV6_RECVHOPLIMIT:
2793 optval = OPTBIT(IN6P_HOPLIMIT);
2794 break;
2795
2796 case IPV6_RECVRTHDR:
2797 optval = OPTBIT(IN6P_RTHDR);
2798 break;
2799
2800 case IPV6_RECVPATHMTU:
2801 optval = OPTBIT(IN6P_MTU);
1c79356b
A
2802 break;
2803
9bccf70c 2804 case IPV6_V6ONLY:
55e303ae 2805 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1c79356b 2806 break;
1c79356b 2807
39236c6e 2808 case IPV6_PORTRANGE: {
1c79356b 2809 int flags;
6d2010ae
A
2810 flags = in6p->inp_flags;
2811 if (flags & INP_HIGHPORT)
1c79356b 2812 optval = IPV6_PORTRANGE_HIGH;
6d2010ae 2813 else if (flags & INP_LOWPORT)
1c79356b
A
2814 optval = IPV6_PORTRANGE_LOW;
2815 else
2816 optval = 0;
2817 break;
39236c6e 2818 }
b0d623f7
A
2819 case IPV6_RECVTCLASS:
2820 optval = OPTBIT(IN6P_TCLASS);
2821 break;
2822
6d2010ae
A
2823 case IPV6_AUTOFLOWLABEL:
2824 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2825 break;
1c79356b 2826 }
6d2010ae
A
2827 if (error)
2828 break;
1c79356b 2829 error = sooptcopyout(sopt, &optval,
39236c6e 2830 sizeof (optval));
1c79356b
A
2831 break;
2832
39236c6e 2833 case IPV6_PATHMTU: {
6d2010ae
A
2834 u_int32_t pmtu = 0;
2835 struct ip6_mtuinfo mtuinfo;
2836 struct route_in6 sro;
2837
39236c6e 2838 bzero(&sro, sizeof (sro));
6d2010ae
A
2839
2840 if (!(so->so_state & SS_ISCONNECTED))
2841 return (ENOTCONN);
2842 /*
2843 * XXX: we dot not consider the case of source
2844 * routing, or optional information to specify
2845 * the outgoing interface.
2846 */
2847 error = ip6_getpmtu(&sro, NULL, NULL,
2848 &in6p->in6p_faddr, &pmtu, NULL);
39236c6e 2849 ROUTE_RELEASE(&sro);
6d2010ae
A
2850 if (error)
2851 break;
2852 if (pmtu > IPV6_MAXPACKET)
2853 pmtu = IPV6_MAXPACKET;
2854
39236c6e 2855 bzero(&mtuinfo, sizeof (mtuinfo));
6d2010ae
A
2856 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2857 optdata = (void *)&mtuinfo;
39236c6e 2858 optdatalen = sizeof (mtuinfo);
6d2010ae
A
2859 error = sooptcopyout(sopt, optdata,
2860 optdatalen);
2861 break;
2862 }
2863
2864 case IPV6_2292PKTINFO:
2865 case IPV6_2292HOPLIMIT:
2866 case IPV6_2292HOPOPTS:
2867 case IPV6_2292RTHDR:
2868 case IPV6_2292DSTOPTS:
9bccf70c 2869 switch (optname) {
6d2010ae 2870 case IPV6_2292PKTINFO:
9bccf70c 2871 optval = OPTBIT(IN6P_PKTINFO);
1c79356b 2872 break;
6d2010ae 2873 case IPV6_2292HOPLIMIT:
1c79356b
A
2874 optval = OPTBIT(IN6P_HOPLIMIT);
2875 break;
6d2010ae 2876 case IPV6_2292HOPOPTS:
9bccf70c 2877 optval = OPTBIT(IN6P_HOPOPTS);
1c79356b 2878 break;
6d2010ae 2879 case IPV6_2292RTHDR:
9bccf70c 2880 optval = OPTBIT(IN6P_RTHDR);
1c79356b 2881 break;
6d2010ae 2882 case IPV6_2292DSTOPTS:
39236c6e
A
2883 optval = OPTBIT(IN6P_DSTOPTS|
2884 IN6P_RTHDRDSTOPTS);
1c79356b 2885 break;
1c79356b 2886 }
1c79356b 2887 error = sooptcopyout(sopt, &optval,
39236c6e 2888 sizeof (optval));
1c79356b 2889 break;
39236c6e 2890
6d2010ae
A
2891 case IPV6_PKTINFO:
2892 case IPV6_HOPOPTS:
2893 case IPV6_RTHDR:
2894 case IPV6_DSTOPTS:
2895 case IPV6_RTHDRDSTOPTS:
2896 case IPV6_NEXTHOP:
b0d623f7 2897 case IPV6_TCLASS:
6d2010ae
A
2898 case IPV6_DONTFRAG:
2899 case IPV6_USE_MIN_MTU:
2900 case IPV6_PREFER_TEMPADDR:
2901 error = ip6_getpcbopt(in6p->in6p_outputopts,
2902 optname, sopt);
b0d623f7
A
2903 break;
2904
1c79356b
A
2905 case IPV6_MULTICAST_IF:
2906 case IPV6_MULTICAST_HOPS:
2907 case IPV6_MULTICAST_LOOP:
6d2010ae
A
2908 case IPV6_MSFILTER:
2909 error = ip6_getmoptions(in6p, sopt);
1c79356b 2910 break;
1c79356b 2911#if IPSEC
39236c6e 2912 case IPV6_IPSEC_POLICY: {
fe8ab488 2913 error = 0; /* This option is no longer supported */
1c79356b 2914 break;
39236c6e
A
2915 }
2916#endif /* IPSEC */
2d21ac55 2917#if IPFIREWALL
39236c6e 2918 case IPV6_FW_GET: {
060df5ea
A
2919 if (ip6_fw_ctl_ptr == NULL)
2920 load_ip6fw();
2921 if (ip6_fw_ctl_ptr != NULL)
2922 error = (*ip6_fw_ctl_ptr)(sopt);
2923 else
39236c6e 2924 error = ENOPROTOOPT;
1c79356b 2925 break;
39236c6e 2926 }
2d21ac55 2927#endif /* IPFIREWALL */
6d2010ae
A
2928 case IPV6_BOUND_IF:
2929 if (in6p->inp_flags & INP_BOUND_IF)
316670eb 2930 optval = in6p->inp_boundifp->if_index;
6d2010ae
A
2931 error = sooptcopyout(sopt, &optval,
2932 sizeof (optval));
2933 break;
2934
2935 case IPV6_NO_IFT_CELLULAR:
fe8ab488 2936 optval = INP_NO_CELLULAR(in6p) ? 1 : 0;
6d2010ae
A
2937 error = sooptcopyout(sopt, &optval,
2938 sizeof (optval));
2939 break;
2940
2941 case IPV6_OUT_IF:
316670eb
A
2942 optval = (in6p->in6p_last_outifp != NULL) ?
2943 in6p->in6p_last_outifp->if_index : 0;
6d2010ae
A
2944 error = sooptcopyout(sopt, &optval,
2945 sizeof (optval));
2946 break;
2947
1c79356b
A
2948 default:
2949 error = ENOPROTOOPT;
2950 break;
2951 }
2952 break;
2953 }
3e170ce0
A
2954 } else if (level == IPPROTO_UDP) {
2955 error = udp_ctloutput(so, sopt);
1c79356b
A
2956 } else {
2957 error = EINVAL;
1c79356b 2958 }
39236c6e 2959 return (error);
1c79356b
A
2960}
2961
6d2010ae
A
2962int
2963ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2964{
6d2010ae
A
2965 int error = 0, optval, optlen;
2966 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2967 struct inpcb *in6p = sotoinpcb(so);
2968 int level, op, optname;
1c79356b 2969
6d2010ae
A
2970 level = sopt->sopt_level;
2971 op = sopt->sopt_dir;
2972 optname = sopt->sopt_name;
2973 optlen = sopt->sopt_valsize;
2974
39236c6e 2975 if (level != IPPROTO_IPV6)
6d2010ae 2976 return (EINVAL);
6d2010ae
A
2977
2978 switch (optname) {
2979 case IPV6_CHECKSUM:
2980 /*
2981 * For ICMPv6 sockets, no modification allowed for checksum
2982 * offset, permit "no change" values to help existing apps.
2983 *
2984 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2985 * for an ICMPv6 socket will fail."
2986 * The current behavior does not meet RFC3542.
2987 */
2988 switch (op) {
2989 case SOPT_SET:
39236c6e 2990 if (optlen != sizeof (int)) {
6d2010ae
A
2991 error = EINVAL;
2992 break;
2993 }
39236c6e
A
2994 error = sooptcopyin(sopt, &optval, sizeof (optval),
2995 sizeof (optval));
6d2010ae
A
2996 if (error)
2997 break;
2998 if ((optval % 2) != 0) {
2999 /* the API assumes even offset values */
3000 error = EINVAL;
39236c6e 3001 } else if (SOCK_PROTO(so) == IPPROTO_ICMPV6) {
6d2010ae
A
3002 if (optval != icmp6off)
3003 error = EINVAL;
39236c6e 3004 } else {
6d2010ae 3005 in6p->in6p_cksum = optval;
39236c6e 3006 }
6d2010ae
A
3007 break;
3008
3009 case SOPT_GET:
39236c6e 3010 if (SOCK_PROTO(so) == IPPROTO_ICMPV6)
6d2010ae
A
3011 optval = icmp6off;
3012 else
3013 optval = in6p->in6p_cksum;
3014
39236c6e 3015 error = sooptcopyout(sopt, &optval, sizeof (optval));
6d2010ae
A
3016 break;
3017
3018 default:
3019 error = EINVAL;
3020 break;
3021 }
3022 break;
3023
3024 default:
3025 error = ENOPROTOOPT;
3026 break;
3027 }
3028
3029 return (error);
3030}
3031
3032/*
3033 * Set up IP6 options in pcb for insertion in output packets or
3034 * specifying behavior of outgoing packets.
3035 */
3036static int
39236c6e
A
3037ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so,
3038 struct sockopt *sopt)
6d2010ae 3039{
39236c6e 3040#pragma unused(sopt)
6d2010ae
A
3041 struct ip6_pktopts *opt = *pktopt;
3042 int error = 0;
3043
3044 /* turn off any old options. */
39236c6e 3045 if (opt != NULL) {
6d2010ae
A
3046#if DIAGNOSTIC
3047 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
3048 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
3049 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
39236c6e
A
3050 printf("%s: all specified options are cleared.\n",
3051 __func__);
6d2010ae
A
3052#endif
3053 ip6_clearpktopts(opt, -1);
3054 } else {
39236c6e 3055 opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK);
6d2010ae 3056 if (opt == NULL)
39236c6e 3057 return (ENOBUFS);
6d2010ae 3058 }
1c79356b
A
3059 *pktopt = NULL;
3060
39236c6e 3061 if (m == NULL || m->m_len == 0) {
1c79356b 3062 /*
55e303ae
A
3063 * Only turning off any previous options, regardless of
3064 * whether the opt is just created or given.
1c79356b 3065 */
39236c6e 3066 if (opt != NULL)
9bccf70c 3067 FREE(opt, M_IP6OPT);
39236c6e 3068 return (0);
1c79356b
A
3069 }
3070
3071 /* set options specified by user. */
39236c6e 3072 if ((error = ip6_setpktopts(m, opt, NULL, SOCK_PROTO(so))) != 0) {
6d2010ae 3073 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
55e303ae 3074 FREE(opt, M_IP6OPT);
39236c6e 3075 return (error);
1c79356b
A
3076 }
3077 *pktopt = opt;
39236c6e 3078 return (0);
1c79356b
A
3079}
3080
6d2010ae
A
3081/*
3082 * initialize ip6_pktopts. beware that there are non-zero default values in
3083 * the struct.
3084 */
3085void
3086ip6_initpktopts(struct ip6_pktopts *opt)
3087{
3088
39236c6e 3089 bzero(opt, sizeof (*opt));
6d2010ae
A
3090 opt->ip6po_hlim = -1; /* -1 means default hop limit */
3091 opt->ip6po_tclass = -1; /* -1 means default traffic class */
3092 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
3093 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
3094}
3095
b0d623f7 3096static int
316670eb
A
3097ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
3098 int uproto)
b0d623f7
A
3099{
3100 struct ip6_pktopts *opt;
3101
3102 opt = *pktopt;
3103 if (opt == NULL) {
39236c6e 3104 opt = _MALLOC(sizeof (*opt), M_IP6OPT, M_WAITOK);
6d2010ae 3105 if (opt == NULL)
39236c6e 3106 return (ENOBUFS);
b0d623f7
A
3107 ip6_initpktopts(opt);
3108 *pktopt = opt;
3109 }
3110
6d2010ae 3111 return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
b0d623f7
A
3112}
3113
3114static int
3115ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
3116{
3117 void *optdata = NULL;
3118 int optdatalen = 0;
6d2010ae 3119 struct ip6_ext *ip6e;
6d2010ae
A
3120 struct in6_pktinfo null_pktinfo;
3121 int deftclass = 0, on;
3122 int defminmtu = IP6PO_MINMTU_MCASTONLY;
3123 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
b0d623f7 3124
316670eb 3125
b0d623f7 3126 switch (optname) {
6d2010ae
A
3127 case IPV6_PKTINFO:
3128 if (pktopt && pktopt->ip6po_pktinfo)
3129 optdata = (void *)pktopt->ip6po_pktinfo;
3130 else {
3131 /* XXX: we don't have to do this every time... */
39236c6e 3132 bzero(&null_pktinfo, sizeof (null_pktinfo));
6d2010ae
A
3133 optdata = (void *)&null_pktinfo;
3134 }
39236c6e 3135 optdatalen = sizeof (struct in6_pktinfo);
6d2010ae 3136 break;
39236c6e 3137
b0d623f7
A
3138 case IPV6_TCLASS:
3139 if (pktopt && pktopt->ip6po_tclass >= 0)
6d2010ae
A
3140 optdata = (void *)&pktopt->ip6po_tclass;
3141 else
3142 optdata = (void *)&deftclass;
39236c6e 3143 optdatalen = sizeof (int);
6d2010ae 3144 break;
39236c6e 3145
6d2010ae
A
3146 case IPV6_HOPOPTS:
3147 if (pktopt && pktopt->ip6po_hbh) {
3148 optdata = (void *)pktopt->ip6po_hbh;
3149 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
3150 optdatalen = (ip6e->ip6e_len + 1) << 3;
3151 }
3152 break;
39236c6e 3153
6d2010ae
A
3154 case IPV6_RTHDR:
3155 if (pktopt && pktopt->ip6po_rthdr) {
3156 optdata = (void *)pktopt->ip6po_rthdr;
3157 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
3158 optdatalen = (ip6e->ip6e_len + 1) << 3;
3159 }
3160 break;
39236c6e 3161
6d2010ae
A
3162 case IPV6_RTHDRDSTOPTS:
3163 if (pktopt && pktopt->ip6po_dest1) {
3164 optdata = (void *)pktopt->ip6po_dest1;
3165 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
3166 optdatalen = (ip6e->ip6e_len + 1) << 3;
3167 }
3168 break;
39236c6e 3169
6d2010ae
A
3170 case IPV6_DSTOPTS:
3171 if (pktopt && pktopt->ip6po_dest2) {
3172 optdata = (void *)pktopt->ip6po_dest2;
3173 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
3174 optdatalen = (ip6e->ip6e_len + 1) << 3;
3175 }
3176 break;
39236c6e 3177
6d2010ae
A
3178 case IPV6_NEXTHOP:
3179 if (pktopt && pktopt->ip6po_nexthop) {
3180 optdata = (void *)pktopt->ip6po_nexthop;
3181 optdatalen = pktopt->ip6po_nexthop->sa_len;
3182 }
3183 break;
39236c6e 3184
6d2010ae
A
3185 case IPV6_USE_MIN_MTU:
3186 if (pktopt)
3187 optdata = (void *)&pktopt->ip6po_minmtu;
b0d623f7 3188 else
6d2010ae 3189 optdata = (void *)&defminmtu;
39236c6e 3190 optdatalen = sizeof (int);
6d2010ae 3191 break;
39236c6e 3192
6d2010ae
A
3193 case IPV6_DONTFRAG:
3194 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
3195 on = 1;
3196 else
3197 on = 0;
3198 optdata = (void *)&on;
39236c6e 3199 optdatalen = sizeof (on);
6d2010ae 3200 break;
39236c6e 3201
6d2010ae
A
3202 case IPV6_PREFER_TEMPADDR:
3203 if (pktopt)
3204 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
3205 else
3206 optdata = (void *)&defpreftemp;
39236c6e 3207 optdatalen = sizeof (int);
b0d623f7 3208 break;
39236c6e 3209
b0d623f7
A
3210 default: /* should not happen */
3211#ifdef DIAGNOSTIC
3212 panic("ip6_getpcbopt: unexpected option\n");
3213#endif
3214 return (ENOPROTOOPT);
3215 }
3216
39236c6e 3217 return (sooptcopyout(sopt, optdata, optdatalen));
1c79356b
A
3218}
3219
3220void
316670eb 3221ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
1c79356b
A
3222{
3223 if (pktopt == NULL)
3224 return;
3225
6d2010ae
A
3226 if (optname == -1 || optname == IPV6_PKTINFO) {
3227 if (pktopt->ip6po_pktinfo)
9bccf70c 3228 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
1c79356b
A
3229 pktopt->ip6po_pktinfo = NULL;
3230 }
6d2010ae 3231 if (optname == -1 || optname == IPV6_HOPLIMIT)
1c79356b 3232 pktopt->ip6po_hlim = -1;
6d2010ae 3233 if (optname == -1 || optname == IPV6_TCLASS)
b0d623f7 3234 pktopt->ip6po_tclass = -1;
6d2010ae 3235 if (optname == -1 || optname == IPV6_NEXTHOP) {
39236c6e 3236 ROUTE_RELEASE(&pktopt->ip6po_nextroute);
6d2010ae 3237 if (pktopt->ip6po_nexthop)
9bccf70c 3238 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
1c79356b
A
3239 pktopt->ip6po_nexthop = NULL;
3240 }
6d2010ae
A
3241 if (optname == -1 || optname == IPV6_HOPOPTS) {
3242 if (pktopt->ip6po_hbh)
9bccf70c 3243 FREE(pktopt->ip6po_hbh, M_IP6OPT);
1c79356b
A
3244 pktopt->ip6po_hbh = NULL;
3245 }
6d2010ae
A
3246 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
3247 if (pktopt->ip6po_dest1)
9bccf70c 3248 FREE(pktopt->ip6po_dest1, M_IP6OPT);
1c79356b
A
3249 pktopt->ip6po_dest1 = NULL;
3250 }
6d2010ae
A
3251 if (optname == -1 || optname == IPV6_RTHDR) {
3252 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
9bccf70c 3253 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
1c79356b 3254 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
39236c6e 3255 ROUTE_RELEASE(&pktopt->ip6po_route);
1c79356b 3256 }
6d2010ae
A
3257 if (optname == -1 || optname == IPV6_DSTOPTS) {
3258 if (pktopt->ip6po_dest2)
9bccf70c 3259 FREE(pktopt->ip6po_dest2, M_IP6OPT);
1c79356b
A
3260 pktopt->ip6po_dest2 = NULL;
3261 }
3262}
3263
39236c6e
A
3264#define PKTOPT_EXTHDRCPY(type) do { \
3265 if (src->type) { \
3266 int hlen = \
3267 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3; \
3268 dst->type = _MALLOC(hlen, M_IP6OPT, canwait); \
3269 if (dst->type == NULL && canwait == M_NOWAIT) \
3270 goto bad; \
3271 bcopy(src->type, dst->type, hlen); \
3272 } \
9bccf70c 3273} while (0)
1c79356b 3274
6d2010ae
A
3275static int
3276copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
1c79356b 3277{
6d2010ae 3278 if (dst == NULL || src == NULL) {
316670eb 3279 printf("copypktopts: invalid argument\n");
6d2010ae 3280 return (EINVAL);
1c79356b
A
3281 }
3282
1c79356b 3283 dst->ip6po_hlim = src->ip6po_hlim;
b0d623f7 3284 dst->ip6po_tclass = src->ip6po_tclass;
6d2010ae 3285 dst->ip6po_flags = src->ip6po_flags;
1c79356b 3286 if (src->ip6po_pktinfo) {
39236c6e
A
3287 dst->ip6po_pktinfo = _MALLOC(sizeof (*dst->ip6po_pktinfo),
3288 M_IP6OPT, canwait);
1c79356b
A
3289 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
3290 goto bad;
3291 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
3292 }
3293 if (src->ip6po_nexthop) {
3294 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
39236c6e 3295 M_IP6OPT, canwait);
1c79356b
A
3296 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
3297 goto bad;
3298 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
39236c6e 3299 src->ip6po_nexthop->sa_len);
1c79356b
A
3300 }
3301 PKTOPT_EXTHDRCPY(ip6po_hbh);
3302 PKTOPT_EXTHDRCPY(ip6po_dest1);
3303 PKTOPT_EXTHDRCPY(ip6po_dest2);
3304 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
6d2010ae 3305 return (0);
1c79356b 3306
39236c6e 3307bad:
6d2010ae
A
3308 ip6_clearpktopts(dst, -1);
3309 return (ENOBUFS);
1c79356b
A
3310}
3311#undef PKTOPT_EXTHDRCPY
3312
6d2010ae
A
3313struct ip6_pktopts *
3314ip6_copypktopts(struct ip6_pktopts *src, int canwait)
3315{
3316 int error;
3317 struct ip6_pktopts *dst;
3318
39236c6e 3319 dst = _MALLOC(sizeof (*dst), M_IP6OPT, canwait);
6d2010ae
A
3320 if (dst == NULL)
3321 return (NULL);
3322 ip6_initpktopts(dst);
3323
3324 if ((error = copypktopts(dst, src, canwait)) != 0) {
3325 FREE(dst, M_IP6OPT);
3326 return (NULL);
3327 }
3328
3329 return (dst);
3330}
3331
1c79356b 3332void
316670eb 3333ip6_freepcbopts(struct ip6_pktopts *pktopt)
1c79356b
A
3334{
3335 if (pktopt == NULL)
3336 return;
3337
6d2010ae 3338 ip6_clearpktopts(pktopt, -1);
1c79356b 3339
9bccf70c 3340 FREE(pktopt, M_IP6OPT);
1c79356b
A
3341}
3342
6d2010ae
A
3343void
3344ip6_moptions_init(void)
1c79356b 3345{
6d2010ae 3346 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
1c79356b 3347
6d2010ae
A
3348 im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
3349 sizeof (struct ip6_moptions_dbg);
3350
3351 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
3352 IM6O_ZONE_NAME);
3353 if (im6o_zone == NULL) {
3354 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
3355 /* NOTREACHED */
1c79356b 3356 }
6d2010ae
A
3357 zone_change(im6o_zone, Z_EXPAND, TRUE);
3358}
1c79356b 3359
6d2010ae
A
3360void
3361im6o_addref(struct ip6_moptions *im6o, int locked)
3362{
3363 if (!locked)
3364 IM6O_LOCK(im6o);
3365 else
3366 IM6O_LOCK_ASSERT_HELD(im6o);
1c79356b 3367
6d2010ae
A
3368 if (++im6o->im6o_refcnt == 0) {
3369 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
3370 /* NOTREACHED */
3371 } else if (im6o->im6o_trace != NULL) {
3372 (*im6o->im6o_trace)(im6o, TRUE);
3373 }
3374
3375 if (!locked)
3376 IM6O_UNLOCK(im6o);
1c79356b
A
3377}
3378
6d2010ae
A
3379void
3380im6o_remref(struct ip6_moptions *im6o)
1c79356b 3381{
6d2010ae 3382 int i;
1c79356b 3383
6d2010ae
A
3384 IM6O_LOCK(im6o);
3385 if (im6o->im6o_refcnt == 0) {
3386 panic("%s: im6o %p negative refcnt", __func__, im6o);
3387 /* NOTREACHED */
3388 } else if (im6o->im6o_trace != NULL) {
3389 (*im6o->im6o_trace)(im6o, FALSE);
3390 }
1c79356b 3391
6d2010ae
A
3392 --im6o->im6o_refcnt;
3393 if (im6o->im6o_refcnt > 0) {
3394 IM6O_UNLOCK(im6o);
3395 return;
3396 }
1c79356b 3397
6d2010ae
A
3398 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
3399 struct in6_mfilter *imf;
1c79356b 3400
6d2010ae
A
3401 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
3402 if (imf != NULL)
3403 im6f_leave(imf);
1c79356b 3404
6d2010ae 3405 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
1c79356b 3406
6d2010ae
A
3407 if (imf != NULL)
3408 im6f_purge(imf);
3409
3410 IN6M_REMREF(im6o->im6o_membership[i]);
3411 im6o->im6o_membership[i] = NULL;
3412 }
3413 im6o->im6o_num_memberships = 0;
3414 if (im6o->im6o_mfilters != NULL) {
3415 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
3416 im6o->im6o_mfilters = NULL;
3417 }
3418 if (im6o->im6o_membership != NULL) {
3419 FREE(im6o->im6o_membership, M_IP6MOPTS);
3420 im6o->im6o_membership = NULL;
3421 }
3422 IM6O_UNLOCK(im6o);
3423
3424 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
3425
3426 if (!(im6o->im6o_debug & IFD_ALLOC)) {
3427 panic("%s: im6o %p cannot be freed", __func__, im6o);
3428 /* NOTREACHED */
1c79356b 3429 }
6d2010ae 3430 zfree(im6o_zone, im6o);
1c79356b
A
3431}
3432
6d2010ae
A
3433static void
3434im6o_trace(struct ip6_moptions *im6o, int refhold)
1c79356b 3435{
6d2010ae
A
3436 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3437 ctrace_t *tr;
3438 u_int32_t idx;
3439 u_int16_t *cnt;
1c79356b 3440
6d2010ae
A
3441 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3442 panic("%s: im6o %p has no debug structure", __func__, im6o);
3443 /* NOTREACHED */
3444 }
3445 if (refhold) {
3446 cnt = &im6o_dbg->im6o_refhold_cnt;
3447 tr = im6o_dbg->im6o_refhold;
3448 } else {
3449 cnt = &im6o_dbg->im6o_refrele_cnt;
3450 tr = im6o_dbg->im6o_refrele;
3451 }
3452
3453 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3454 ctrace_record(&tr[idx]);
3455}
3456
3457struct ip6_moptions *
3458ip6_allocmoptions(int how)
3459{
3460 struct ip6_moptions *im6o;
3461
3462 im6o = (how == M_WAITOK) ?
3463 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3464 if (im6o != NULL) {
3465 bzero(im6o, im6o_size);
3466 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3467 im6o->im6o_debug |= IFD_ALLOC;
3468 if (im6o_debug != 0) {
3469 im6o->im6o_debug |= IFD_DEBUG;
3470 im6o->im6o_trace = im6o_trace;
3471 }
3472 IM6O_ADDREF(im6o);
3473 }
3474
3475 return (im6o);
1c79356b
A
3476}
3477
3478/*
3479 * Set IPv6 outgoing packet options based on advanced API.
3480 */
3481int
6d2010ae
A
3482ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3483 struct ip6_pktopts *stickyopt, int uproto)
1c79356b 3484{
39236c6e 3485 struct cmsghdr *cm = NULL;
1c79356b 3486
6d2010ae
A
3487 if (control == NULL || opt == NULL)
3488 return (EINVAL);
1c79356b 3489
b0d623f7 3490 ip6_initpktopts(opt);
6d2010ae
A
3491 if (stickyopt) {
3492 int error;
3493
3494 /*
3495 * If stickyopt is provided, make a local copy of the options
3496 * for this particular packet, then override them by ancillary
3497 * objects.
3498 * XXX: copypktopts() does not copy the cached route to a next
3499 * hop (if any). This is not very good in terms of efficiency,
3500 * but we can allow this since this option should be rarely
3501 * used.
3502 */
3503 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3504 return (error);
3505 }
1c79356b
A
3506
3507 /*
3508 * XXX: Currently, we assume all the optional information is stored
3509 * in a single mbuf.
3510 */
3511 if (control->m_next)
6d2010ae 3512 return (EINVAL);
1c79356b 3513
6d2010ae
A
3514 if (control->m_len < CMSG_LEN(0))
3515 return (EINVAL);
3516
39236c6e
A
3517 for (cm = M_FIRST_CMSGHDR(control); cm != NULL;
3518 cm = M_NXT_CMSGHDR(control, cm)) {
6d2010ae
A
3519 int error;
3520
39236c6e
A
3521 if (cm->cmsg_len < sizeof (struct cmsghdr) ||
3522 cm->cmsg_len > control->m_len)
6d2010ae 3523 return (EINVAL);
1c79356b
A
3524 if (cm->cmsg_level != IPPROTO_IPV6)
3525 continue;
3526
6d2010ae
A
3527 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3528 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3529 if (error)
3530 return (error);
3531 }
3532
3533 return (0);
3534}
3535/*
3536 * Set a particular packet option, as a sticky option or an ancillary data
3537 * item. "len" can be 0 only when it's a sticky option.
3538 * We have 4 cases of combination of "sticky" and "cmsg":
3539 * "sticky=0, cmsg=0": impossible
3540 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3541 * "sticky=1, cmsg=0": RFC3542 socket option
3542 * "sticky=1, cmsg=1": RFC2292 socket option
3543 */
3544static int
3545ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3546 int sticky, int cmsg, int uproto)
3547{
3548 int minmtupolicy, preftemp;
3549 int error;
3550
3551 if (!sticky && !cmsg) {
3552#ifdef DIAGNOSTIC
3553 printf("ip6_setpktopt: impossible case\n");
3554#endif
3555 return (EINVAL);
3556 }
3557
316670eb
A
3558 /*
3559 * Caller must have ensured that the buffer is at least
3560 * aligned on 32-bit boundary.
3561 */
3562 VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t)));
3563
6d2010ae
A
3564 /*
3565 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3566 * not be specified in the context of RFC3542. Conversely,
3567 * RFC3542 types should not be specified in the context of RFC2292.
3568 */
3569 if (!cmsg) {
3570 switch (optname) {
3571 case IPV6_2292PKTINFO:
3572 case IPV6_2292HOPLIMIT:
3573 case IPV6_2292NEXTHOP:
3574 case IPV6_2292HOPOPTS:
3575 case IPV6_2292DSTOPTS:
3576 case IPV6_2292RTHDR:
3577 case IPV6_2292PKTOPTIONS:
3578 return (ENOPROTOOPT);
3579 }
3580 }
3581 if (sticky && cmsg) {
3582 switch (optname) {
3583 case IPV6_PKTINFO:
3584 case IPV6_HOPLIMIT:
3585 case IPV6_NEXTHOP:
3586 case IPV6_HOPOPTS:
3587 case IPV6_DSTOPTS:
3588 case IPV6_RTHDRDSTOPTS:
3589 case IPV6_RTHDR:
3590 case IPV6_USE_MIN_MTU:
3591 case IPV6_DONTFRAG:
3592 case IPV6_TCLASS:
3593 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3594 return (ENOPROTOOPT);
3595 }
3596 }
3597
3598 switch (optname) {
3599 case IPV6_2292PKTINFO:
39236c6e 3600 case IPV6_PKTINFO: {
6d2010ae
A
3601 struct ifnet *ifp = NULL;
3602 struct in6_pktinfo *pktinfo;
3603
39236c6e 3604 if (len != sizeof (struct in6_pktinfo))
6d2010ae
A
3605 return (EINVAL);
3606
316670eb 3607 pktinfo = (struct in6_pktinfo *)(void *)buf;
6d2010ae 3608
9bccf70c 3609 /*
6d2010ae
A
3610 * An application can clear any sticky IPV6_PKTINFO option by
3611 * doing a "regular" setsockopt with ipi6_addr being
3612 * in6addr_any and ipi6_ifindex being zero.
3613 * [RFC 3542, Section 6]
9bccf70c 3614 */
6d2010ae
A
3615 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3616 pktinfo->ipi6_ifindex == 0 &&
3617 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3618 ip6_clearpktopts(opt, optname);
3619 break;
3620 }
1c79356b 3621
6d2010ae
A
3622 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3623 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3624 return (EINVAL);
3625 }
3626
3627 /* validate the interface index if specified. */
3628 ifnet_head_lock_shared();
3629
3630 if (pktinfo->ipi6_ifindex > if_index) {
3631 ifnet_head_done();
3632 return (ENXIO);
3633 }
39236c6e 3634
6d2010ae
A
3635 if (pktinfo->ipi6_ifindex) {
3636 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3637 if (ifp == NULL) {
3638 ifnet_head_done();
3639 return (ENXIO);
1c79356b 3640 }
6d2010ae 3641 }
39236c6e 3642
6d2010ae 3643 ifnet_head_done();
1c79356b 3644
6d2010ae
A
3645 /*
3646 * We store the address anyway, and let in6_selectsrc()
3647 * validate the specified address. This is because ipi6_addr
3648 * may not have enough information about its scope zone, and
3649 * we may need additional information (such as outgoing
3650 * interface or the scope zone of a destination address) to
3651 * disambiguate the scope.
3652 * XXX: the delay of the validation may confuse the
3653 * application when it is used as a sticky option.
3654 */
3655 if (opt->ip6po_pktinfo == NULL) {
39236c6e 3656 opt->ip6po_pktinfo = _MALLOC(sizeof (*pktinfo),
6d2010ae
A
3657 M_IP6OPT, M_NOWAIT);
3658 if (opt->ip6po_pktinfo == NULL)
3659 return (ENOBUFS);
3660 }
39236c6e 3661 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof (*pktinfo));
6d2010ae
A
3662 break;
3663 }
1c79356b 3664
6d2010ae 3665 case IPV6_2292HOPLIMIT:
39236c6e 3666 case IPV6_HOPLIMIT: {
6d2010ae 3667 int *hlimp;
1c79356b 3668
6d2010ae
A
3669 /*
3670 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3671 * to simplify the ordering among hoplimit options.
3672 */
3673 if (optname == IPV6_HOPLIMIT && sticky)
3674 return (ENOPROTOOPT);
b0d623f7 3675
39236c6e 3676 if (len != sizeof (int))
6d2010ae 3677 return (EINVAL);
316670eb 3678 hlimp = (int *)(void *)buf;
6d2010ae
A
3679 if (*hlimp < -1 || *hlimp > 255)
3680 return (EINVAL);
b0d623f7 3681
6d2010ae
A
3682 opt->ip6po_hlim = *hlimp;
3683 break;
3684 }
3685
39236c6e 3686 case IPV6_TCLASS: {
6d2010ae
A
3687 int tclass;
3688
39236c6e 3689 if (len != sizeof (int))
6d2010ae 3690 return (EINVAL);
316670eb 3691 tclass = *(int *)(void *)buf;
6d2010ae
A
3692 if (tclass < -1 || tclass > 255)
3693 return (EINVAL);
3694
3695 opt->ip6po_tclass = tclass;
3696 break;
3697 }
3698
3699 case IPV6_2292NEXTHOP:
3700 case IPV6_NEXTHOP:
3701 error = suser(kauth_cred_get(), 0);
3702 if (error)
3703 return (EACCES);
3704
3705 if (len == 0) { /* just remove the option */
3706 ip6_clearpktopts(opt, IPV6_NEXTHOP);
1c79356b 3707 break;
6d2010ae 3708 }
1c79356b 3709
6d2010ae 3710 /* check if cmsg_len is large enough for sa_len */
39236c6e 3711 if (len < sizeof (struct sockaddr) || len < *buf)
6d2010ae
A
3712 return (EINVAL);
3713
39236c6e
A
3714 switch (SA(buf)->sa_family) {
3715 case AF_INET6: {
3716 struct sockaddr_in6 *sa6 = SIN6(buf);
6d2010ae 3717
39236c6e 3718 if (sa6->sin6_len != sizeof (struct sockaddr_in6))
6d2010ae
A
3719 return (EINVAL);
3720
3721 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3722 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3723 return (EINVAL);
3724 }
3725 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3726 != 0) {
3727 return (error);
3728 }
1c79356b
A
3729 break;
3730 }
6d2010ae
A
3731 case AF_LINK: /* should eventually be supported */
3732 default:
3733 return (EAFNOSUPPORT);
3734 }
1c79356b 3735
6d2010ae
A
3736 /* turn off the previous option, then set the new option. */
3737 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3738 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3739 if (opt->ip6po_nexthop == NULL)
3740 return (ENOBUFS);
3741 bcopy(buf, opt->ip6po_nexthop, *buf);
3742 break;
3743
3744 case IPV6_2292HOPOPTS:
39236c6e 3745 case IPV6_HOPOPTS: {
6d2010ae
A
3746 struct ip6_hbh *hbh;
3747 int hbhlen;
3748
3749 /*
3750 * XXX: We don't allow a non-privileged user to set ANY HbH
3751 * options, since per-option restriction has too much
3752 * overhead.
3753 */
3754 error = suser(kauth_cred_get(), 0);
3755 if (error)
3756 return (EACCES);
3757
3758 if (len == 0) {
3759 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3760 break; /* just remove the option */
3761 }
3762
3763 /* message length validation */
39236c6e 3764 if (len < sizeof (struct ip6_hbh))
6d2010ae 3765 return (EINVAL);
316670eb 3766 hbh = (struct ip6_hbh *)(void *)buf;
6d2010ae
A
3767 hbhlen = (hbh->ip6h_len + 1) << 3;
3768 if (len != hbhlen)
3769 return (EINVAL);
3770
3771 /* turn off the previous option, then set the new option. */
3772 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3773 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3774 if (opt->ip6po_hbh == NULL)
3775 return (ENOBUFS);
3776 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3777
3778 break;
3779 }
3780
3781 case IPV6_2292DSTOPTS:
3782 case IPV6_DSTOPTS:
39236c6e 3783 case IPV6_RTHDRDSTOPTS: {
6d2010ae
A
3784 struct ip6_dest *dest, **newdest = NULL;
3785 int destlen;
3786
3787 error = suser(kauth_cred_get(), 0);
3788 if (error)
3789 return (EACCES);
3790
3791 if (len == 0) {
3792 ip6_clearpktopts(opt, optname);
3793 break; /* just remove the option */
3794 }
3795
3796 /* message length validation */
39236c6e 3797 if (len < sizeof (struct ip6_dest))
6d2010ae 3798 return (EINVAL);
316670eb 3799 dest = (struct ip6_dest *)(void *)buf;
6d2010ae
A
3800 destlen = (dest->ip6d_len + 1) << 3;
3801 if (len != destlen)
3802 return (EINVAL);
3803
3804 /*
3805 * Determine the position that the destination options header
3806 * should be inserted; before or after the routing header.
3807 */
3808 switch (optname) {
3809 case IPV6_2292DSTOPTS:
3810 /*
3811 * The old advacned API is ambiguous on this point.
3812 * Our approach is to determine the position based
3813 * according to the existence of a routing header.
3814 * Note, however, that this depends on the order of the
3815 * extension headers in the ancillary data; the 1st
3816 * part of the destination options header must appear
3817 * before the routing header in the ancillary data,
3818 * too.
3819 * RFC3542 solved the ambiguity by introducing
3820 * separate ancillary data or option types.
1c79356b 3821 */
9bccf70c
A
3822 if (opt->ip6po_rthdr == NULL)
3823 newdest = &opt->ip6po_dest1;
3824 else
3825 newdest = &opt->ip6po_dest2;
6d2010ae
A
3826 break;
3827 case IPV6_RTHDRDSTOPTS:
3828 newdest = &opt->ip6po_dest1;
3829 break;
3830 case IPV6_DSTOPTS:
3831 newdest = &opt->ip6po_dest2;
1c79356b
A
3832 break;
3833 }
3834
6d2010ae
A
3835 /* turn off the previous option, then set the new option. */
3836 ip6_clearpktopts(opt, optname);
3837 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3838 if (*newdest == NULL)
3839 return (ENOBUFS);
3840 bcopy(dest, *newdest, destlen);
6d2010ae
A
3841 break;
3842 }
1c79356b 3843
6d2010ae 3844 case IPV6_2292RTHDR:
39236c6e 3845 case IPV6_RTHDR: {
6d2010ae
A
3846 struct ip6_rthdr *rth;
3847 int rthlen;
3848
3849 if (len == 0) {
3850 ip6_clearpktopts(opt, IPV6_RTHDR);
3851 break; /* just remove the option */
1c79356b
A
3852 }
3853
6d2010ae 3854 /* message length validation */
39236c6e 3855 if (len < sizeof (struct ip6_rthdr))
6d2010ae 3856 return (EINVAL);
316670eb 3857 rth = (struct ip6_rthdr *)(void *)buf;
6d2010ae
A
3858 rthlen = (rth->ip6r_len + 1) << 3;
3859 if (len != rthlen)
3860 return (EINVAL);
3861
3862 switch (rth->ip6r_type) {
3863 case IPV6_RTHDR_TYPE_0:
3864 if (rth->ip6r_len == 0) /* must contain one addr */
3865 return (EINVAL);
3866 if (rth->ip6r_len % 2) /* length must be even */
3867 return (EINVAL);
3868 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3869 return (EINVAL);
3870 break;
1c79356b 3871 default:
6d2010ae 3872 return (EINVAL); /* not supported */
1c79356b 3873 }
6d2010ae
A
3874
3875 /* turn off the previous option */
3876 ip6_clearpktopts(opt, IPV6_RTHDR);
3877 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3878 if (opt->ip6po_rthdr == NULL)
3879 return (ENOBUFS);
3880 bcopy(rth, opt->ip6po_rthdr, rthlen);
6d2010ae 3881 break;
1c79356b
A
3882 }
3883
6d2010ae 3884 case IPV6_USE_MIN_MTU:
39236c6e 3885 if (len != sizeof (int))
6d2010ae 3886 return (EINVAL);
316670eb 3887 minmtupolicy = *(int *)(void *)buf;
6d2010ae
A
3888 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3889 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3890 minmtupolicy != IP6PO_MINMTU_ALL) {
3891 return (EINVAL);
3892 }
3893 opt->ip6po_minmtu = minmtupolicy;
3894 break;
3895
3896 case IPV6_DONTFRAG:
39236c6e 3897 if (len != sizeof (int))
6d2010ae
A
3898 return (EINVAL);
3899
316670eb 3900 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
6d2010ae
A
3901 /*
3902 * we ignore this option for TCP sockets.
3903 * (RFC3542 leaves this case unspecified.)
3904 */
3905 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
39236c6e 3906 } else {
6d2010ae 3907 opt->ip6po_flags |= IP6PO_DONTFRAG;
39236c6e 3908 }
6d2010ae
A
3909 break;
3910
3911 case IPV6_PREFER_TEMPADDR:
39236c6e 3912 if (len != sizeof (int))
6d2010ae 3913 return (EINVAL);
316670eb 3914 preftemp = *(int *)(void *)buf;
6d2010ae
A
3915 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3916 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3917 preftemp != IP6PO_TEMPADDR_PREFER) {
3918 return (EINVAL);
3919 }
3920 opt->ip6po_prefer_tempaddr = preftemp;
3921 break;
3922
3923 default:
3924 return (ENOPROTOOPT);
3925 } /* end of switch */
3926
3927 return (0);
1c79356b
A
3928}
3929
3930/*
3931 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3932 * packet to the input queue of a specified interface. Note that this
3933 * calls the output routine of the loopback "driver", but with an interface
3934 * pointer that might NOT be &loif -- easier than replicating that code here.
3935 */
3936void
39236c6e
A
3937ip6_mloopback(struct ifnet *srcifp, struct ifnet *origifp, struct mbuf *m,
3938 struct sockaddr_in6 *dst, uint32_t optlen, int32_t nxt0)
1c79356b 3939{
9bccf70c
A
3940 struct mbuf *copym;
3941 struct ip6_hdr *ip6;
39236c6e 3942 struct in6_addr src;
1c79356b 3943
39236c6e 3944 if (lo_ifp == NULL)
9bccf70c
A
3945 return;
3946
3947 /*
39236c6e 3948 * Copy the packet header as it's needed for the checksum.
9bccf70c
A
3949 * Make sure to deep-copy IPv6 header portion in case the data
3950 * is in an mbuf cluster, so that we can safely override the IPv6
3951 * header portion later.
3952 */
39236c6e
A
3953 copym = m_copym_mode(m, 0, M_COPYALL, M_DONTWAIT, M_COPYM_COPY_HDR);
3954 if (copym != NULL && ((copym->m_flags & M_EXT) ||
3955 copym->m_len < sizeof (struct ip6_hdr)))
3956 copym = m_pullup(copym, sizeof (struct ip6_hdr));
9bccf70c 3957
39236c6e 3958 if (copym == NULL)
9bccf70c 3959 return;
9bccf70c
A
3960
3961 ip6 = mtod(copym, struct ip6_hdr *);
39236c6e 3962 src = ip6->ip6_src;
9bccf70c
A
3963 /*
3964 * clear embedded scope identifiers if necessary.
3965 * in6_clearscope will touch the addresses only when necessary.
3966 */
3967 in6_clearscope(&ip6->ip6_src);
3968 in6_clearscope(&ip6->ip6_dst);
9bccf70c 3969
39236c6e
A
3970 if (copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA)
3971 in6_delayed_cksum_offset(copym, 0, optlen, nxt0);
9bccf70c 3972
39236c6e
A
3973 /*
3974 * Stuff the 'real' ifp into the pkthdr, to be used in matching
3975 * in ip6_input(); we need the loopback ifp/dl_tag passed as args
3976 * to make the loopback driver compliant with the data link
3977 * requirements.
3978 */
3979 copym->m_pkthdr.rcvif = origifp;
9bccf70c 3980
39236c6e
A
3981 /*
3982 * Also record the source interface (which owns the source address).
3983 * This is basically a stripped down version of ifa_foraddr6().
3984 */
3985 if (srcifp == NULL) {
3986 struct in6_ifaddr *ia;
3987
3988 lck_rw_lock_shared(&in6_ifaddr_rwlock);
3989 for (ia = in6_ifaddrs; ia != NULL; ia = ia->ia_next) {
3990 IFA_LOCK_SPIN(&ia->ia_ifa);
3991 /* compare against src addr with embedded scope */
3992 if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &src)) {
3993 srcifp = ia->ia_ifp;
3994 IFA_UNLOCK(&ia->ia_ifa);
3995 break;
3996 }
3997 IFA_UNLOCK(&ia->ia_ifa);
3998 }
3999 lck_rw_done(&in6_ifaddr_rwlock);
6d2010ae 4000 }
39236c6e
A
4001 if (srcifp != NULL)
4002 ip6_setsrcifaddr_info(copym, srcifp->if_index, NULL);
4003 ip6_setdstifaddr_info(copym, origifp->if_index, NULL);
9bccf70c 4004
39236c6e 4005 dlil_output(lo_ifp, PF_INET6, copym, NULL, SA(dst), 0, NULL);
1c79356b
A
4006}
4007
4008/*
4009 * Chop IPv6 header off from the payload.
4010 */
4011static int
39236c6e 4012ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
1c79356b
A
4013{
4014 struct mbuf *mh;
4015 struct ip6_hdr *ip6;
4016
4017 ip6 = mtod(m, struct ip6_hdr *);
39236c6e 4018 if (m->m_len > sizeof (*ip6)) {
2d21ac55 4019 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
39236c6e 4020 if (mh == NULL) {
1c79356b 4021 m_freem(m);
39236c6e 4022 return (ENOBUFS);
1c79356b
A
4023 }
4024 M_COPY_PKTHDR(mh, m);
39236c6e 4025 MH_ALIGN(mh, sizeof (*ip6));
1c79356b 4026 m->m_flags &= ~M_PKTHDR;
39236c6e
A
4027 m->m_len -= sizeof (*ip6);
4028 m->m_data += sizeof (*ip6);
1c79356b
A
4029 mh->m_next = m;
4030 m = mh;
39236c6e
A
4031 m->m_len = sizeof (*ip6);
4032 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof (*ip6));
1c79356b
A
4033 }
4034 exthdrs->ip6e_ip6 = m;
39236c6e
A
4035 return (0);
4036}
4037
4038static void
4039ip6_output_checksum(struct ifnet *ifp, uint32_t mtu, struct mbuf *m,
4040 int nxt0, uint32_t tlen, uint32_t optlen)
4041{
4042 uint32_t sw_csum, hwcap = ifp->if_hwassist;
4043 int tso = TSO_IPV6_OK(ifp, m);
4044
4045 if (!hwcksum_tx) {
4046 /* do all in software; checksum offload is disabled */
4047 sw_csum = CSUM_DELAY_IPV6_DATA & m->m_pkthdr.csum_flags;
4048 } else {
4049 /* do in software what the hardware cannot */
4050 sw_csum = m->m_pkthdr.csum_flags &
4051 ~IF_HWASSIST_CSUM_FLAGS(hwcap);
4052 }
4053
4054 if (optlen != 0) {
4055 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4056 m->m_pkthdr.csum_flags);
4057 } else if (!(sw_csum & CSUM_DELAY_IPV6_DATA) &&
4058 (hwcap & CSUM_PARTIAL)) {
4059 /*
4060 * Partial checksum offload, ere), if no extension
4061 * headers, and TCP only (no UDP support, as the
4062 * hardware may not be able to convert +0 to
4063 * -0 (0xffff) per RFC1122 4.1.3.4.)
4064 */
4065 if (hwcksum_tx && !tso &&
4066 (m->m_pkthdr.csum_flags & CSUM_TCPIPV6) &&
4067 tlen <= mtu) {
4068 uint16_t start = sizeof (struct ip6_hdr);
4069 uint16_t ulpoff =
4070 m->m_pkthdr.csum_data & 0xffff;
4071 m->m_pkthdr.csum_flags |=
4072 (CSUM_DATA_VALID | CSUM_PARTIAL);
4073 m->m_pkthdr.csum_tx_stuff = (ulpoff + start);
4074 m->m_pkthdr.csum_tx_start = start;
4075 sw_csum = 0;
4076 } else {
4077 sw_csum |= (CSUM_DELAY_IPV6_DATA &
4078 m->m_pkthdr.csum_flags);
4079 }
4080 }
4081
4082 if (sw_csum & CSUM_DELAY_IPV6_DATA) {
4083 in6_delayed_cksum_offset(m, 0, optlen, nxt0);
4084 sw_csum &= ~CSUM_DELAY_IPV6_DATA;
4085 }
4086
4087 if (hwcksum_tx) {
4088 /*
4089 * Drop off bits that aren't supported by hardware;
4090 * also make sure to preserve non-checksum related bits.
4091 */
4092 m->m_pkthdr.csum_flags =
4093 ((m->m_pkthdr.csum_flags &
4094 (IF_HWASSIST_CSUM_FLAGS(hwcap) | CSUM_DATA_VALID)) |
4095 (m->m_pkthdr.csum_flags & ~IF_HWASSIST_CSUM_MASK));
4096 } else {
4097 /* drop all bits; checksum offload is disabled */
4098 m->m_pkthdr.csum_flags = 0;
4099 }
1c79356b
A
4100}
4101
4102/*
4103 * Compute IPv6 extension header length.
4104 */
1c79356b 4105int
39236c6e 4106ip6_optlen(struct in6pcb *in6p)
1c79356b
A
4107{
4108 int len;
4109
4110 if (!in6p->in6p_outputopts)
39236c6e 4111 return (0);
1c79356b
A
4112
4113 len = 0;
39236c6e
A
4114#define elen(x) \
4115 (((struct ip6_ext *)(x)) ? \
4116 (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
1c79356b
A
4117
4118 len += elen(in6p->in6p_outputopts->ip6po_hbh);
39236c6e 4119 if (in6p->in6p_outputopts->ip6po_rthdr) {
1c79356b
A
4120 /* dest1 is valid with rthdr only */
4121 len += elen(in6p->in6p_outputopts->ip6po_dest1);
39236c6e 4122 }
1c79356b
A
4123 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
4124 len += elen(in6p->in6p_outputopts->ip6po_dest2);
39236c6e 4125 return (len);
1c79356b
A
4126#undef elen
4127}
3e170ce0
A
4128
4129static int
4130sysctl_reset_ip6_output_stats SYSCTL_HANDLER_ARGS
4131{
4132#pragma unused(arg1, arg2)
4133 int error, i;
4134
4135 i = ip6_output_measure;
4136 error = sysctl_handle_int(oidp, &i, 0, req);
4137 if (error || req->newptr == USER_ADDR_NULL)
4138 goto done;
4139 /* impose bounds */
4140 if (i < 0 || i > 1) {
4141 error = EINVAL;
4142 goto done;
4143 }
4144 if (ip6_output_measure != i && i == 1) {
4145 net_perf_initialize(&net_perf, ip6_output_measure_bins);
4146 }
4147 ip6_output_measure = i;
4148done:
4149 return (error);
4150}
4151
4152static int
4153sysctl_ip6_output_measure_bins SYSCTL_HANDLER_ARGS
4154{
4155#pragma unused(arg1, arg2)
4156 int error;
4157 uint64_t i;
4158
4159 i = ip6_output_measure_bins;
4160 error = sysctl_handle_quad(oidp, &i, 0, req);
4161 if (error || req->newptr == USER_ADDR_NULL)
4162 goto done;
4163 /* validate data */
4164 if (!net_perf_validate_bins(i)) {
4165 error = EINVAL;
4166 goto done;
4167 }
4168 ip6_output_measure_bins = i;
4169done:
4170 return (error);
4171}
4172
4173static int
4174sysctl_ip6_output_getperf SYSCTL_HANDLER_ARGS
4175{
4176#pragma unused(oidp, arg1, arg2)
4177 if (req->oldptr == USER_ADDR_NULL)
4178 req->oldlen = (size_t)sizeof (struct ipstat);
4179
4180 return (SYSCTL_OUT(req, &net_perf, MIN(sizeof (net_perf), req->oldlen)));
4181}
4182