]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet6/ip6_output.c
xnu-1699.24.23.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
CommitLineData
b0d623f7 1/*
6d2010ae 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
b0d623f7
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
55e303ae
A
29/* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $ */
30/* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
1c79356b
A
31
32/*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61/*
62 * Copyright (c) 1982, 1986, 1988, 1990, 1993
63 * The Regents of the University of California. All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 * notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 * notice, this list of conditions and the following disclaimer in the
72 * documentation and/or other materials provided with the distribution.
73 * 3. All advertising materials mentioning features or use of this software
74 * must display the following acknowledgement:
75 * This product includes software developed by the University of
76 * California, Berkeley and its contributors.
77 * 4. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
94 */
2d21ac55
A
95/*
96 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
97 * support for mandatory and extensible security protections. This notice
98 * is included in support of clause 2.2 (b) of the Apple Public License,
99 * Version 2.0.
100 */
1c79356b
A
101
102#include <sys/param.h>
103#include <sys/malloc.h>
104#include <sys/mbuf.h>
105#include <sys/errno.h>
106#include <sys/protosw.h>
107#include <sys/socket.h>
108#include <sys/socketvar.h>
109#include <sys/systm.h>
1c79356b 110#include <sys/kernel.h>
1c79356b 111#include <sys/proc.h>
91447636 112#include <sys/kauth.h>
6d2010ae
A
113#include <sys/mcache.h>
114#include <sys/sysctl.h>
115#include <kern/zalloc.h>
116
117#include <pexpert/pexpert.h>
1c79356b
A
118
119#include <net/if.h>
120#include <net/route.h>
2d21ac55 121#include <net/dlil.h>
1c79356b
A
122
123#include <netinet/in.h>
124#include <netinet/in_var.h>
55e303ae 125#include <netinet/ip_var.h>
9bccf70c 126#include <netinet6/in6_var.h>
1c79356b 127#include <netinet/ip6.h>
6d2010ae 128#include <netinet6/ip6protosw.h>
1c79356b
A
129#include <netinet/icmp6.h>
130#include <netinet6/ip6_var.h>
1c79356b 131#include <netinet/in_pcb.h>
1c79356b 132#include <netinet6/nd6.h>
6d2010ae
A
133#include <netinet6/scope6_var.h>
134#include <mach/sdt.h>
1c79356b
A
135
136#if IPSEC
137#include <netinet6/ipsec.h>
9bccf70c
A
138#if INET6
139#include <netinet6/ipsec6.h>
140#endif
1c79356b 141#include <netkey/key.h>
9bccf70c 142extern int ipsec_bypass;
1c79356b 143#endif /* IPSEC */
2d21ac55
A
144
145#if CONFIG_MACF_NET
146#include <security/mac.h>
147#endif /* CONFIG_MACF_NET */
1c79356b 148
9bccf70c 149#include <netinet6/ip6_fw.h>
1c79356b
A
150
151#include <net/net_osdep.h>
152
91447636
A
153#include <netinet/kpi_ipfilter_var.h>
154
b0d623f7
A
155#if PF
156#include <net/pfvar.h>
157#endif /* PF */
158
9bccf70c 159#ifndef __APPLE__
1c79356b
A
160static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
161#endif
162
/*
 * Set of mbufs used by ip6_output() while it composes the outgoing header
 * chain: one for the IPv6 header and one per optional extension header.
 * ip6_output() zeroes the structure first, so a NULL member means the
 * corresponding header is not present.
 */
163struct ip6_exthdrs {
164 struct mbuf *ip6e_ip6; /* IPv6 header; ip6_output() takes this as "m" after ip6_splithdr() */
165 struct mbuf *ip6e_hbh; /* Hop-by-Hop options header */
166 struct mbuf *ip6e_dest1; /* Destination options header (1st part, only built with a routing header) */
167 struct mbuf *ip6e_rthdr; /* Routing header */
168 struct mbuf *ip6e_dest2; /* Destination options header (2nd part) */
169};
170
/*
 * Forward declarations.  ip6_raw_ctloutput() has external linkage; all the
 * others are file-local (static) helpers.
 */
6d2010ae 171int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
91447636
A
172static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
173 struct socket *, struct sockopt *sopt);
6d2010ae 174static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto);
b0d623f7 175static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt);
6d2010ae
A
176static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto);
177static void im6o_trace(struct ip6_moptions *, int);
91447636
A
/*
 * Used by the MAKE_EXTHDR() macro in ip6_output(): called with the address
 * of an ip6_exthdrs mbuf slot, the caller-supplied header, and its length
 * in bytes ((ip6e_len + 1) << 3).
 */
178static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
179static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
180 struct ip6_frag **);
/* Called by ip6_output() when the payload length exceeds IPV6_MAXPACKET. */
181static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
/*
 * Called by ip6_output() to separate the IPv6 header from the payload;
 * on success the caller continues with exthdrs->ip6e_ip6 as its mbuf.
 */
182static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
6d2010ae
A
183static int ip6_getpmtu (struct route_in6 *, struct route_in6 *,
184 struct ifnet *, struct in6_addr *, u_int32_t *, int *);
185
/* Number of entries in each refhold/refrele history array of ip6_moptions_dbg. */
186#define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
187
188/* For gdb: the compile-time history size, exported as a variable */
189__private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
190
/*
 * Debug variant of struct ip6_moptions: embeds the regular structure and
 * adds reference hold/release counters plus caller traces.
 * NOTE(review): presumably used in place of the plain structure when
 * im6o_debug is set -- confirm against the allocator, which is outside
 * this view.
 */
191struct ip6_moptions_dbg {
192 struct ip6_moptions im6o; /* ip6_moptions */
193 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
194 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
195 /*
196 * Alloc and free callers.
197 */
198 ctrace_t im6o_alloc;
199 ctrace_t im6o_free;
200 /*
201 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
202 */
203 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE]; /* history of IM6O_ADDREF callers */
204 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE]; /* history of IM6O_REMREF callers */
205};
206
/*
 * ip6_moptions debugging switch and zone bookkeeping.  im6o_debug is
 * initialized to 1 only when built with DEBUG; otherwise it starts at 0.
 * NOTE(review): im6o_size and im6o_zone have no initializer here, so they
 * are presumably set up by an init routine outside this view -- confirm.
 */
207#if DEBUG
208static unsigned int im6o_debug = 1; /* debugging (enabled) */
209#else
210static unsigned int im6o_debug; /* debugging (disabled) */
211#endif /* !DEBUG */
212
213static unsigned int im6o_size; /* size of zone element */
214static struct zone *im6o_zone; /* zone for ip6_moptions */
215
216#define IM6O_ZONE_MAX 64 /* maximum elements in zone */
217#define IM6O_ZONE_NAME "ip6_moptions" /* zone name */
1c79356b 218
55e303ae 219
1c79356b
A
220/*
221 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
222 * header (with pri, len, nxt, hlim, src, dst).
223 * This function may modify ver and hlim only.
224 * The mbuf chain containing the packet will be freed.
225 * The mbuf opt, if present, will not be freed.
9bccf70c 226 *
b0d623f7
A
227 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
228 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
9bccf70c 229 * which is rt_rmx.rmx_mtu.
1c79356b
A
230 */
231int
91447636
A
232ip6_output(
233 struct mbuf *m0,
234 struct ip6_pktopts *opt,
235 struct route_in6 *ro,
236 int flags,
237 struct ip6_moptions *im6o,
238 struct ifnet **ifpp, /* XXX: just for statistics */
6d2010ae 239 struct ip6_out_args *ip6oa)
1c79356b
A
240{
241 struct ip6_hdr *ip6, *mhip6;
6d2010ae 242 struct ifnet *ifp = NULL, *origifp = NULL;
1c79356b
A
243 struct mbuf *m = m0;
244 int hlen, tlen, len, off;
245 struct route_in6 ip6route;
6d2010ae
A
246 struct rtentry *rt = NULL;
247 struct sockaddr_in6 *dst, src_sa, dst_sa;
1c79356b 248 int error = 0;
9bccf70c 249 struct in6_ifaddr *ia = NULL;
b0d623f7 250 u_int32_t mtu;
6d2010ae 251 int alwaysfrag = 0, dontfrag = 0;
1c79356b
A
252 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
253 struct ip6_exthdrs exthdrs;
6d2010ae
A
254 struct in6_addr finaldst, src0, dst0;
255 u_int32_t zone;
1c79356b
A
256 struct route_in6 *ro_pmtu = NULL;
257 int hdrsplit = 0;
258 int needipsec = 0;
91447636 259 ipfilter_t inject_filter_ref;
6d2010ae
A
260 int tso;
261 unsigned int ifscope;
262 unsigned int nocell;
263 boolean_t select_srcif;
264 struct ipf_pktopts *ippo = NULL, ipf_pktopts;
265 u_int32_t ifmtu;
266
1c79356b
A
267#if IPSEC
268 int needipsectun = 0;
9bccf70c 269 struct socket *so = NULL;
1c79356b 270 struct secpolicy *sp = NULL;
ebb1b9f4
A
271 struct route_in6 *ipsec_saved_route = NULL;
272 struct ipsec_output_state ipsec_state;
1c79356b 273
ebb1b9f4
A
274 bzero(&ipsec_state, sizeof(ipsec_state));
275
1c79356b 276 /* for AH processing. stupid to have "socket" variable in IP layer... */
9bccf70c
A
277 if (ipsec_bypass == 0)
278 {
279 so = ipsec_getsocket(m);
280 (void)ipsec_setsocket(m, NULL);
281 }
1c79356b
A
282#endif /* IPSEC */
283
6d2010ae
A
284 bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
285 ippo = &ipf_pktopts;
286
91447636
A
287 ip6 = mtod(m, struct ip6_hdr *);
288 inject_filter_ref = ipf_get_inject_filter(m);
6d2010ae 289
d41d1dae 290 finaldst = ip6->ip6_dst;
91447636 291
6d2010ae
A
292 if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
293 select_srcif = !(flags & (IPV6_FORWARDING | IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL));
294 ifscope = ip6oa->ip6oa_boundif;
295 ipf_pktopts.ippo_flags = IPPOF_BOUND_IF;
296 ipf_pktopts.ippo_flags |= (ifscope << IPPOF_SHIFT_IFSCOPE);
297 } else {
298 select_srcif = FALSE;
299 ifscope = IFSCOPE_NONE;
300 }
301
302 if (flags & IPV6_OUTARGS) {
303 nocell = ip6oa->ip6oa_nocell;
304 if (nocell)
305 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
306 } else {
307 nocell = 0;
308 }
309
9bccf70c
A
310#define MAKE_EXTHDR(hp, mp) \
311 do { \
1c79356b
A
312 if (hp) { \
313 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
314 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
315 ((eh)->ip6e_len + 1) << 3); \
316 if (error) \
317 goto freehdrs; \
318 } \
9bccf70c 319 } while (0)
1c79356b
A
320
321 bzero(&exthdrs, sizeof(exthdrs));
322
1c79356b
A
323 if (opt) {
324 /* Hop-by-Hop options header */
325 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
9bccf70c 326 /* Destination options header(1st part) */
6d2010ae
A
327 if (opt->ip6po_rthdr) {
328 /*
329 * Destination options header(1st part)
330 * This only makes sense with a routing header.
331 * See Section 9.2 of RFC 3542.
332 * Disabling this part just for MIP6 convenience is
333 * a bad idea. We need to think carefully about a
334 * way to make the advanced API coexist with MIP6
335 * options, which might automatically be inserted in
336 * the kernel.
337 */
338 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
339 }
1c79356b
A
340 /* Routing header */
341 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
342 /* Destination options header(2nd part) */
343 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
344 }
345
346#if IPSEC
9bccf70c
A
347 if (ipsec_bypass != 0)
348 goto skip_ipsec;
91447636 349
1c79356b
A
350 /* get a security policy for this packet */
351 if (so == NULL)
352 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
353 else
354 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
355
356 if (sp == NULL) {
2d21ac55 357 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
9bccf70c 358 goto freehdrs;
1c79356b
A
359 }
360
361 error = 0;
362
363 /* check policy */
364 switch (sp->policy) {
365 case IPSEC_POLICY_DISCARD:
2d21ac55 366 case IPSEC_POLICY_GENERATE:
1c79356b
A
367 /*
368 * This packet is just discarded.
369 */
2d21ac55 370 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
9bccf70c 371 goto freehdrs;
1c79356b
A
372
373 case IPSEC_POLICY_BYPASS:
374 case IPSEC_POLICY_NONE:
375 /* no need to do IPsec. */
376 needipsec = 0;
377 break;
378
379 case IPSEC_POLICY_IPSEC:
380 if (sp->req == NULL) {
381 /* acquire a policy */
382 error = key_spdacquire(sp);
9bccf70c 383 goto freehdrs;
1c79356b
A
384 }
385 needipsec = 1;
386 break;
387
388 case IPSEC_POLICY_ENTRUST:
389 default:
390 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
391 }
9bccf70c 392 skip_ipsec:
1c79356b
A
393#endif /* IPSEC */
394
395 /*
396 * Calculate the total length of the extension header chain.
397 * Keep the length of the unfragmentable part for fragmentation.
398 */
399 optlen = 0;
6d2010ae
A
400 if (exthdrs.ip6e_hbh)
401 optlen += exthdrs.ip6e_hbh->m_len;
402 if (exthdrs.ip6e_dest1)
403 optlen += exthdrs.ip6e_dest1->m_len;
404 if (exthdrs.ip6e_rthdr)
405 optlen += exthdrs.ip6e_rthdr->m_len;
1c79356b 406 unfragpartlen = optlen + sizeof(struct ip6_hdr);
6d2010ae 407
1c79356b 408 /* NOTE: we don't add AH/ESP length here. do that later. */
6d2010ae
A
409 if (exthdrs.ip6e_dest2)
410 optlen += exthdrs.ip6e_dest2->m_len;
411
412
413 if (needipsec &&
414 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
415 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
416 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
417 }
1c79356b
A
418
419 /*
420 * If we need IPsec, or there is at least one extension header,
421 * separate IP6 header from the payload.
422 */
423 if ((needipsec || optlen) && !hdrsplit) {
424 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
425 m = NULL;
426 goto freehdrs;
427 }
428 m = exthdrs.ip6e_ip6;
429 hdrsplit++;
430 }
431
432 /* adjust pointer */
433 ip6 = mtod(m, struct ip6_hdr *);
434
435 /* adjust mbuf packet header length */
436 m->m_pkthdr.len += optlen;
437 plen = m->m_pkthdr.len - sizeof(*ip6);
438
439 /* If this is a jumbo payload, insert a jumbo payload option. */
440 if (plen > IPV6_MAXPACKET) {
441 if (!hdrsplit) {
442 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
443 m = NULL;
444 goto freehdrs;
445 }
446 m = exthdrs.ip6e_ip6;
447 hdrsplit++;
448 }
449 /* adjust pointer */
450 ip6 = mtod(m, struct ip6_hdr *);
451 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
452 goto freehdrs;
453 ip6->ip6_plen = 0;
454 } else
455 ip6->ip6_plen = htons(plen);
456
457 /*
458 * Concatenate headers and fill in next header fields.
459 * Here we have, on "m"
460 * IPv6 payload
461 * and we insert headers accordingly. Finally, we should be getting:
462 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
463 *
464 * during the header composing process, "m" points to IPv6 header.
465 * "mprev" points to an extension header prior to esp.
466 */
467 {
468 u_char *nexthdrp = &ip6->ip6_nxt;
469 struct mbuf *mprev = m;
470
471 /*
472 * we treat dest2 specially. this makes IPsec processing
55e303ae
A
473 * much easier. the goal here is to make mprev point to the
474 * mbuf prior to dest2.
1c79356b
A
475 *
476 * result: IPv6 dest2 payload
477 * m and mprev will point to IPv6 header.
478 */
479 if (exthdrs.ip6e_dest2) {
480 if (!hdrsplit)
481 panic("assumption failed: hdr not split");
482 exthdrs.ip6e_dest2->m_next = m->m_next;
483 m->m_next = exthdrs.ip6e_dest2;
484 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
485 ip6->ip6_nxt = IPPROTO_DSTOPTS;
486 }
487
9bccf70c
A
488#define MAKE_CHAIN(m, mp, p, i)\
489 do {\
1c79356b
A
490 if (m) {\
491 if (!hdrsplit) \
492 panic("assumption failed: hdr not split"); \
493 *mtod((m), u_char *) = *(p);\
494 *(p) = (i);\
495 p = mtod((m), u_char *);\
496 (m)->m_next = (mp)->m_next;\
497 (mp)->m_next = (m);\
498 (mp) = (m);\
499 }\
9bccf70c 500 } while (0)
1c79356b
A
501 /*
502 * result: IPv6 hbh dest1 rthdr dest2 payload
503 * m will point to IPv6 header. mprev will point to the
504 * extension header prior to dest2 (rthdr in the above case).
505 */
506 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
507 nexthdrp, IPPROTO_HOPOPTS);
508 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
509 nexthdrp, IPPROTO_DSTOPTS);
510 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
511 nexthdrp, IPPROTO_ROUTING);
512
91447636
A
513 if (!TAILQ_EMPTY(&ipv6_filters)) {
514 struct ipfilter *filter;
515 int seen = (inject_filter_ref == 0);
516 int fixscope = 0;
6d2010ae 517
91447636 518 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
6d2010ae
A
519 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
520 IM6O_LOCK(im6o);
91447636
A
521 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
522 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
523 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
6d2010ae 524 IM6O_UNLOCK(im6o);
91447636
A
525 }
526
527 /* Hack: embed the scope_id in the destination */
528 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
529 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
530 fixscope = 1;
531 ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id);
532 }
533 {
91447636
A
534 ipf_ref();
535 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
536 /*
537 * No need to process packet twice if we've
538 * already seen it
539 */
540 if (seen == 0) {
541 if ((struct ipfilter *)inject_filter_ref == filter)
542 seen = 1;
543 } else if (filter->ipf_filter.ipf_output) {
544 errno_t result;
545
546 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
547 if (result == EJUSTRETURN) {
548 ipf_unref();
91447636
A
549 goto done;
550 }
551 if (result != 0) {
552 ipf_unref();
91447636
A
553 goto bad;
554 }
555 }
556 }
557 ipf_unref();
91447636 558 }
6601e61a 559 ip6 = mtod(m, struct ip6_hdr *);
91447636
A
560 /* Hack: cleanup embedded scope_id if we put it there */
561 if (fixscope)
562 ip6->ip6_dst.s6_addr16[1] = 0;
563 }
564
1c79356b
A
565#if IPSEC
566 if (!needipsec)
567 goto skip_ipsec2;
568
569 /*
570 * pointers after IPsec headers are not valid any more.
571 * other pointers need a great care too.
572 * (IPsec routines should not mangle mbufs prior to AH/ESP)
573 */
574 exthdrs.ip6e_dest2 = NULL;
575
576 {
577 struct ip6_rthdr *rh = NULL;
578 int segleft_org = 0;
1c79356b
A
579
580 if (exthdrs.ip6e_rthdr) {
581 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
582 segleft_org = rh->ip6r_segleft;
583 rh->ip6r_segleft = 0;
584 }
585
ebb1b9f4
A
586 ipsec_state.m = m;
587 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags,
1c79356b 588 &needipsectun);
ebb1b9f4 589 m = ipsec_state.m;
1c79356b
A
590 if (error) {
591 /* mbuf is already reclaimed in ipsec6_output_trans. */
592 m = NULL;
593 switch (error) {
594 case EHOSTUNREACH:
595 case ENETUNREACH:
596 case EMSGSIZE:
597 case ENOBUFS:
598 case ENOMEM:
599 break;
600 default:
601 printf("ip6_output (ipsec): error code %d\n", error);
55e303ae 602 /* fall through */
1c79356b
A
603 case ENOENT:
604 /* don't show these error codes to the user */
605 error = 0;
606 break;
607 }
608 goto bad;
609 }
610 if (exthdrs.ip6e_rthdr) {
611 /* ah6_output doesn't modify mbuf chain */
612 rh->ip6r_segleft = segleft_org;
613 }
6d2010ae 614 }
1c79356b 615 }
6d2010ae
A
616skip_ipsec2:
617#endif
1c79356b
A
618
619 /*
6d2010ae 620 * If there is a routing header, replace the destination address field
1c79356b
A
621 * with the first hop of the routing header.
622 */
623 if (exthdrs.ip6e_rthdr) {
624 struct ip6_rthdr *rh =
625 (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
626 struct ip6_rthdr *));
627 struct ip6_rthdr0 *rh0;
6d2010ae
A
628 struct in6_addr *addr;
629 struct sockaddr_in6 sa;
1c79356b 630
9bccf70c 631 switch (rh->ip6r_type) {
1c79356b
A
632 case IPV6_RTHDR_TYPE_0:
633 rh0 = (struct ip6_rthdr0 *)rh;
6d2010ae
A
634 addr = (struct in6_addr *)(rh0 + 1);
635
636 /*
637 * construct a sockaddr_in6 form of
638 * the first hop.
639 *
640 * XXX: we may not have enough
641 * information about its scope zone;
642 * there is no standard API to pass
643 * the information from the
644 * application.
645 */
646 bzero(&sa, sizeof(sa));
647 sa.sin6_family = AF_INET6;
648 sa.sin6_len = sizeof(sa);
649 sa.sin6_addr = addr[0];
650 if ((error = sa6_embedscope(&sa,
651 ip6_use_defzone)) != 0) {
652 goto bad;
653 }
654 ip6->ip6_dst = sa.sin6_addr;
655 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
656 * (rh0->ip6r0_segleft - 1));
657 addr[rh0->ip6r0_segleft - 1] = finaldst;
658 /* XXX */
659 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
1c79356b
A
660 break;
661 default: /* is it possible? */
662 error = EINVAL;
663 goto bad;
664 }
665 }
666
667 /* Source address validation */
668 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
6d2010ae 669 (flags & IPV6_UNSPECSRC) == 0) {
1c79356b
A
670 error = EOPNOTSUPP;
671 ip6stat.ip6s_badscope++;
672 goto bad;
673 }
674 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
675 error = EOPNOTSUPP;
676 ip6stat.ip6s_badscope++;
677 goto bad;
678 }
679
680 ip6stat.ip6s_localout++;
681
682 /*
683 * Route packet.
684 */
685 if (ro == 0) {
686 ro = &ip6route;
687 bzero((caddr_t)ro, sizeof(*ro));
688 }
689 ro_pmtu = ro;
690 if (opt && opt->ip6po_rthdr)
691 ro = &opt->ip6po_route;
692 dst = (struct sockaddr_in6 *)&ro->ro_dst;
6d2010ae
A
693
694 if (ro && ro->ro_rt)
695 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
696 /*
697 * if specified, try to fill in the traffic class field.
698 * do not override if a non-zero value is already set.
699 * we check the diffserv field and the ecn field separately.
700 */
701 if (opt && opt->ip6po_tclass >= 0) {
702 int mask = 0;
703
704 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
705 mask |= 0xfc;
706 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
707 mask |= 0x03;
708 if (mask != 0)
709 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
710 }
711
712 /* fill in or override the hop limit field, if necessary. */
713 if (opt && opt->ip6po_hlim != -1)
714 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
715 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
716 if (im6o != NULL) {
717 IM6O_LOCK(im6o);
718 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
719 IM6O_UNLOCK(im6o);
720 } else {
721 ip6->ip6_hlim = ip6_defmcasthlim;
722 }
723 }
724
1c79356b 725 /*
b0d623f7
A
726 * If there is a cached route, check that it is to the same
727 * destination and is still up. If not, free it and try again.
728 * Test rt_flags without holding rt_lock for performance reasons;
729 * if the route is down it will hopefully be caught by the layer
730 * below (since it uses this route as a hint) or during the
731 * next transmit.
1c79356b 732 */
b0d623f7
A
733 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
734 dst->sin6_family != AF_INET6 ||
735 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) ||
736 ro->ro_rt->generation_id != route_generation)) {
9bccf70c 737 rtfree(ro->ro_rt);
b0d623f7 738 ro->ro_rt = NULL;
1c79356b 739 }
b0d623f7 740 if (ro->ro_rt == NULL) {
1c79356b
A
741 bzero(dst, sizeof(*dst));
742 dst->sin6_family = AF_INET6;
743 dst->sin6_len = sizeof(struct sockaddr_in6);
744 dst->sin6_addr = ip6->ip6_dst;
745 }
ebb1b9f4 746
1c79356b
A
747#if IPSEC
748 if (needipsec && needipsectun) {
6d2010ae
A
749#if CONFIG_DTRACE
750 struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
751#endif /* CONFIG_DTRACE */
1c79356b
A
752 /*
753 * All the extension headers will become inaccessible
754 * (since they can be encrypted).
755 * Don't panic, we need no more updates to extension headers
756 * on inner IPv6 packet (since they are now encapsulated).
757 *
758 * IPv6 [ESP|AH] IPv6 [extension headers] payload
759 */
760 bzero(&exthdrs, sizeof(exthdrs));
761 exthdrs.ip6e_ip6 = m;
762
ebb1b9f4
A
763 ipsec_state.m = m;
764 route_copyout(&ipsec_state.ro, (struct route *)ro, sizeof(ipsec_state.ro));
765 ipsec_state.dst = (struct sockaddr *)dst;
6d2010ae
A
766
767 /* Added a trace here so that we can see packets inside a tunnel */
768 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
769 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
770 struct ip *, NULL, struct ip6_hdr *, ip6);
771
ebb1b9f4
A
772 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
773 if (ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */
2d21ac55 774 goto done;
ebb1b9f4
A
775 m = ipsec_state.m;
776 ipsec_saved_route = ro;
777 ro = (struct route_in6 *)&ipsec_state.ro;
778 dst = (struct sockaddr_in6 *)ipsec_state.dst;
1c79356b
A
779 if (error) {
780 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
781 m0 = m = NULL;
782 m = NULL;
783 switch (error) {
784 case EHOSTUNREACH:
785 case ENETUNREACH:
786 case EMSGSIZE:
787 case ENOBUFS:
788 case ENOMEM:
789 break;
790 default:
791 printf("ip6_output (ipsec): error code %d\n", error);
55e303ae 792 /* fall through */
1c79356b
A
793 case ENOENT:
794 /* don't show these error codes to the user */
795 error = 0;
796 break;
797 }
798 goto bad;
799 }
6d2010ae
A
800 /*
801 * The packet has been encapsulated so the ifscope is no longer valid
802 * since it does not apply to the outer address: ignore the ifscope.
803 */
804 ifscope = IFSCOPE_NONE;
805 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
806 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
807 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
808 }
1c79356b
A
809 exthdrs.ip6e_ip6 = m;
810 }
55e303ae 811#endif /* IPSEC */
1c79356b 812
6d2010ae
A
813 /* for safety */
814 if (ifp != NULL) {
815 ifnet_release(ifp);
816 ifp = NULL;
817 }
1c79356b 818
6d2010ae
A
819 /* adjust pointer */
820 ip6 = mtod(m, struct ip6_hdr *);
1c79356b 821
6d2010ae
A
822 if (select_srcif) {
823 bzero(&src_sa, sizeof(src_sa));
824 src_sa.sin6_family = AF_INET6;
825 src_sa.sin6_len = sizeof(src_sa);
826 src_sa.sin6_addr = ip6->ip6_src;
827 }
828 bzero(&dst_sa, sizeof(dst_sa));
829 dst_sa.sin6_family = AF_INET6;
830 dst_sa.sin6_len = sizeof(dst_sa);
831 dst_sa.sin6_addr = ip6->ip6_dst;
832
833 if ((error = in6_selectroute(select_srcif ? &src_sa : NULL,
834 &dst_sa, opt, im6o, ro, &ifp, &rt, 0, ifscope, nocell)) != 0) {
835 switch (error) {
836 case EHOSTUNREACH:
837 ip6stat.ip6s_noroute++;
838 break;
839 case EADDRNOTAVAIL:
840 default:
841 break; /* XXX statistics? */
1c79356b 842 }
6d2010ae
A
843 if (ifp != NULL)
844 in6_ifstat_inc(ifp, ifs6_out_discard);
845 goto bad;
846 }
847 if (rt == NULL) {
b0d623f7 848 /*
6d2010ae
A
849 * If in6_selectroute() does not return a route entry,
850 * dst may not have been updated.
b0d623f7 851 */
6d2010ae
A
852 *dst = dst_sa; /* XXX */
853 }
b0d623f7 854
6d2010ae
A
855 /*
856 * then rt (for unicast) and ifp must be non-NULL valid values.
857 */
858 if ((flags & IPV6_FORWARDING) == 0) {
859 /* XXX: the FORWARDING flag can be set for mrouting. */
860 in6_ifstat_inc(ifp, ifs6_out_request);
861 }
862 if (rt != NULL) {
863 RT_LOCK(rt);
864 ia = (struct in6_ifaddr *)(rt->rt_ifa);
865 if (ia != NULL)
866 IFA_ADDREF(&ia->ia_ifa);
867 rt->rt_use++;
868 RT_UNLOCK(rt);
869 }
1c79356b 870
6d2010ae
A
871 /*
872 * The outgoing interface must be in the zone of source and
873 * destination addresses. We should use ia_ifp to support the
874 * case of sending packets to an address of our own.
875 */
876 if (ia != NULL && ia->ia_ifp) {
877 ifnet_reference(ia->ia_ifp);
878 if (origifp != NULL)
879 ifnet_release(origifp);
880 origifp = ia->ia_ifp;
881 } else {
882 if (ifp != NULL)
883 ifnet_reference(ifp);
884 if (origifp != NULL)
885 ifnet_release(origifp);
886 origifp = ifp;
887 }
888 src0 = ip6->ip6_src;
889 if (in6_setscope(&src0, origifp, &zone))
890 goto badscope;
891 bzero(&src_sa, sizeof(src_sa));
892 src_sa.sin6_family = AF_INET6;
893 src_sa.sin6_len = sizeof(src_sa);
894 src_sa.sin6_addr = ip6->ip6_src;
895 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
896 goto badscope;
897
898 dst0 = ip6->ip6_dst;
899 if (in6_setscope(&dst0, origifp, &zone))
900 goto badscope;
901 /* re-initialize to be sure */
902 bzero(&dst_sa, sizeof(dst_sa));
903 dst_sa.sin6_family = AF_INET6;
904 dst_sa.sin6_len = sizeof(dst_sa);
905 dst_sa.sin6_addr = ip6->ip6_dst;
906 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
907 goto badscope;
908 }
1c79356b 909
6d2010ae
A
910 /* scope check is done. */
911 goto routefound;
1c79356b 912
6d2010ae
A
913 badscope:
914 ip6stat.ip6s_badscope++;
915 in6_ifstat_inc(origifp, ifs6_out_discard);
916 if (error == 0)
917 error = EHOSTUNREACH; /* XXX */
918 goto bad;
1c79356b 919
6d2010ae
A
920 routefound:
921 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
922 if (opt && opt->ip6po_nextroute.ro_rt) {
1c79356b 923 /*
6d2010ae
A
924 * The nexthop is explicitly specified by the
925 * application. We assume the next hop is an IPv6
926 * address.
1c79356b 927 */
6d2010ae 928 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
b0d623f7 929 }
6d2010ae
A
930 else if ((rt->rt_flags & RTF_GATEWAY))
931 dst = (struct sockaddr_in6 *)rt->rt_gateway;
932 }
b0d623f7 933
6d2010ae
A
934 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
935 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
936 } else {
937 struct in6_multi *in6m;
1c79356b 938
6d2010ae 939 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1c79356b 940
1c79356b
A
941 in6_ifstat_inc(ifp, ifs6_out_mcast);
942
943 /*
944 * Confirm that the outgoing interface supports multicast.
945 */
946 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
947 ip6stat.ip6s_noroute++;
948 in6_ifstat_inc(ifp, ifs6_out_discard);
949 error = ENETUNREACH;
950 goto bad;
951 }
6d2010ae
A
952 in6_multihead_lock_shared();
953 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
954 in6_multihead_lock_done();
955 if (im6o != NULL)
956 IM6O_LOCK(im6o);
1c79356b
A
957 if (in6m != NULL &&
958 (im6o == NULL || im6o->im6o_multicast_loop)) {
6d2010ae
A
959 if (im6o != NULL)
960 IM6O_UNLOCK(im6o);
1c79356b
A
961 /*
962 * If we belong to the destination multicast group
963 * on the outgoing interface, and the caller did not
964 * forbid loopback, loop back a copy.
965 */
966 ip6_mloopback(ifp, m, dst);
967 } else {
6d2010ae
A
968 if (im6o != NULL)
969 IM6O_UNLOCK(im6o);
1c79356b
A
970 /*
971 * If we are acting as a multicast router, perform
972 * multicast forwarding as if the packet had just
973 * arrived on the interface to which we are about
974 * to send. The multicast forwarding function
975 * recursively calls this function, using the
976 * IPV6_FORWARDING flag to prevent infinite recursion.
977 *
978 * Multicasts that are looped back by ip6_mloopback(),
979 * above, will be forwarded by the ip6_input() routine,
980 * if necessary.
981 */
b7266188 982#if MROUTING
1c79356b 983 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
6d2010ae
A
984 /*
985 * XXX: ip6_mforward expects that rcvif is NULL
986 * when it is called from the originating path.
987 * However, it is not always the case, since
988 * some versions of MGETHDR() do not
989 * initialize the field.
990 */
991 m->m_pkthdr.rcvif = NULL;
2d21ac55 992 if (ip6_mforward(ip6, ifp, m) != 0) {
1c79356b 993 m_freem(m);
6d2010ae
A
994 if (in6m != NULL)
995 IN6M_REMREF(in6m);
1c79356b
A
996 goto done;
997 }
998 }
b7266188 999#endif
1c79356b 1000 }
6d2010ae
A
1001 if (in6m != NULL)
1002 IN6M_REMREF(in6m);
1c79356b
A
1003 /*
1004 * Multicasts with a hoplimit of zero may be looped back,
1005 * above, but must not be transmitted on a network.
1006 * Also, multicasts addressed to the loopback interface
1007 * are not sent -- the above call to ip6_mloopback() will
1008 * loop back a copy if this host actually belongs to the
1009 * destination group on the loopback interface.
1010 */
6d2010ae
A
1011 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1012 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1c79356b
A
1013 m_freem(m);
1014 goto done;
1015 }
1016 }
1017
1018 /*
1019 * Fill the outgoing interface to tell the upper layer
1020 * to increment per-interface statistics.
1021 */
6d2010ae
A
1022 if (ifpp != NULL) {
1023 ifnet_reference(ifp); /* for caller */
1024 if (*ifpp != NULL)
1025 ifnet_release(*ifpp);
1c79356b 1026 *ifpp = ifp;
1c79356b 1027 }
b0d623f7 1028
6d2010ae
A
1029 /* Determine path MTU. */
1030 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1031 &alwaysfrag)) != 0)
1032 goto bad;
1c79356b
A
1033
1034 /*
6d2010ae
A
1035 * The caller of this function may specify to use the minimum MTU
1036 * in some cases.
1037 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1038 * setting. The logic is a bit complicated; by default, unicast
1039 * packets will follow path MTU while multicast packets will be sent at
1040 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1041 * including unicast ones will be sent at the minimum MTU. Multicast
1042 * packets will always be sent at the minimum MTU unless
1043 * IP6PO_MINMTU_DISABLE is explicitly specified.
1044 * See RFC 3542 for more details.
1c79356b 1045 */
6d2010ae
A
1046 if (mtu > IPV6_MMTU) {
1047 if ((flags & IPV6_MINMTU))
1048 mtu = IPV6_MMTU;
1049 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
1050 mtu = IPV6_MMTU;
1051 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1052 (opt == NULL ||
1053 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1054 mtu = IPV6_MMTU;
b0d623f7 1055 }
1c79356b 1056 }
6d2010ae 1057
9bccf70c
A
1058 /*
1059 * clear embedded scope identifiers if necessary.
1060 * in6_clearscope will touch the addresses only when necessary.
1061 */
1062 in6_clearscope(&ip6->ip6_src);
1063 in6_clearscope(&ip6->ip6_dst);
1c79356b 1064
7e4a7d39 1065#if IPFW2
1c79356b
A
1066 /*
1067 * Check with the firewall...
1068 */
9bccf70c 1069 if (ip6_fw_enable && ip6_fw_chk_ptr) {
1c79356b 1070 u_short port = 0;
55e303ae 1071 m->m_pkthdr.rcvif = NULL; /* XXX */
1c79356b 1072 /* If ipfw says divert, we have to just drop packet */
91447636 1073 if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
1c79356b
A
1074 m_freem(m);
1075 goto done;
1076 }
1077 if (!m) {
1078 error = EACCES;
1079 goto done;
1080 }
1081 }
7e4a7d39 1082#endif
1c79356b
A
1083
1084 /*
1085 * If the outgoing packet contains a hop-by-hop options header,
1086 * it must be examined and processed even by the source node.
1087 * (RFC 2460, section 4.)
1088 */
1089 if (exthdrs.ip6e_hbh) {
9bccf70c 1090 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
6d2010ae 1091 u_int32_t dummy; /* XXX unused */
1c79356b 1092
9bccf70c
A
1093#if DIAGNOSTIC
1094 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1095 panic("ip6e_hbh is not continuous");
1096#endif
1c79356b
A
1097 /*
1098 * XXX: if we have to send an ICMPv6 error to the sender,
1099 * we need the M_LOOP flag since icmp6_error() expects
1100 * the IPv6 and the hop-by-hop options header are
1101 * continuous unless the flag is set.
1102 */
1103 m->m_flags |= M_LOOP;
1104 m->m_pkthdr.rcvif = ifp;
6d2010ae
A
1105 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1106 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1107 &dummy, &plen) < 0) {
1c79356b
A
1108 /* m was already freed at this point */
1109 error = EINVAL;/* better error? */
1110 goto done;
1111 }
1112 m->m_flags &= ~M_LOOP; /* XXX */
1113 m->m_pkthdr.rcvif = NULL;
1114 }
1115
b0d623f7 1116#if PF
6d2010ae
A
1117 if (PF_IS_ENABLED) {
1118 /* Invoke outbound packet filter */
1119 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
b0d623f7 1120
6d2010ae
A
1121 if (error) {
1122 if (m != NULL) {
1123 panic("%s: unexpected packet %p\n", __func__, m);
1124 /* NOTREACHED */
1125 }
1126 /* Already freed by callee */
1127 goto done;
b0d623f7 1128 }
6d2010ae 1129 ip6 = mtod(m, struct ip6_hdr *);
b0d623f7 1130 }
b0d623f7
A
1131#endif /* PF */
1132
1c79356b
A
1133 /*
1134 * Send the packet to the outgoing interface.
1135 * If necessary, do IPv6 fragmentation before sending.
6d2010ae
A
1136 *
1137 * the logic here is rather complex:
1138 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1139 * 1-a: send as is if tlen <= path mtu
1140 * 1-b: fragment if tlen > path mtu
1141 *
1142 * 2: if user asks us not to fragment (dontfrag == 1)
1143 * 2-a: send as is if tlen <= interface mtu
1144 * 2-b: error if tlen > interface mtu
1145 *
1146 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1147 * always fragment
1148 *
1149 * 4: if dontfrag == 1 && alwaysfrag == 1
1150 * error, as we cannot handle this conflicting request
1c79356b
A
1151 */
1152 tlen = m->m_pkthdr.len;
6d2010ae
A
1153
1154 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
1155 dontfrag = 1;
1156 else
1157 dontfrag = 0;
1158 if (dontfrag && alwaysfrag) { /* case 4 */
1159 /* conflicting request - can't transmit */
1160 error = EMSGSIZE;
1161 goto bad;
1162 }
1163
1164 lck_rw_lock_shared(nd_if_rwlock);
1165 ifmtu = IN6_LINKMTU(ifp);
1166 lck_rw_done(nd_if_rwlock);
1167
1168 if (dontfrag && tlen > ifmtu) { /* case 2-b */
1169 /*
1170 * Even if the DONTFRAG option is specified, we cannot send the
1171 * packet when the data length is larger than the MTU of the
1172 * outgoing interface.
1173 * Notify the error by sending IPV6_PATHMTU ancillary data as
1174 * well as returning an error code (the latter is not described
1175 * in the API spec.)
1176 */
1177 u_int32_t mtu32;
1178 struct ip6ctlparam ip6cp;
1179
1180 mtu32 = (u_int32_t)mtu;
1181 bzero(&ip6cp, sizeof(ip6cp));
1182 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1183 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1184 (void *)&ip6cp);
1185
1186 error = EMSGSIZE;
1187 goto bad;
1188 }
1189
1190 /*
1191 * transmit packet without fragmentation
1192 */
1193 tso = (ifp->if_hwassist & IFNET_TSO_IPV6) &&
1194 (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6);
1195 if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
1196 (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1197 int sw_csum;
1198
1199 ip6 = mtod(m, struct ip6_hdr *);
9bccf70c
A
1200#ifdef IPSEC
1201 /* clean ipsec history once it goes out of the node */
1202 ipsec_delaux(m);
1c79356b 1203#endif
9bccf70c 1204
6d2010ae
A
1205 if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */
1206 sw_csum = m->m_pkthdr.csum_flags;
1207 else
1208 sw_csum = m->m_pkthdr.csum_flags &
1209 ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1210
1211 if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) {
1212 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1213 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1214 }
1215 if (ro->ro_rt)
1216 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1217 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1c79356b 1218 goto done;
6d2010ae
A
1219 }
1220
1221 /*
1222 * try to fragment the packet. case 1-b and 3
1223 */
1224 if (mtu < IPV6_MMTU) {
1225 /* path MTU cannot be less than IPV6_MMTU */
1c79356b
A
1226 error = EMSGSIZE;
1227 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1228 goto bad;
6d2010ae
A
1229 } else if (ip6->ip6_plen == 0) {
1230 /* jumbo payload cannot be fragmented */
1c79356b
A
1231 error = EMSGSIZE;
1232 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1233 goto bad;
1234 } else {
1235 struct mbuf **mnext, *m_frgpart;
6d2010ae
A
1236 struct ip6_frag *ip6f;
1237 u_int32_t id = htonl(ip6_randomid());
1c79356b
A
1238 u_char nextproto;
1239
1240 /*
1241 * Too large for the destination or interface;
1242 * fragment if possible.
1243 * Must be able to put at least 8 bytes per fragment.
1244 */
1245 hlen = unfragpartlen;
1246 if (mtu > IPV6_MAXPACKET)
1247 mtu = IPV6_MAXPACKET;
9bccf70c 1248
1c79356b
A
1249 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1250 if (len < 8) {
1251 error = EMSGSIZE;
1252 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1253 goto bad;
1254 }
1255
1256 mnext = &m->m_nextpkt;
1257
1258 /*
1259 * Change the next header field of the last header in the
1260 * unfragmentable part.
1261 */
1262 if (exthdrs.ip6e_rthdr) {
1263 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1264 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1265 } else if (exthdrs.ip6e_dest1) {
1266 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1267 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1268 } else if (exthdrs.ip6e_hbh) {
1269 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1270 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1271 } else {
1272 nextproto = ip6->ip6_nxt;
1273 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1274 }
1275
6d2010ae
A
1276 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
1277 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1278 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1279 }
1280
1c79356b
A
1281 /*
1282 * Loop through length of segment after first fragment,
55e303ae
A
1283 * make new header and copy data of each part and link onto
1284 * chain.
1c79356b
A
1285 */
1286 m0 = m;
1287 for (off = hlen; off < tlen; off += len) {
2d21ac55 1288 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
1289 if (!m) {
1290 error = ENOBUFS;
1291 ip6stat.ip6s_odropped++;
1292 goto sendorfree;
1293 }
9bccf70c 1294 m->m_pkthdr.rcvif = NULL;
1c79356b
A
1295 m->m_flags = m0->m_flags & M_COPYFLAGS;
1296 *mnext = m;
1297 mnext = &m->m_nextpkt;
1298 m->m_data += max_linkhdr;
1299 mhip6 = mtod(m, struct ip6_hdr *);
1300 *mhip6 = *ip6;
1301 m->m_len = sizeof(*mhip6);
1302 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1303 if (error) {
1304 ip6stat.ip6s_odropped++;
1305 goto sendorfree;
1306 }
1307 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1308 if (off + len >= tlen)
1309 len = tlen - off;
1310 else
1311 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1312 mhip6->ip6_plen = htons((u_short)(len + hlen +
1313 sizeof(*ip6f) -
1314 sizeof(struct ip6_hdr)));
1315 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1316 error = ENOBUFS;
1317 ip6stat.ip6s_odropped++;
1318 goto sendorfree;
1319 }
1320 m_cat(m, m_frgpart);
1321 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
91447636
A
1322 m->m_pkthdr.rcvif = 0;
1323 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
2d21ac55
A
1324#ifdef __darwin8_notyet
1325#if CONFIG_MACF_NET
1326 mac_create_fragment(m0, m);
1327#endif
1328#endif
1c79356b
A
1329 ip6f->ip6f_reserved = 0;
1330 ip6f->ip6f_ident = id;
1331 ip6f->ip6f_nxt = nextproto;
1332 ip6stat.ip6s_ofragments++;
1333 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1334 }
1335
1336 in6_ifstat_inc(ifp, ifs6_out_fragok);
1337 }
1338
1339 /*
1340 * Remove leading garbages.
1341 */
1342sendorfree:
1343 m = m0->m_nextpkt;
1344 m0->m_nextpkt = 0;
1345 m_freem(m0);
1346 for (m0 = m; m; m = m0) {
1347 m0 = m->m_nextpkt;
1348 m->m_nextpkt = 0;
1349 if (error == 0) {
9bccf70c
A
1350 /* Record statistics for this interface address. */
1351 if (ia) {
1352#ifndef __APPLE__
1353 ia->ia_ifa.if_opackets++;
1354 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1c79356b 1355#endif
9bccf70c
A
1356 }
1357#if IPSEC
1358 /* clean ipsec history once it goes out of the node */
1359 ipsec_delaux(m);
1c79356b 1360#endif
6d2010ae 1361 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
91447636 1362
1c79356b
A
1363 } else
1364 m_freem(m);
1365 }
1366
1367 if (error == 0)
1368 ip6stat.ip6s_fragmented++;
1369
1370done:
ebb1b9f4
A
1371#if IPSEC
1372 if (ipsec_saved_route) {
1373 ro = ipsec_saved_route;
1374 if (ipsec_state.ro.ro_rt) {
1375 rtfree(ipsec_state.ro.ro_rt);
1376 }
1377 }
1378#endif /* IPSEC */
9bccf70c
A
1379 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */
1380 rtfree(ro->ro_rt);
1c79356b 1381 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
9bccf70c 1382 rtfree(ro_pmtu->ro_rt);
1c79356b
A
1383 }
1384
1385#if IPSEC
2d21ac55
A
1386 if (sp != NULL)
1387 key_freesp(sp, KEY_SADB_UNLOCKED);
1c79356b
A
1388#endif /* IPSEC */
1389
b0d623f7 1390 if (ia != NULL)
6d2010ae
A
1391 IFA_REMREF(&ia->ia_ifa);
1392 if (ifp != NULL)
1393 ifnet_release(ifp);
1394 if (origifp != NULL)
1395 ifnet_release(origifp);
1396 return (error);
1c79356b
A
1397
1398freehdrs:
1399 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1400 m_freem(exthdrs.ip6e_dest1);
1401 m_freem(exthdrs.ip6e_rthdr);
1402 m_freem(exthdrs.ip6e_dest2);
1403 /* fall through */
1404bad:
1405 m_freem(m);
1406 goto done;
1407}
1408
1409static int
1410ip6_copyexthdr(mp, hdr, hlen)
1411 struct mbuf **mp;
1412 caddr_t hdr;
1413 int hlen;
1414{
1415 struct mbuf *m;
1416
1417 if (hlen > MCLBYTES)
1418 return(ENOBUFS); /* XXX */
1419
1420 MGET(m, M_DONTWAIT, MT_DATA);
1421 if (!m)
1422 return(ENOBUFS);
1423
1424 if (hlen > MLEN) {
1425 MCLGET(m, M_DONTWAIT);
1426 if ((m->m_flags & M_EXT) == 0) {
1427 m_free(m);
6d2010ae 1428 return (ENOBUFS);
1c79356b
A
1429 }
1430 }
1431 m->m_len = hlen;
1432 if (hdr)
1433 bcopy(hdr, mtod(m, caddr_t), hlen);
1434
1435 *mp = m;
6d2010ae 1436 return (0);
1c79356b
A
1437}
1438
6d2010ae
A
1439/*
1440 * Process a delayed payload checksum calculation.
1441 */
1442void
1443in6_delayed_cksum(struct mbuf *m, uint16_t offset)
1444{
1445 uint16_t csum;
1446
1447 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1448 if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) {
1449 csum = 0xffff;
1450 }
1451
1452 offset += (m->m_pkthdr.csum_data & 0xffff);
1453 if ((offset + sizeof(csum)) > m->m_len) {
1454 m_copyback(m, offset, sizeof(csum), &csum);
1455 } else {
1456 *(uint16_t *)(mtod(m, char *) + offset) = csum;
1457 }
1458}
1c79356b
A
1459/*
1460 * Insert jumbo payload option.
1461 */
1462static int
1463ip6_insert_jumboopt(exthdrs, plen)
1464 struct ip6_exthdrs *exthdrs;
1465 u_int32_t plen;
1466{
1467 struct mbuf *mopt;
1468 u_char *optbuf;
9bccf70c 1469 u_int32_t v;
1c79356b
A
1470
1471#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1472
1473 /*
1474 * If there is no hop-by-hop options header, allocate new one.
1475 * If there is one but it doesn't have enough space to store the
1476 * jumbo payload option, allocate a cluster to store the whole options.
1477 * Otherwise, use it to store the options.
1478 */
1479 if (exthdrs->ip6e_hbh == 0) {
1480 MGET(mopt, M_DONTWAIT, MT_DATA);
1481 if (mopt == 0)
6d2010ae 1482 return (ENOBUFS);
1c79356b
A
1483 mopt->m_len = JUMBOOPTLEN;
1484 optbuf = mtod(mopt, u_char *);
1485 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1486 exthdrs->ip6e_hbh = mopt;
1487 } else {
1488 struct ip6_hbh *hbh;
1489
1490 mopt = exthdrs->ip6e_hbh;
1491 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
9bccf70c
A
1492 /*
1493 * XXX assumption:
1494 * - exthdrs->ip6e_hbh is not referenced from places
1495 * other than exthdrs.
1496 * - exthdrs->ip6e_hbh is not an mbuf chain.
1497 */
6d2010ae 1498 u_int32_t oldoptlen = mopt->m_len;
9bccf70c 1499 struct mbuf *n;
1c79356b 1500
9bccf70c
A
1501 /*
1502 * XXX: give up if the whole (new) hbh header does
1503 * not fit even in an mbuf cluster.
1504 */
1505 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
6d2010ae 1506 return (ENOBUFS);
1c79356b 1507
9bccf70c
A
1508 /*
1509 * As a consequence, we must always prepare a cluster
1510 * at this point.
1511 */
1512 MGET(n, M_DONTWAIT, MT_DATA);
1513 if (n) {
1514 MCLGET(n, M_DONTWAIT);
1515 if ((n->m_flags & M_EXT) == 0) {
1516 m_freem(n);
1517 n = NULL;
1518 }
1519 }
1520 if (!n)
6d2010ae 1521 return (ENOBUFS);
9bccf70c
A
1522 n->m_len = oldoptlen + JUMBOOPTLEN;
1523 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
6d2010ae
A
1524 oldoptlen);
1525 optbuf = mtod(n, u_char *) + oldoptlen;
9bccf70c
A
1526 m_freem(mopt);
1527 mopt = exthdrs->ip6e_hbh = n;
1c79356b
A
1528 } else {
1529 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1530 mopt->m_len += JUMBOOPTLEN;
1531 }
1532 optbuf[0] = IP6OPT_PADN;
1533 optbuf[1] = 1;
1534
1535 /*
1536 * Adjust the header length according to the pad and
1537 * the jumbo payload option.
1538 */
1539 hbh = mtod(mopt, struct ip6_hbh *);
1540 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1541 }
1542
1543 /* fill in the option. */
1544 optbuf[2] = IP6OPT_JUMBO;
1545 optbuf[3] = 4;
9bccf70c
A
1546 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1547 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1c79356b
A
1548
1549 /* finally, adjust the packet header length */
1550 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1551
6d2010ae 1552 return (0);
1c79356b
A
1553#undef JUMBOOPTLEN
1554}
1555
1556/*
1557 * Insert fragment header and copy unfragmentable header portions.
1558 */
1559static int
1560ip6_insertfraghdr(m0, m, hlen, frghdrp)
1561 struct mbuf *m0, *m;
1562 int hlen;
1563 struct ip6_frag **frghdrp;
1564{
1565 struct mbuf *n, *mlast;
1566
1567 if (hlen > sizeof(struct ip6_hdr)) {
1568 n = m_copym(m0, sizeof(struct ip6_hdr),
6d2010ae 1569 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1c79356b 1570 if (n == 0)
6d2010ae 1571 return (ENOBUFS);
1c79356b
A
1572 m->m_next = n;
1573 } else
1574 n = m;
1575
1576 /* Search for the last mbuf of unfragmentable part. */
1577 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1578 ;
1579
1580 if ((mlast->m_flags & M_EXT) == 0 &&
9bccf70c 1581 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1c79356b 1582 /* use the trailing space of the last mbuf for the fragment hdr */
6d2010ae
A
1583 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1584 mlast->m_len);
1c79356b
A
1585 mlast->m_len += sizeof(struct ip6_frag);
1586 m->m_pkthdr.len += sizeof(struct ip6_frag);
1587 } else {
1588 /* allocate a new mbuf for the fragment header */
1589 struct mbuf *mfrg;
1590
1591 MGET(mfrg, M_DONTWAIT, MT_DATA);
1592 if (mfrg == 0)
6d2010ae 1593 return (ENOBUFS);
1c79356b
A
1594 mfrg->m_len = sizeof(struct ip6_frag);
1595 *frghdrp = mtod(mfrg, struct ip6_frag *);
1596 mlast->m_next = mfrg;
1597 }
1598
6d2010ae 1599 return (0);
1c79356b
A
1600}
1601
2d21ac55 1602extern int load_ipfw(void);
6d2010ae
A
1603static int
1604ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1605 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
1606 int *alwaysfragp)
1607{
1608 u_int32_t mtu = 0;
1609 int alwaysfrag = 0;
1610 int error = 0;
55e303ae 1611
6d2010ae
A
1612 if (ro_pmtu != ro) {
1613 /* The first hop and the final destination may differ. */
1614 struct sockaddr_in6 *sa6_dst =
1615 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1616 if (ro_pmtu->ro_rt &&
1617 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1618 ro_pmtu->ro_rt->generation_id != route_generation ||
1619 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1620 rtfree(ro_pmtu->ro_rt);
1621 ro_pmtu->ro_rt = (struct rtentry *)NULL;
1622 }
1623 if (ro_pmtu->ro_rt == NULL) {
1624 bzero(sa6_dst, sizeof(*sa6_dst));
1625 sa6_dst->sin6_family = AF_INET6;
1626 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1627 sa6_dst->sin6_addr = *dst;
1628
1629 rtalloc_scoped((struct route *)ro_pmtu,
1630 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
1631 }
1632 }
1633
1634
1635 if (ro_pmtu->ro_rt != NULL) {
1636 u_int32_t ifmtu;
1637
1638 lck_rw_lock_shared(nd_if_rwlock);
1639 ifmtu = IN6_LINKMTU(ifp);
1640 lck_rw_done(nd_if_rwlock);
1641
1642 RT_LOCK_SPIN(ro_pmtu->ro_rt);
1643 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1644 if (mtu > ifmtu || mtu == 0) {
1645 /*
1646 * The MTU on the route is larger than the MTU on
1647 * the interface! This shouldn't happen, unless the
1648 * MTU of the interface has been changed after the
1649 * interface was brought up. Change the MTU in the
1650 * route to match the interface MTU (as long as the
1651 * field isn't locked).
1652 *
1653 * if MTU on the route is 0, we need to fix the MTU.
1654 * this case happens with path MTU discovery timeouts.
1655 */
1656 mtu = ifmtu;
1657 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
1658 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
1659 }
1660 else if (mtu < IPV6_MMTU) {
1661 /*
1662 * RFC2460 section 5, last paragraph:
1663 * if we record ICMPv6 too big message with
1664 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1665 * or smaller, with framgent header attached.
1666 * (fragment header is needed regardless from the
1667 * packet size, for translators to identify packets)
1668 */
1669 alwaysfrag = 1;
1670 mtu = IPV6_MMTU;
1671 }
1672 RT_UNLOCK(ro_pmtu->ro_rt);
1673 } else {
1674 if (ifp) {
1675 lck_rw_lock_shared(nd_if_rwlock);
1676 mtu = IN6_LINKMTU(ifp);
1677 lck_rw_done(nd_if_rwlock);
1678 } else
1679 error = EHOSTUNREACH; /* XXX */
1680 }
1681
1682 *mtup = mtu;
1683 if (alwaysfragp)
1684 *alwaysfragp = alwaysfrag;
1685 return (error);
1686}
1687
1688/*
1c79356b
A
1689 * IP6 socket option processing.
1690 */
1c79356b
A
1691int
1692ip6_ctloutput(so, sopt)
1693 struct socket *so;
1694 struct sockopt *sopt;
1c79356b 1695{
6d2010ae
A
1696 int optdatalen, uproto;
1697 void *optdata;
9bccf70c
A
1698 int privileged;
1699 struct inpcb *in6p = sotoinpcb(so);
b0d623f7 1700 int error = 0, optval = 0;
2d21ac55
A
1701 int level, op = -1, optname = 0;
1702 int optlen = 0;
b0d623f7 1703 struct proc *p;
1c79356b 1704
b0d623f7 1705 if (sopt == NULL) {
55e303ae 1706 panic("ip6_ctloutput: arg soopt is NULL");
b0d623f7 1707 /* NOTREACHED */
1c79356b 1708 }
b0d623f7
A
1709 level = sopt->sopt_level;
1710 op = sopt->sopt_dir;
1711 optname = sopt->sopt_name;
1712 optlen = sopt->sopt_valsize;
1713 p = sopt->sopt_p;
6d2010ae 1714 uproto = (int)so->so_proto->pr_protocol;
1c79356b 1715
b0d623f7 1716 privileged = (proc_suser(p) == 0);
1c79356b
A
1717
1718 if (level == IPPROTO_IPV6) {
1719 switch (op) {
1720
1c79356b 1721 case SOPT_SET:
1c79356b 1722 switch (optname) {
6d2010ae 1723 case IPV6_2292PKTOPTIONS:
9bccf70c 1724 {
1c79356b
A
1725 struct mbuf *m;
1726
9bccf70c 1727 error = soopt_getm(sopt, &m); /* XXX */
2d21ac55 1728 if (error != 0)
1c79356b 1729 break;
9bccf70c 1730 error = soopt_mcopyin(sopt, m); /* XXX */
2d21ac55 1731 if (error != 0)
1c79356b
A
1732 break;
1733 error = ip6_pcbopts(&in6p->in6p_outputopts,
1734 m, so, sopt);
1735 m_freem(m); /* XXX */
1c79356b
A
1736 break;
1737 }
9bccf70c 1738
1c79356b
A
1739 /*
1740 * Use of some Hop-by-Hop options or some
1741 * Destination options, might require special
1742 * privilege. That is, normal applications
1743 * (without special privilege) might be forbidden
1744 * from setting certain options in outgoing packets,
1745 * and might never see certain options in received
1746 * packets. [RFC 2292 Section 6]
1747 * KAME specific note:
1748 * KAME prevents non-privileged users from sending or
1749 * receiving ANY hbh/dst options in order to avoid
1750 * overhead of parsing options in the kernel.
1751 */
6d2010ae
A
1752 case IPV6_RECVHOPOPTS:
1753 case IPV6_RECVDSTOPTS:
1754 case IPV6_RECVRTHDRDSTOPTS:
1755 if (!privileged)
1756 break;
1757 /* FALLTHROUGH */
1c79356b 1758 case IPV6_UNICAST_HOPS:
6d2010ae 1759 case IPV6_HOPLIMIT:
1c79356b
A
1760 case IPV6_FAITH:
1761
6d2010ae
A
1762 case IPV6_RECVPKTINFO:
1763 case IPV6_RECVHOPLIMIT:
1764 case IPV6_RECVRTHDR:
1765 case IPV6_RECVPATHMTU:
b0d623f7 1766 case IPV6_RECVTCLASS:
9bccf70c 1767 case IPV6_V6ONLY:
6d2010ae 1768 case IPV6_AUTOFLOWLABEL:
9bccf70c 1769 if (optlen != sizeof(int)) {
1c79356b 1770 error = EINVAL;
9bccf70c
A
1771 break;
1772 }
1773 error = sooptcopyin(sopt, &optval,
1774 sizeof optval, sizeof optval);
1775 if (error)
1776 break;
1777 switch (optname) {
1c79356b 1778
9bccf70c
A
1779 case IPV6_UNICAST_HOPS:
1780 if (optval < -1 || optval >= 256)
1781 error = EINVAL;
1782 else {
1783 /* -1 = kernel default */
1784 in6p->in6p_hops = optval;
6d2010ae 1785 if ((in6p->inp_vflag &
9bccf70c
A
1786 INP_IPV4) != 0)
1787 in6p->inp_ip_ttl = optval;
1788 }
1789 break;
1c79356b 1790#define OPTSET(bit) \
9bccf70c 1791do { \
1c79356b 1792 if (optval) \
6d2010ae 1793 in6p->inp_flags |= (bit); \
1c79356b 1794 else \
6d2010ae
A
1795 in6p->inp_flags &= ~(bit); \
1796} while (/*CONSTCOND*/ 0)
1797#define OPTSET2292(bit) \
1798do { \
1799 in6p->inp_flags |= IN6P_RFC2292; \
1800 if (optval) \
1801 in6p->inp_flags |= (bit); \
1802 else \
1803 in6p->inp_flags &= ~(bit); \
1804} while (/*CONSTCOND*/ 0)
1805#define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1806
1807 case IPV6_RECVPKTINFO:
1808 /* cannot mix with RFC2292 */
1809 if (OPTBIT(IN6P_RFC2292)) {
1810 error = EINVAL;
1811 break;
1812 }
1813 OPTSET(IN6P_PKTINFO);
1814 break;
1815
1816 case IPV6_HOPLIMIT:
1817 {
1818 struct ip6_pktopts **optp;
1819
1820 /* cannot mix with RFC2292 */
1821 if (OPTBIT(IN6P_RFC2292)) {
1822 error = EINVAL;
1823 break;
1824 }
1825 optp = &in6p->in6p_outputopts;
1826 error = ip6_pcbopt(IPV6_HOPLIMIT,
1827 (u_char *)&optval, sizeof(optval),
1828 optp, uproto);
1829 break;
1830 }
1831
1832 case IPV6_RECVHOPLIMIT:
1833 /* cannot mix with RFC2292 */
1834 if (OPTBIT(IN6P_RFC2292)) {
1835 error = EINVAL;
1836 break;
1837 }
1838 OPTSET(IN6P_HOPLIMIT);
1839 break;
1840
1841 case IPV6_RECVHOPOPTS:
1842 /* cannot mix with RFC2292 */
1843 if (OPTBIT(IN6P_RFC2292)) {
1844 error = EINVAL;
1845 break;
1846 }
1847 OPTSET(IN6P_HOPOPTS);
1848 break;
1849
1850 case IPV6_RECVDSTOPTS:
1851 /* cannot mix with RFC2292 */
1852 if (OPTBIT(IN6P_RFC2292)) {
1853 error = EINVAL;
1854 break;
1855 }
1856 OPTSET(IN6P_DSTOPTS);
1857 break;
1c79356b 1858
6d2010ae
A
1859 case IPV6_RECVRTHDRDSTOPTS:
1860 /* cannot mix with RFC2292 */
1861 if (OPTBIT(IN6P_RFC2292)) {
1862 error = EINVAL;
1863 break;
1864 }
1865 OPTSET(IN6P_RTHDRDSTOPTS);
1866 break;
1867
1868 case IPV6_RECVRTHDR:
1869 /* cannot mix with RFC2292 */
1870 if (OPTBIT(IN6P_RFC2292)) {
1871 error = EINVAL;
1872 break;
1873 }
1874 OPTSET(IN6P_RTHDR);
9bccf70c 1875 break;
1c79356b 1876
9bccf70c 1877 case IPV6_FAITH:
6d2010ae
A
1878 OPTSET(INP_FAITH);
1879 break;
1880
1881 case IPV6_RECVPATHMTU:
1882 /*
1883 * We ignore this option for TCP
1884 * sockets.
1885 * (RFC3542 leaves this case
1886 * unspecified.)
1887 */
1888 if (uproto != IPPROTO_TCP)
1889 OPTSET(IN6P_MTU);
9bccf70c 1890 break;
1c79356b 1891
9bccf70c
A
1892 case IPV6_V6ONLY:
1893 /*
1894 * make setsockopt(IPV6_V6ONLY)
1895 * available only prior to bind(2).
1896 * see ipng mailing list, Jun 22 2001.
1897 */
6d2010ae
A
1898 if (in6p->inp_lport ||
1899 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
9bccf70c 1900 error = EINVAL;
1c79356b 1901 break;
1c79356b 1902 }
9bccf70c 1903 OPTSET(IN6P_IPV6_V6ONLY);
55e303ae 1904 if (optval)
6d2010ae 1905 in6p->inp_vflag &= ~INP_IPV4;
55e303ae 1906 else
6d2010ae 1907 in6p->inp_vflag |= INP_IPV4;
9bccf70c 1908 break;
b0d623f7 1909 case IPV6_RECVTCLASS:
6d2010ae 1910 /* we can mix with RFC2292 */
b0d623f7
A
1911 OPTSET(IN6P_TCLASS);
1912 break;
6d2010ae
A
1913 case IPV6_AUTOFLOWLABEL:
1914 OPTSET(IN6P_AUTOFLOWLABEL);
1915 break;
1916
1c79356b
A
1917 }
1918 break;
9bccf70c 1919
6d2010ae
A
1920 case IPV6_TCLASS:
1921 case IPV6_DONTFRAG:
1922 case IPV6_USE_MIN_MTU:
1923 case IPV6_PREFER_TEMPADDR:
1924 if (optlen != sizeof(optval)) {
1925 error = EINVAL;
1926 break;
1927 }
1928 error = sooptcopyin(sopt, &optval,
1929 sizeof optval, sizeof optval);
1930 if (error)
1931 break;
1932 {
1933 struct ip6_pktopts **optp;
1934 optp = &in6p->in6p_outputopts;
1935 error = ip6_pcbopt(optname,
1936 (u_char *)&optval, sizeof(optval),
1937 optp, uproto);
1938 break;
1939 }
1940
1941 case IPV6_2292PKTINFO:
1942 case IPV6_2292HOPLIMIT:
1943 case IPV6_2292HOPOPTS:
1944 case IPV6_2292DSTOPTS:
1945 case IPV6_2292RTHDR:
9bccf70c
A
1946 /* RFC 2292 */
1947 if (optlen != sizeof(int)) {
1948 error = EINVAL;
1949 break;
1950 }
1951 error = sooptcopyin(sopt, &optval,
1952 sizeof optval, sizeof optval);
1953 if (error)
1954 break;
1955 switch (optname) {
6d2010ae
A
1956 case IPV6_2292PKTINFO:
1957 OPTSET2292(IN6P_PKTINFO);
9bccf70c 1958 break;
6d2010ae
A
1959 case IPV6_2292HOPLIMIT:
1960 OPTSET2292(IN6P_HOPLIMIT);
9bccf70c 1961 break;
6d2010ae 1962 case IPV6_2292HOPOPTS:
9bccf70c
A
1963 /*
1964 * Check super-user privilege.
1965 * See comments for IPV6_RECVHOPOPTS.
1966 */
1967 if (!privileged)
1968 return(EPERM);
6d2010ae 1969 OPTSET2292(IN6P_HOPOPTS);
9bccf70c 1970 break;
6d2010ae 1971 case IPV6_2292DSTOPTS:
9bccf70c
A
1972 if (!privileged)
1973 return(EPERM);
6d2010ae 1974 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
9bccf70c 1975 break;
6d2010ae
A
1976 case IPV6_2292RTHDR:
1977 OPTSET2292(IN6P_RTHDR);
1c79356b 1978 break;
1c79356b
A
1979 }
1980 break;
6d2010ae
A
1981 case IPV6_3542PKTINFO:
1982 case IPV6_3542HOPOPTS:
1983 case IPV6_3542RTHDR:
1984 case IPV6_3542DSTOPTS:
1985 case IPV6_RTHDRDSTOPTS:
1986 case IPV6_3542NEXTHOP:
1987 {
1988 /* new advanced API (RFC3542) */
1989 struct mbuf *m;
1c79356b 1990
6d2010ae
A
1991 /* cannot mix with RFC2292 */
1992 if (OPTBIT(IN6P_RFC2292)) {
b0d623f7
A
1993 error = EINVAL;
1994 break;
1995 }
6d2010ae
A
1996 error = soopt_getm(sopt, &m);
1997 if (error != 0)
1998 break;
1999 error = soopt_mcopyin(sopt, m);
2000 if (error) {
2001 m_freem(m);
b0d623f7 2002 break;
6d2010ae
A
2003 }
2004 error = ip6_pcbopt(optname, mtod(m, u_char *),
2005 m->m_len, &in6p->in6p_outputopts, uproto);
2006 m_freem(m);
b0d623f7 2007 break;
6d2010ae
A
2008 }
2009#undef OPTSET
b0d623f7 2010
1c79356b
A
2011 case IPV6_MULTICAST_IF:
2012 case IPV6_MULTICAST_HOPS:
2013 case IPV6_MULTICAST_LOOP:
2014 case IPV6_JOIN_GROUP:
2015 case IPV6_LEAVE_GROUP:
6d2010ae
A
2016 case IPV6_MSFILTER:
2017 case MCAST_BLOCK_SOURCE:
2018 case MCAST_UNBLOCK_SOURCE:
2019 case MCAST_JOIN_GROUP:
2020 case MCAST_LEAVE_GROUP:
2021 case MCAST_JOIN_SOURCE_GROUP:
2022 case MCAST_LEAVE_SOURCE_GROUP:
2023 error = ip6_setmoptions(in6p, sopt);
1c79356b
A
2024 break;
2025
9bccf70c
A
2026 case IPV6_PORTRANGE:
2027 error = sooptcopyin(sopt, &optval,
2028 sizeof optval, sizeof optval);
2029 if (error)
2030 break;
1c79356b 2031
9bccf70c
A
2032 switch (optval) {
2033 case IPV6_PORTRANGE_DEFAULT:
6d2010ae
A
2034 in6p->inp_flags &= ~(INP_LOWPORT);
2035 in6p->inp_flags &= ~(INP_HIGHPORT);
9bccf70c 2036 break;
1c79356b 2037
9bccf70c 2038 case IPV6_PORTRANGE_HIGH:
6d2010ae
A
2039 in6p->inp_flags &= ~(INP_LOWPORT);
2040 in6p->inp_flags |= INP_HIGHPORT;
9bccf70c 2041 break;
1c79356b 2042
9bccf70c 2043 case IPV6_PORTRANGE_LOW:
6d2010ae
A
2044 in6p->inp_flags &= ~(INP_HIGHPORT);
2045 in6p->inp_flags |= INP_LOWPORT;
9bccf70c 2046 break;
1c79356b 2047
9bccf70c
A
2048 default:
2049 error = EINVAL;
2050 break;
2051 }
1c79356b 2052 break;
1c79356b
A
2053
2054#if IPSEC
2055 case IPV6_IPSEC_POLICY:
2056 {
2057 caddr_t req = NULL;
2058 size_t len = 0;
1c79356b 2059 struct mbuf *m;
1c79356b 2060
9bccf70c 2061 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1c79356b 2062 break;
55e303ae 2063 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1c79356b 2064 break;
1c79356b
A
2065 if (m) {
2066 req = mtod(m, caddr_t);
2067 len = m->m_len;
2068 }
1c79356b
A
2069 error = ipsec6_set_policy(in6p, optname, req,
2070 len, privileged);
1c79356b 2071 m_freem(m);
1c79356b
A
2072 }
2073 break;
9bccf70c 2074#endif /* KAME IPSEC */
1c79356b 2075
2d21ac55 2076#if IPFIREWALL
1c79356b
A
2077 case IPV6_FW_ADD:
2078 case IPV6_FW_DEL:
2079 case IPV6_FW_FLUSH:
2080 case IPV6_FW_ZERO:
55e303ae 2081 {
060df5ea
A
2082 if (ip6_fw_ctl_ptr == NULL)
2083 load_ip6fw();
2084 if (ip6_fw_ctl_ptr != NULL)
2085 error = (*ip6_fw_ctl_ptr)(sopt);
2086 else
2087 return ENOPROTOOPT;
55e303ae 2088 }
1c79356b 2089 break;
2d21ac55 2090#endif /* IPFIREWALL */
1c79356b 2091
6d2010ae
A
2092 /*
2093 * IPv6 variant of IP_BOUND_IF; for details see
2094 * comments on IP_BOUND_IF in ip_ctloutput().
2095 */
2096 case IPV6_BOUND_IF:
2097 /* This option is settable only on IPv6 */
2098 if (!(in6p->inp_vflag & INP_IPV6)) {
2099 error = EINVAL;
2100 break;
2101 }
2102
2103 error = sooptcopyin(sopt, &optval,
2104 sizeof (optval), sizeof (optval));
2105
2106 if (error)
2107 break;
2108
2109 inp_bindif(in6p, optval);
2110 break;
2111
2112 case IPV6_NO_IFT_CELLULAR:
2113 /* This option is settable only for IPv6 */
2114 if (!(in6p->inp_vflag & INP_IPV6)) {
2115 error = EINVAL;
2116 break;
2117 }
2118
2119 error = sooptcopyin(sopt, &optval,
2120 sizeof (optval), sizeof (optval));
2121
2122 if (error)
2123 break;
2124
2125 error = inp_nocellular(in6p, optval);
2126 break;
2127
2128 case IPV6_OUT_IF:
2129 /* This option is not settable */
2130 error = EINVAL;
2131 break;
2132
1c79356b
A
2133 default:
2134 error = ENOPROTOOPT;
2135 break;
2136 }
1c79356b
A
2137 break;
2138
1c79356b 2139 case SOPT_GET:
1c79356b
A
2140 switch (optname) {
2141
6d2010ae
A
2142 case IPV6_2292PKTOPTIONS:
2143 /*
2144 * RFC3542 (effectively) deprecated the
2145 * semantics of the 2292-style pktoptions.
2146 * Since it was not reliable in nature (i.e.,
2147 * applications had to expect the lack of some
2148 * information after all), it would make sense
2149 * to simplify this part by always returning
2150 * empty data.
2151 */
2152 sopt->sopt_valsize = 0;
1c79356b
A
2153 break;
2154
6d2010ae
A
2155 case IPV6_RECVHOPOPTS:
2156 case IPV6_RECVDSTOPTS:
2157 case IPV6_RECVRTHDRDSTOPTS:
1c79356b 2158 case IPV6_UNICAST_HOPS:
6d2010ae
A
2159 case IPV6_RECVPKTINFO:
2160 case IPV6_RECVHOPLIMIT:
2161 case IPV6_RECVRTHDR:
2162 case IPV6_RECVPATHMTU:
1c79356b 2163
1c79356b 2164 case IPV6_FAITH:
9bccf70c 2165 case IPV6_V6ONLY:
1c79356b 2166 case IPV6_PORTRANGE:
b0d623f7 2167 case IPV6_RECVTCLASS:
6d2010ae 2168 case IPV6_AUTOFLOWLABEL:
1c79356b
A
2169 switch (optname) {
2170
6d2010ae
A
2171 case IPV6_RECVHOPOPTS:
2172 optval = OPTBIT(IN6P_HOPOPTS);
2173 break;
2174
2175 case IPV6_RECVDSTOPTS:
2176 optval = OPTBIT(IN6P_DSTOPTS);
2177 break;
2178
2179 case IPV6_RECVRTHDRDSTOPTS:
2180 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2181 break;
2182
1c79356b 2183 case IPV6_UNICAST_HOPS:
1c79356b 2184 optval = in6p->in6p_hops;
1c79356b
A
2185 break;
2186
6d2010ae
A
2187 case IPV6_RECVPKTINFO:
2188 optval = OPTBIT(IN6P_PKTINFO);
2189 break;
2190
2191 case IPV6_RECVHOPLIMIT:
2192 optval = OPTBIT(IN6P_HOPLIMIT);
2193 break;
2194
2195 case IPV6_RECVRTHDR:
2196 optval = OPTBIT(IN6P_RTHDR);
2197 break;
2198
2199 case IPV6_RECVPATHMTU:
2200 optval = OPTBIT(IN6P_MTU);
1c79356b
A
2201 break;
2202
2203 case IPV6_FAITH:
6d2010ae 2204 optval = OPTBIT(INP_FAITH);
1c79356b
A
2205 break;
2206
9bccf70c 2207 case IPV6_V6ONLY:
55e303ae 2208 optval = OPTBIT(IN6P_IPV6_V6ONLY);
1c79356b 2209 break;
1c79356b 2210
1c79356b
A
2211 case IPV6_PORTRANGE:
2212 {
2213 int flags;
6d2010ae
A
2214 flags = in6p->inp_flags;
2215 if (flags & INP_HIGHPORT)
1c79356b 2216 optval = IPV6_PORTRANGE_HIGH;
6d2010ae 2217 else if (flags & INP_LOWPORT)
1c79356b
A
2218 optval = IPV6_PORTRANGE_LOW;
2219 else
2220 optval = 0;
2221 break;
2222 }
b0d623f7
A
2223 case IPV6_RECVTCLASS:
2224 optval = OPTBIT(IN6P_TCLASS);
2225 break;
2226
6d2010ae
A
2227 case IPV6_AUTOFLOWLABEL:
2228 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2229 break;
1c79356b 2230 }
6d2010ae
A
2231 if (error)
2232 break;
1c79356b
A
2233 error = sooptcopyout(sopt, &optval,
2234 sizeof optval);
1c79356b
A
2235 break;
2236
6d2010ae
A
2237 case IPV6_PATHMTU:
2238 {
2239 u_int32_t pmtu = 0;
2240 struct ip6_mtuinfo mtuinfo;
2241 struct route_in6 sro;
2242
2243 bzero(&sro, sizeof(sro));
2244
2245 if (!(so->so_state & SS_ISCONNECTED))
2246 return (ENOTCONN);
2247 /*
2248 * XXX: we do not consider the case of source
2249 * routing, or optional information to specify
2250 * the outgoing interface.
2251 */
2252 error = ip6_getpmtu(&sro, NULL, NULL,
2253 &in6p->in6p_faddr, &pmtu, NULL);
2254 if (sro.ro_rt)
2255 rtfree(sro.ro_rt);
2256 if (error)
2257 break;
2258 if (pmtu > IPV6_MAXPACKET)
2259 pmtu = IPV6_MAXPACKET;
2260
2261 bzero(&mtuinfo, sizeof(mtuinfo));
2262 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2263 optdata = (void *)&mtuinfo;
2264 optdatalen = sizeof(mtuinfo);
2265 error = sooptcopyout(sopt, optdata,
2266 optdatalen);
2267 break;
2268 }
2269
2270 case IPV6_2292PKTINFO:
2271 case IPV6_2292HOPLIMIT:
2272 case IPV6_2292HOPOPTS:
2273 case IPV6_2292RTHDR:
2274 case IPV6_2292DSTOPTS:
9bccf70c 2275 switch (optname) {
6d2010ae 2276 case IPV6_2292PKTINFO:
9bccf70c 2277 optval = OPTBIT(IN6P_PKTINFO);
1c79356b 2278 break;
6d2010ae 2279 case IPV6_2292HOPLIMIT:
1c79356b
A
2280 optval = OPTBIT(IN6P_HOPLIMIT);
2281 break;
6d2010ae 2282 case IPV6_2292HOPOPTS:
9bccf70c 2283 optval = OPTBIT(IN6P_HOPOPTS);
1c79356b 2284 break;
6d2010ae 2285 case IPV6_2292RTHDR:
9bccf70c 2286 optval = OPTBIT(IN6P_RTHDR);
1c79356b 2287 break;
6d2010ae 2288 case IPV6_2292DSTOPTS:
9bccf70c 2289 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
1c79356b 2290 break;
1c79356b 2291 }
1c79356b 2292 error = sooptcopyout(sopt, &optval,
6d2010ae 2293 sizeof optval);
1c79356b 2294 break;
6d2010ae
A
2295 case IPV6_PKTINFO:
2296 case IPV6_HOPOPTS:
2297 case IPV6_RTHDR:
2298 case IPV6_DSTOPTS:
2299 case IPV6_RTHDRDSTOPTS:
2300 case IPV6_NEXTHOP:
b0d623f7 2301 case IPV6_TCLASS:
6d2010ae
A
2302 case IPV6_DONTFRAG:
2303 case IPV6_USE_MIN_MTU:
2304 case IPV6_PREFER_TEMPADDR:
2305 error = ip6_getpcbopt(in6p->in6p_outputopts,
2306 optname, sopt);
b0d623f7
A
2307 break;
2308
1c79356b
A
2309 case IPV6_MULTICAST_IF:
2310 case IPV6_MULTICAST_HOPS:
2311 case IPV6_MULTICAST_LOOP:
6d2010ae
A
2312 case IPV6_MSFILTER:
2313 error = ip6_getmoptions(in6p, sopt);
1c79356b
A
2314 break;
2315
2316#if IPSEC
2317 case IPV6_IPSEC_POLICY:
2318 {
2319 caddr_t req = NULL;
2320 size_t len = 0;
1c79356b
A
2321 struct mbuf *m = NULL;
2322 struct mbuf **mp = &m;
2323
9bccf70c 2324 error = soopt_getm(sopt, &m); /* XXX */
2d21ac55 2325 if (error != 0)
1c79356b 2326 break;
9bccf70c 2327 error = soopt_mcopyin(sopt, m); /* XXX */
2d21ac55 2328 if (error != 0)
1c79356b 2329 break;
1c79356b
A
2330 if (m) {
2331 req = mtod(m, caddr_t);
2332 len = m->m_len;
2333 }
1c79356b 2334 error = ipsec6_get_policy(in6p, req, len, mp);
1c79356b 2335 if (error == 0)
9bccf70c
A
2336 error = soopt_mcopyout(sopt, m); /*XXX*/
2337 if (error == 0 && m)
2338 m_freem(m);
1c79356b
A
2339 break;
2340 }
9bccf70c 2341#endif /* KAME IPSEC */
1c79356b 2342
2d21ac55 2343#if IPFIREWALL
1c79356b 2344 case IPV6_FW_GET:
55e303ae 2345 {
060df5ea
A
2346 if (ip6_fw_ctl_ptr == NULL)
2347 load_ip6fw();
2348 if (ip6_fw_ctl_ptr != NULL)
2349 error = (*ip6_fw_ctl_ptr)(sopt);
2350 else
2351 return ENOPROTOOPT;
1c79356b 2352 }
1c79356b 2353 break;
2d21ac55 2354#endif /* IPFIREWALL */
1c79356b 2355
6d2010ae
A
2356 case IPV6_BOUND_IF:
2357 if (in6p->inp_flags & INP_BOUND_IF)
2358 optval = in6p->inp_boundif;
2359 error = sooptcopyout(sopt, &optval,
2360 sizeof (optval));
2361 break;
2362
2363 case IPV6_NO_IFT_CELLULAR:
2364 optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
2365 ? 1 : 0;
2366 error = sooptcopyout(sopt, &optval,
2367 sizeof (optval));
2368 break;
2369
2370 case IPV6_OUT_IF:
2371 optval = in6p->in6p_last_outif;
2372 error = sooptcopyout(sopt, &optval,
2373 sizeof (optval));
2374 break;
2375
1c79356b
A
2376 default:
2377 error = ENOPROTOOPT;
2378 break;
2379 }
2380 break;
2381 }
2382 } else {
2383 error = EINVAL;
1c79356b
A
2384 }
2385 return(error);
2386}
2387
6d2010ae
A
2388int
2389ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2390{
6d2010ae
A
2391 int error = 0, optval, optlen;
2392 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2393 struct inpcb *in6p = sotoinpcb(so);
2394 int level, op, optname;
1c79356b 2395
6d2010ae
A
2396 level = sopt->sopt_level;
2397 op = sopt->sopt_dir;
2398 optname = sopt->sopt_name;
2399 optlen = sopt->sopt_valsize;
2400
2401 if (level != IPPROTO_IPV6) {
2402 return (EINVAL);
2403 }
2404
2405 switch (optname) {
2406 case IPV6_CHECKSUM:
2407 /*
2408 * For ICMPv6 sockets, no modification allowed for checksum
2409 * offset, permit "no change" values to help existing apps.
2410 *
2411 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2412 * for an ICMPv6 socket will fail."
2413 * The current behavior does not meet RFC3542.
2414 */
2415 switch (op) {
2416 case SOPT_SET:
2417 if (optlen != sizeof(int)) {
2418 error = EINVAL;
2419 break;
2420 }
2421 error = sooptcopyin(sopt, &optval, sizeof(optval),
2422 sizeof(optval));
2423 if (error)
2424 break;
2425 if ((optval % 2) != 0) {
2426 /* the API assumes even offset values */
2427 error = EINVAL;
2428 } else if (so->so_proto->pr_protocol ==
2429 IPPROTO_ICMPV6) {
2430 if (optval != icmp6off)
2431 error = EINVAL;
2432 } else
2433 in6p->in6p_cksum = optval;
2434 break;
2435
2436 case SOPT_GET:
2437 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2438 optval = icmp6off;
2439 else
2440 optval = in6p->in6p_cksum;
2441
2442 error = sooptcopyout(sopt, &optval, sizeof(optval));
2443 break;
2444
2445 default:
2446 error = EINVAL;
2447 break;
2448 }
2449 break;
2450
2451 default:
2452 error = ENOPROTOOPT;
2453 break;
2454 }
2455
2456 return (error);
2457}
2458
2459/*
2460 * Set up IP6 options in pcb for insertion in output packets or
2461 * specifying behavior of outgoing packets.
2462 */
2463static int
2464ip6_pcbopts(
2465 struct ip6_pktopts **pktopt,
2466 struct mbuf *m,
2467 __unused struct socket *so,
2468 __unused struct sockopt *sopt)
2469{
2470 struct ip6_pktopts *opt = *pktopt;
2471 int error = 0;
2472
2473 /* turn off any old options. */
2474 if (opt) {
2475#if DIAGNOSTIC
2476 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2477 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2478 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2479 printf("ip6_pcbopts: all specified options are cleared.\n");
2480#endif
2481 ip6_clearpktopts(opt, -1);
2482 } else {
2483 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2484 if (opt == NULL)
2485 return ENOBUFS;
2486 }
1c79356b
A
2487 *pktopt = NULL;
2488
2489 if (!m || m->m_len == 0) {
2490 /*
55e303ae
A
2491 * Only turning off any previous options, regardless of
2492 * whether the opt is just created or given.
1c79356b
A
2493 */
2494 if (opt)
9bccf70c 2495 FREE(opt, M_IP6OPT);
1c79356b
A
2496 return(0);
2497 }
2498
2499 /* set options specified by user. */
6d2010ae
A
2500 if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) {
2501 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
55e303ae 2502 FREE(opt, M_IP6OPT);
1c79356b
A
2503 return(error);
2504 }
2505 *pktopt = opt;
2506 return(0);
2507}
2508
6d2010ae
A
2509/*
2510 * initialize ip6_pktopts. beware that there are non-zero default values in
2511 * the struct.
2512 */
2513void
2514ip6_initpktopts(struct ip6_pktopts *opt)
2515{
2516
2517 bzero(opt, sizeof(*opt));
2518 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2519 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2520 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2521 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2522}
2523
b0d623f7 2524static int
6d2010ae 2525ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto)
b0d623f7
A
2526{
2527 struct ip6_pktopts *opt;
2528
2529 opt = *pktopt;
2530 if (opt == NULL) {
2531 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
6d2010ae
A
2532 if (opt == NULL)
2533 return(ENOBUFS);
b0d623f7
A
2534 ip6_initpktopts(opt);
2535 *pktopt = opt;
2536 }
2537
6d2010ae 2538 return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
b0d623f7
A
2539}
2540
2541static int
2542ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2543{
2544 void *optdata = NULL;
2545 int optdatalen = 0;
6d2010ae 2546 struct ip6_ext *ip6e;
b0d623f7 2547 int error = 0;
6d2010ae
A
2548 struct in6_pktinfo null_pktinfo;
2549 int deftclass = 0, on;
2550 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2551 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
b0d623f7
A
2552
2553 switch (optname) {
6d2010ae
A
2554 case IPV6_PKTINFO:
2555 if (pktopt && pktopt->ip6po_pktinfo)
2556 optdata = (void *)pktopt->ip6po_pktinfo;
2557 else {
2558 /* XXX: we don't have to do this every time... */
2559 bzero(&null_pktinfo, sizeof(null_pktinfo));
2560 optdata = (void *)&null_pktinfo;
2561 }
2562 optdatalen = sizeof(struct in6_pktinfo);
2563 break;
b0d623f7
A
2564 case IPV6_TCLASS:
2565 if (pktopt && pktopt->ip6po_tclass >= 0)
6d2010ae
A
2566 optdata = (void *)&pktopt->ip6po_tclass;
2567 else
2568 optdata = (void *)&deftclass;
2569 optdatalen = sizeof(int);
2570 break;
2571 case IPV6_HOPOPTS:
2572 if (pktopt && pktopt->ip6po_hbh) {
2573 optdata = (void *)pktopt->ip6po_hbh;
2574 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2575 optdatalen = (ip6e->ip6e_len + 1) << 3;
2576 }
2577 break;
2578 case IPV6_RTHDR:
2579 if (pktopt && pktopt->ip6po_rthdr) {
2580 optdata = (void *)pktopt->ip6po_rthdr;
2581 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2582 optdatalen = (ip6e->ip6e_len + 1) << 3;
2583 }
2584 break;
2585 case IPV6_RTHDRDSTOPTS:
2586 if (pktopt && pktopt->ip6po_dest1) {
2587 optdata = (void *)pktopt->ip6po_dest1;
2588 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2589 optdatalen = (ip6e->ip6e_len + 1) << 3;
2590 }
2591 break;
2592 case IPV6_DSTOPTS:
2593 if (pktopt && pktopt->ip6po_dest2) {
2594 optdata = (void *)pktopt->ip6po_dest2;
2595 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2596 optdatalen = (ip6e->ip6e_len + 1) << 3;
2597 }
2598 break;
2599 case IPV6_NEXTHOP:
2600 if (pktopt && pktopt->ip6po_nexthop) {
2601 optdata = (void *)pktopt->ip6po_nexthop;
2602 optdatalen = pktopt->ip6po_nexthop->sa_len;
2603 }
2604 break;
2605 case IPV6_USE_MIN_MTU:
2606 if (pktopt)
2607 optdata = (void *)&pktopt->ip6po_minmtu;
b0d623f7 2608 else
6d2010ae
A
2609 optdata = (void *)&defminmtu;
2610 optdatalen = sizeof(int);
2611 break;
2612 case IPV6_DONTFRAG:
2613 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2614 on = 1;
2615 else
2616 on = 0;
2617 optdata = (void *)&on;
2618 optdatalen = sizeof(on);
2619 break;
2620 case IPV6_PREFER_TEMPADDR:
2621 if (pktopt)
2622 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2623 else
2624 optdata = (void *)&defpreftemp;
b0d623f7
A
2625 optdatalen = sizeof(int);
2626 break;
2627 default: /* should not happen */
2628#ifdef DIAGNOSTIC
2629 panic("ip6_getpcbopt: unexpected option\n");
2630#endif
2631 return (ENOPROTOOPT);
2632 }
2633
2634 error = sooptcopyout(sopt, optdata, optdatalen);
b0d623f7 2635
6d2010ae 2636 return (error);
1c79356b
A
2637}
2638
2639void
6d2010ae 2640ip6_clearpktopts(pktopt, optname)
1c79356b 2641 struct ip6_pktopts *pktopt;
6d2010ae 2642 int optname;
1c79356b
A
2643{
2644 if (pktopt == NULL)
2645 return;
2646
6d2010ae
A
2647 if (optname == -1 || optname == IPV6_PKTINFO) {
2648 if (pktopt->ip6po_pktinfo)
9bccf70c 2649 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
1c79356b
A
2650 pktopt->ip6po_pktinfo = NULL;
2651 }
6d2010ae 2652 if (optname == -1 || optname == IPV6_HOPLIMIT)
1c79356b 2653 pktopt->ip6po_hlim = -1;
6d2010ae 2654 if (optname == -1 || optname == IPV6_TCLASS)
b0d623f7 2655 pktopt->ip6po_tclass = -1;
6d2010ae
A
2656 if (optname == -1 || optname == IPV6_NEXTHOP) {
2657 if (pktopt->ip6po_nextroute.ro_rt) {
2658 rtfree(pktopt->ip6po_nextroute.ro_rt);
2659 pktopt->ip6po_nextroute.ro_rt = NULL;
2660 }
2661 if (pktopt->ip6po_nexthop)
9bccf70c 2662 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
1c79356b
A
2663 pktopt->ip6po_nexthop = NULL;
2664 }
6d2010ae
A
2665 if (optname == -1 || optname == IPV6_HOPOPTS) {
2666 if (pktopt->ip6po_hbh)
9bccf70c 2667 FREE(pktopt->ip6po_hbh, M_IP6OPT);
1c79356b
A
2668 pktopt->ip6po_hbh = NULL;
2669 }
6d2010ae
A
2670 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2671 if (pktopt->ip6po_dest1)
9bccf70c 2672 FREE(pktopt->ip6po_dest1, M_IP6OPT);
1c79356b
A
2673 pktopt->ip6po_dest1 = NULL;
2674 }
6d2010ae
A
2675 if (optname == -1 || optname == IPV6_RTHDR) {
2676 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
9bccf70c 2677 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
1c79356b
A
2678 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2679 if (pktopt->ip6po_route.ro_rt) {
9bccf70c 2680 rtfree(pktopt->ip6po_route.ro_rt);
1c79356b
A
2681 pktopt->ip6po_route.ro_rt = NULL;
2682 }
2683 }
6d2010ae
A
2684 if (optname == -1 || optname == IPV6_DSTOPTS) {
2685 if (pktopt->ip6po_dest2)
9bccf70c 2686 FREE(pktopt->ip6po_dest2, M_IP6OPT);
1c79356b
A
2687 pktopt->ip6po_dest2 = NULL;
2688 }
2689}
2690
9bccf70c
A
2691#define PKTOPT_EXTHDRCPY(type) \
2692do {\
2693 if (src->type) {\
1c79356b
A
2694 int hlen =\
2695 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2696 dst->type = _MALLOC(hlen, M_IP6OPT, canwait);\
2697 if (dst->type == NULL && canwait == M_NOWAIT)\
2698 goto bad;\
2699 bcopy(src->type, dst->type, hlen);\
9bccf70c
A
2700 }\
2701} while (0)
1c79356b 2702
6d2010ae
A
2703static int
2704copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
1c79356b 2705{
6d2010ae 2706 if (dst == NULL || src == NULL) {
1c79356b 2707 printf("ip6_clearpktopts: invalid argument\n");
6d2010ae 2708 return (EINVAL);
1c79356b
A
2709 }
2710
1c79356b 2711 dst->ip6po_hlim = src->ip6po_hlim;
b0d623f7 2712 dst->ip6po_tclass = src->ip6po_tclass;
6d2010ae 2713 dst->ip6po_flags = src->ip6po_flags;
1c79356b
A
2714 if (src->ip6po_pktinfo) {
2715 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
2716 M_IP6OPT, canwait);
2717 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2718 goto bad;
2719 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2720 }
2721 if (src->ip6po_nexthop) {
2722 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
2723 M_IP6OPT, canwait);
2724 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2725 goto bad;
2726 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2727 src->ip6po_nexthop->sa_len);
2728 }
2729 PKTOPT_EXTHDRCPY(ip6po_hbh);
2730 PKTOPT_EXTHDRCPY(ip6po_dest1);
2731 PKTOPT_EXTHDRCPY(ip6po_dest2);
2732 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
6d2010ae 2733 return (0);
1c79356b
A
2734
2735 bad:
6d2010ae
A
2736 ip6_clearpktopts(dst, -1);
2737 return (ENOBUFS);
1c79356b
A
2738}
2739#undef PKTOPT_EXTHDRCPY
2740
6d2010ae
A
2741struct ip6_pktopts *
2742ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2743{
2744 int error;
2745 struct ip6_pktopts *dst;
2746
2747 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
2748 if (dst == NULL)
2749 return (NULL);
2750 ip6_initpktopts(dst);
2751
2752 if ((error = copypktopts(dst, src, canwait)) != 0) {
2753 FREE(dst, M_IP6OPT);
2754 return (NULL);
2755 }
2756
2757 return (dst);
2758}
2759
1c79356b
A
2760void
2761ip6_freepcbopts(pktopt)
2762 struct ip6_pktopts *pktopt;
2763{
2764 if (pktopt == NULL)
2765 return;
2766
6d2010ae 2767 ip6_clearpktopts(pktopt, -1);
1c79356b 2768
9bccf70c 2769 FREE(pktopt, M_IP6OPT);
1c79356b
A
2770}
2771
6d2010ae
A
2772void
2773ip6_moptions_init(void)
1c79356b 2774{
6d2010ae 2775 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
1c79356b 2776
6d2010ae
A
2777 im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
2778 sizeof (struct ip6_moptions_dbg);
2779
2780 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
2781 IM6O_ZONE_NAME);
2782 if (im6o_zone == NULL) {
2783 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
2784 /* NOTREACHED */
1c79356b 2785 }
6d2010ae
A
2786 zone_change(im6o_zone, Z_EXPAND, TRUE);
2787}
1c79356b 2788
6d2010ae
A
2789void
2790im6o_addref(struct ip6_moptions *im6o, int locked)
2791{
2792 if (!locked)
2793 IM6O_LOCK(im6o);
2794 else
2795 IM6O_LOCK_ASSERT_HELD(im6o);
1c79356b 2796
6d2010ae
A
2797 if (++im6o->im6o_refcnt == 0) {
2798 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
2799 /* NOTREACHED */
2800 } else if (im6o->im6o_trace != NULL) {
2801 (*im6o->im6o_trace)(im6o, TRUE);
2802 }
2803
2804 if (!locked)
2805 IM6O_UNLOCK(im6o);
1c79356b
A
2806}
2807
6d2010ae
A
2808void
2809im6o_remref(struct ip6_moptions *im6o)
1c79356b 2810{
6d2010ae 2811 int i;
1c79356b 2812
6d2010ae
A
2813 IM6O_LOCK(im6o);
2814 if (im6o->im6o_refcnt == 0) {
2815 panic("%s: im6o %p negative refcnt", __func__, im6o);
2816 /* NOTREACHED */
2817 } else if (im6o->im6o_trace != NULL) {
2818 (*im6o->im6o_trace)(im6o, FALSE);
2819 }
1c79356b 2820
6d2010ae
A
2821 --im6o->im6o_refcnt;
2822 if (im6o->im6o_refcnt > 0) {
2823 IM6O_UNLOCK(im6o);
2824 return;
2825 }
1c79356b 2826
6d2010ae
A
2827 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
2828 struct in6_mfilter *imf;
1c79356b 2829
6d2010ae
A
2830 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
2831 if (imf != NULL)
2832 im6f_leave(imf);
1c79356b 2833
6d2010ae 2834 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
1c79356b 2835
6d2010ae
A
2836 if (imf != NULL)
2837 im6f_purge(imf);
2838
2839 IN6M_REMREF(im6o->im6o_membership[i]);
2840 im6o->im6o_membership[i] = NULL;
2841 }
2842 im6o->im6o_num_memberships = 0;
2843 if (im6o->im6o_mfilters != NULL) {
2844 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
2845 im6o->im6o_mfilters = NULL;
2846 }
2847 if (im6o->im6o_membership != NULL) {
2848 FREE(im6o->im6o_membership, M_IP6MOPTS);
2849 im6o->im6o_membership = NULL;
2850 }
2851 IM6O_UNLOCK(im6o);
2852
2853 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
2854
2855 if (!(im6o->im6o_debug & IFD_ALLOC)) {
2856 panic("%s: im6o %p cannot be freed", __func__, im6o);
2857 /* NOTREACHED */
1c79356b 2858 }
6d2010ae 2859 zfree(im6o_zone, im6o);
1c79356b
A
2860}
2861
6d2010ae
A
2862static void
2863im6o_trace(struct ip6_moptions *im6o, int refhold)
1c79356b 2864{
6d2010ae
A
2865 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
2866 ctrace_t *tr;
2867 u_int32_t idx;
2868 u_int16_t *cnt;
1c79356b 2869
6d2010ae
A
2870 if (!(im6o->im6o_debug & IFD_DEBUG)) {
2871 panic("%s: im6o %p has no debug structure", __func__, im6o);
2872 /* NOTREACHED */
2873 }
2874 if (refhold) {
2875 cnt = &im6o_dbg->im6o_refhold_cnt;
2876 tr = im6o_dbg->im6o_refhold;
2877 } else {
2878 cnt = &im6o_dbg->im6o_refrele_cnt;
2879 tr = im6o_dbg->im6o_refrele;
2880 }
2881
2882 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
2883 ctrace_record(&tr[idx]);
2884}
2885
2886struct ip6_moptions *
2887ip6_allocmoptions(int how)
2888{
2889 struct ip6_moptions *im6o;
2890
2891 im6o = (how == M_WAITOK) ?
2892 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
2893 if (im6o != NULL) {
2894 bzero(im6o, im6o_size);
2895 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
2896 im6o->im6o_debug |= IFD_ALLOC;
2897 if (im6o_debug != 0) {
2898 im6o->im6o_debug |= IFD_DEBUG;
2899 im6o->im6o_trace = im6o_trace;
2900 }
2901 IM6O_ADDREF(im6o);
2902 }
2903
2904 return (im6o);
1c79356b
A
2905}
2906
2907/*
2908 * Set IPv6 outgoing packet options based on advanced API.
2909 */
2910int
6d2010ae
A
2911ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2912 struct ip6_pktopts *stickyopt, int uproto)
1c79356b 2913{
9bccf70c 2914 struct cmsghdr *cm = 0;
1c79356b 2915
6d2010ae
A
2916 if (control == NULL || opt == NULL)
2917 return (EINVAL);
1c79356b 2918
b0d623f7 2919 ip6_initpktopts(opt);
6d2010ae
A
2920 if (stickyopt) {
2921 int error;
2922
2923 /*
2924 * If stickyopt is provided, make a local copy of the options
2925 * for this particular packet, then override them by ancillary
2926 * objects.
2927 * XXX: copypktopts() does not copy the cached route to a next
2928 * hop (if any). This is not very good in terms of efficiency,
2929 * but we can allow this since this option should be rarely
2930 * used.
2931 */
2932 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2933 return (error);
2934 }
1c79356b
A
2935
2936 /*
2937 * XXX: Currently, we assume all the optional information is stored
2938 * in a single mbuf.
2939 */
2940 if (control->m_next)
6d2010ae 2941 return (EINVAL);
1c79356b 2942
6d2010ae
A
2943 if (control->m_len < CMSG_LEN(0))
2944 return (EINVAL);
2945
2946 for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
2947 int error;
2948
2949 if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
2950 return (EINVAL);
1c79356b
A
2951 if (cm->cmsg_level != IPPROTO_IPV6)
2952 continue;
2953
6d2010ae
A
2954 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2955 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
2956 if (error)
2957 return (error);
2958 }
2959
2960 return (0);
2961}
2962/*
2963 * Set a particular packet option, as a sticky option or an ancillary data
2964 * item. "len" can be 0 only when it's a sticky option.
2965 * We have 4 cases of combination of "sticky" and "cmsg":
2966 * "sticky=0, cmsg=0": impossible
2967 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2968 * "sticky=1, cmsg=0": RFC3542 socket option
2969 * "sticky=1, cmsg=1": RFC2292 socket option
2970 */
2971static int
2972ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2973 int sticky, int cmsg, int uproto)
2974{
2975 int minmtupolicy, preftemp;
2976 int error;
2977
2978 if (!sticky && !cmsg) {
2979#ifdef DIAGNOSTIC
2980 printf("ip6_setpktopt: impossible case\n");
2981#endif
2982 return (EINVAL);
2983 }
2984
2985 /*
2986 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2987 * not be specified in the context of RFC3542. Conversely,
2988 * RFC3542 types should not be specified in the context of RFC2292.
2989 */
2990 if (!cmsg) {
2991 switch (optname) {
2992 case IPV6_2292PKTINFO:
2993 case IPV6_2292HOPLIMIT:
2994 case IPV6_2292NEXTHOP:
2995 case IPV6_2292HOPOPTS:
2996 case IPV6_2292DSTOPTS:
2997 case IPV6_2292RTHDR:
2998 case IPV6_2292PKTOPTIONS:
2999 return (ENOPROTOOPT);
3000 }
3001 }
3002 if (sticky && cmsg) {
3003 switch (optname) {
3004 case IPV6_PKTINFO:
3005 case IPV6_HOPLIMIT:
3006 case IPV6_NEXTHOP:
3007 case IPV6_HOPOPTS:
3008 case IPV6_DSTOPTS:
3009 case IPV6_RTHDRDSTOPTS:
3010 case IPV6_RTHDR:
3011 case IPV6_USE_MIN_MTU:
3012 case IPV6_DONTFRAG:
3013 case IPV6_TCLASS:
3014 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3015 return (ENOPROTOOPT);
3016 }
3017 }
3018
3019 switch (optname) {
3020 case IPV6_2292PKTINFO:
3021 case IPV6_PKTINFO:
3022 {
3023 struct ifnet *ifp = NULL;
3024 struct in6_pktinfo *pktinfo;
3025
3026 if (len != sizeof(struct in6_pktinfo))
3027 return (EINVAL);
3028
3029 pktinfo = (struct in6_pktinfo *)buf;
3030
9bccf70c 3031 /*
6d2010ae
A
3032 * An application can clear any sticky IPV6_PKTINFO option by
3033 * doing a "regular" setsockopt with ipi6_addr being
3034 * in6addr_any and ipi6_ifindex being zero.
3035 * [RFC 3542, Section 6]
9bccf70c 3036 */
6d2010ae
A
3037 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3038 pktinfo->ipi6_ifindex == 0 &&
3039 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3040 ip6_clearpktopts(opt, optname);
3041 break;
3042 }
1c79356b 3043
6d2010ae
A
3044 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3045 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3046 return (EINVAL);
3047 }
3048
3049 /* validate the interface index if specified. */
3050 ifnet_head_lock_shared();
3051
3052 if (pktinfo->ipi6_ifindex > if_index) {
3053 ifnet_head_done();
3054 return (ENXIO);
3055 }
3056
3057 if (pktinfo->ipi6_ifindex) {
3058 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3059 if (ifp == NULL) {
3060 ifnet_head_done();
3061 return (ENXIO);
1c79356b 3062 }
6d2010ae
A
3063 }
3064
3065 ifnet_head_done();
1c79356b 3066
6d2010ae
A
3067 /*
3068 * We store the address anyway, and let in6_selectsrc()
3069 * validate the specified address. This is because ipi6_addr
3070 * may not have enough information about its scope zone, and
3071 * we may need additional information (such as outgoing
3072 * interface or the scope zone of a destination address) to
3073 * disambiguate the scope.
3074 * XXX: the delay of the validation may confuse the
3075 * application when it is used as a sticky option.
3076 */
3077 if (opt->ip6po_pktinfo == NULL) {
3078 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
3079 M_IP6OPT, M_NOWAIT);
3080 if (opt->ip6po_pktinfo == NULL)
3081 return (ENOBUFS);
3082 }
3083 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3084 break;
3085 }
1c79356b 3086
6d2010ae
A
3087 case IPV6_2292HOPLIMIT:
3088 case IPV6_HOPLIMIT:
3089 {
3090 int *hlimp;
1c79356b 3091
6d2010ae
A
3092 /*
3093 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3094 * to simplify the ordering among hoplimit options.
3095 */
3096 if (optname == IPV6_HOPLIMIT && sticky)
3097 return (ENOPROTOOPT);
b0d623f7 3098
6d2010ae
A
3099 if (len != sizeof(int))
3100 return (EINVAL);
3101 hlimp = (int *)buf;
3102 if (*hlimp < -1 || *hlimp > 255)
3103 return (EINVAL);
b0d623f7 3104
6d2010ae
A
3105 opt->ip6po_hlim = *hlimp;
3106 break;
3107 }
3108
3109 case IPV6_TCLASS:
3110 {
3111 int tclass;
3112
3113 if (len != sizeof(int))
3114 return (EINVAL);
3115 tclass = *(int *)buf;
3116 if (tclass < -1 || tclass > 255)
3117 return (EINVAL);
3118
3119 opt->ip6po_tclass = tclass;
3120 break;
3121 }
3122
3123 case IPV6_2292NEXTHOP:
3124 case IPV6_NEXTHOP:
3125 error = suser(kauth_cred_get(), 0);
3126 if (error)
3127 return (EACCES);
3128
3129 if (len == 0) { /* just remove the option */
3130 ip6_clearpktopts(opt, IPV6_NEXTHOP);
1c79356b 3131 break;
6d2010ae 3132 }
1c79356b 3133
6d2010ae
A
3134 /* check if cmsg_len is large enough for sa_len */
3135 if (len < sizeof(struct sockaddr) || len < *buf)
3136 return (EINVAL);
3137
3138 switch (((struct sockaddr *)buf)->sa_family) {
3139 case AF_INET6:
1c79356b 3140 {
6d2010ae
A
3141 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3142
3143 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3144 return (EINVAL);
3145
3146 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3147 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3148 return (EINVAL);
3149 }
3150 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3151 != 0) {
3152 return (error);
3153 }
1c79356b
A
3154 break;
3155 }
6d2010ae
A
3156 case AF_LINK: /* should eventually be supported */
3157 default:
3158 return (EAFNOSUPPORT);
3159 }
1c79356b 3160
6d2010ae
A
3161 /* turn off the previous option, then set the new option. */
3162 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3163 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3164 if (opt->ip6po_nexthop == NULL)
3165 return (ENOBUFS);
3166 bcopy(buf, opt->ip6po_nexthop, *buf);
3167 break;
3168
3169 case IPV6_2292HOPOPTS:
3170 case IPV6_HOPOPTS:
3171 {
3172 struct ip6_hbh *hbh;
3173 int hbhlen;
3174
3175 /*
3176 * XXX: We don't allow a non-privileged user to set ANY HbH
3177 * options, since per-option restriction has too much
3178 * overhead.
3179 */
3180 error = suser(kauth_cred_get(), 0);
3181 if (error)
3182 return (EACCES);
3183
3184 if (len == 0) {
3185 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3186 break; /* just remove the option */
3187 }
3188
3189 /* message length validation */
3190 if (len < sizeof(struct ip6_hbh))
3191 return (EINVAL);
3192 hbh = (struct ip6_hbh *)buf;
3193 hbhlen = (hbh->ip6h_len + 1) << 3;
3194 if (len != hbhlen)
3195 return (EINVAL);
3196
3197 /* turn off the previous option, then set the new option. */
3198 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3199 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3200 if (opt->ip6po_hbh == NULL)
3201 return (ENOBUFS);
3202 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3203
3204 break;
3205 }
3206
3207 case IPV6_2292DSTOPTS:
3208 case IPV6_DSTOPTS:
3209 case IPV6_RTHDRDSTOPTS:
3210 {
3211 struct ip6_dest *dest, **newdest = NULL;
3212 int destlen;
3213
3214 error = suser(kauth_cred_get(), 0);
3215 if (error)
3216 return (EACCES);
3217
3218 if (len == 0) {
3219 ip6_clearpktopts(opt, optname);
3220 break; /* just remove the option */
3221 }
3222
3223 /* message length validation */
3224 if (len < sizeof(struct ip6_dest))
3225 return (EINVAL);
3226 dest = (struct ip6_dest *)buf;
3227 destlen = (dest->ip6d_len + 1) << 3;
3228 if (len != destlen)
3229 return (EINVAL);
3230
3231 /*
3232 * Determine the position that the destination options header
3233 * should be inserted; before or after the routing header.
3234 */
3235 switch (optname) {
3236 case IPV6_2292DSTOPTS:
3237 /*
3238 * The old advanced API is ambiguous on this point.
3239 * Our approach is to determine the position based
3240 * according to the existence of a routing header.
3241 * Note, however, that this depends on the order of the
3242 * extension headers in the ancillary data; the 1st
3243 * part of the destination options header must appear
3244 * before the routing header in the ancillary data,
3245 * too.
3246 * RFC3542 solved the ambiguity by introducing
3247 * separate ancillary data or option types.
1c79356b 3248 */
9bccf70c
A
3249 if (opt->ip6po_rthdr == NULL)
3250 newdest = &opt->ip6po_dest1;
3251 else
3252 newdest = &opt->ip6po_dest2;
6d2010ae
A
3253 break;
3254 case IPV6_RTHDRDSTOPTS:
3255 newdest = &opt->ip6po_dest1;
3256 break;
3257 case IPV6_DSTOPTS:
3258 newdest = &opt->ip6po_dest2;
1c79356b
A
3259 break;
3260 }
3261
6d2010ae
A
3262 /* turn off the previous option, then set the new option. */
3263 ip6_clearpktopts(opt, optname);
3264 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3265 if (*newdest == NULL)
3266 return (ENOBUFS);
3267 bcopy(dest, *newdest, destlen);
1c79356b 3268
6d2010ae
A
3269 break;
3270 }
1c79356b 3271
6d2010ae
A
3272 case IPV6_2292RTHDR:
3273 case IPV6_RTHDR:
3274 {
3275 struct ip6_rthdr *rth;
3276 int rthlen;
3277
3278 if (len == 0) {
3279 ip6_clearpktopts(opt, IPV6_RTHDR);
3280 break; /* just remove the option */
1c79356b
A
3281 }
3282
6d2010ae
A
3283 /* message length validation */
3284 if (len < sizeof(struct ip6_rthdr))
3285 return (EINVAL);
3286 rth = (struct ip6_rthdr *)buf;
3287 rthlen = (rth->ip6r_len + 1) << 3;
3288 if (len != rthlen)
3289 return (EINVAL);
3290
3291 switch (rth->ip6r_type) {
3292 case IPV6_RTHDR_TYPE_0:
3293 if (rth->ip6r_len == 0) /* must contain one addr */
3294 return (EINVAL);
3295 if (rth->ip6r_len % 2) /* length must be even */
3296 return (EINVAL);
3297 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3298 return (EINVAL);
3299 break;
1c79356b 3300 default:
6d2010ae 3301 return (EINVAL); /* not supported */
1c79356b 3302 }
6d2010ae
A
3303
3304 /* turn off the previous option */
3305 ip6_clearpktopts(opt, IPV6_RTHDR);
3306 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3307 if (opt->ip6po_rthdr == NULL)
3308 return (ENOBUFS);
3309 bcopy(rth, opt->ip6po_rthdr, rthlen);
3310
3311 break;
1c79356b
A
3312 }
3313
6d2010ae
A
3314 case IPV6_USE_MIN_MTU:
3315 if (len != sizeof(int))
3316 return (EINVAL);
3317 minmtupolicy = *(int *)buf;
3318 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3319 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3320 minmtupolicy != IP6PO_MINMTU_ALL) {
3321 return (EINVAL);
3322 }
3323 opt->ip6po_minmtu = minmtupolicy;
3324 break;
3325
3326 case IPV6_DONTFRAG:
3327 if (len != sizeof(int))
3328 return (EINVAL);
3329
3330 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3331 /*
3332 * we ignore this option for TCP sockets.
3333 * (RFC3542 leaves this case unspecified.)
3334 */
3335 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3336 } else
3337 opt->ip6po_flags |= IP6PO_DONTFRAG;
3338 break;
3339
3340 case IPV6_PREFER_TEMPADDR:
3341 if (len != sizeof(int))
3342 return (EINVAL);
3343 preftemp = *(int *)buf;
3344 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3345 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3346 preftemp != IP6PO_TEMPADDR_PREFER) {
3347 return (EINVAL);
3348 }
3349 opt->ip6po_prefer_tempaddr = preftemp;
3350 break;
3351
3352 default:
3353 return (ENOPROTOOPT);
3354 } /* end of switch */
3355
3356 return (0);
1c79356b
A
3357}
3358
3359/*
3360 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3361 * packet to the input queue of a specified interface. Note that this
3362 * calls the output routine of the loopback "driver", but with an interface
3363 * pointer that might NOT be &loif -- easier than replicating that code here.
3364 */
3365void
91447636
A
3366ip6_mloopback(
3367 struct ifnet *ifp,
3368 struct mbuf *m,
3369 struct sockaddr_in6 *dst)
1c79356b 3370{
9bccf70c
A
3371 struct mbuf *copym;
3372 struct ip6_hdr *ip6;
1c79356b
A
3373
3374 copym = m_copy(m, 0, M_COPYALL);
9bccf70c
A
3375 if (copym == NULL)
3376 return;
3377
3378 /*
3379 * Make sure to deep-copy IPv6 header portion in case the data
3380 * is in an mbuf cluster, so that we can safely override the IPv6
3381 * header portion later.
3382 */
3383 if ((copym->m_flags & M_EXT) != 0 ||
3384 copym->m_len < sizeof(struct ip6_hdr)) {
3385 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3386 if (copym == NULL)
3387 return;
3388 }
3389
3390#if DIAGNOSTIC
3391 if (copym->m_len < sizeof(*ip6)) {
3392 m_freem(copym);
3393 return;
3394 }
3395#endif
3396
3397 ip6 = mtod(copym, struct ip6_hdr *);
9bccf70c
A
3398 /*
3399 * clear embedded scope identifiers if necessary.
3400 * in6_clearscope will touch the addresses only when necessary.
3401 */
3402 in6_clearscope(&ip6->ip6_src);
3403 in6_clearscope(&ip6->ip6_dst);
9bccf70c 3404
1c79356b 3405#ifdef __APPLE__
9bccf70c
A
3406
3407 /* Makes sure the HW checksum flags are cleaned before sending the packet */
3408
6d2010ae
A
3409 if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
3410 in6_delayed_cksum(copym, sizeof(struct ip6_hdr));
3411 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
3412 }
91447636 3413 copym->m_pkthdr.rcvif = 0;
9bccf70c
A
3414 copym->m_pkthdr.csum_data = 0;
3415 copym->m_pkthdr.csum_flags = 0;
3416
91447636 3417 if (lo_ifp) {
55e303ae 3418 copym->m_pkthdr.rcvif = ifp;
91447636 3419 dlil_output(lo_ifp, PF_INET6, copym, 0, (struct sockaddr *)dst, 0);
55e303ae 3420 } else
9bccf70c 3421 m_free(copym);
1c79356b 3422#else
55e303ae 3423 (void)if_simloop(ifp, copym, dst->sin6_family, NULL);
1c79356b 3424#endif
1c79356b
A
3425}
3426
3427/*
3428 * Chop IPv6 header off from the payload.
3429 */
3430static int
3431ip6_splithdr(m, exthdrs)
3432 struct mbuf *m;
3433 struct ip6_exthdrs *exthdrs;
3434{
3435 struct mbuf *mh;
3436 struct ip6_hdr *ip6;
3437
3438 ip6 = mtod(m, struct ip6_hdr *);
3439 if (m->m_len > sizeof(*ip6)) {
2d21ac55 3440 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1c79356b
A
3441 if (mh == 0) {
3442 m_freem(m);
3443 return ENOBUFS;
3444 }
3445 M_COPY_PKTHDR(mh, m);
3446 MH_ALIGN(mh, sizeof(*ip6));
3447 m->m_flags &= ~M_PKTHDR;
3448 m->m_len -= sizeof(*ip6);
3449 m->m_data += sizeof(*ip6);
3450 mh->m_next = m;
3451 m = mh;
3452 m->m_len = sizeof(*ip6);
3453 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3454 }
3455 exthdrs->ip6e_ip6 = m;
3456 return 0;
3457}
3458
3459/*
3460 * Compute IPv6 extension header length.
3461 */
1c79356b
A
3462int
3463ip6_optlen(in6p)
3464 struct in6pcb *in6p;
3465{
3466 int len;
3467
3468 if (!in6p->in6p_outputopts)
3469 return 0;
3470
3471 len = 0;
3472#define elen(x) \
6d2010ae 3473 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
1c79356b
A
3474
3475 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3476 if (in6p->in6p_outputopts->ip6po_rthdr)
3477 /* dest1 is valid with rthdr only */
3478 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3479 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3480 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3481 return len;
3482#undef elen
3483}