]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/ip6_output.c
d96db58f7c632bbc3b81ec8ec851dbb713269f4e
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $ */
30 /* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * Copyright (c) 1982, 1986, 1988, 1990, 1993
63 * The Regents of the University of California. All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 * notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 * notice, this list of conditions and the following disclaimer in the
72 * documentation and/or other materials provided with the distribution.
73 * 3. All advertising materials mentioning features or use of this software
74 * must display the following acknowledgement:
75 * This product includes software developed by the University of
76 * California, Berkeley and its contributors.
77 * 4. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
94 */
95 /*
96 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
97 * support for mandatory and extensible security protections. This notice
98 * is included in support of clause 2.2 (b) of the Apple Public License,
99 * Version 2.0.
100 */
101
102 #include <sys/param.h>
103 #include <sys/malloc.h>
104 #include <sys/mbuf.h>
105 #include <sys/errno.h>
106 #include <sys/protosw.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/systm.h>
110 #include <sys/kernel.h>
111 #include <sys/proc.h>
112 #include <sys/kauth.h>
113 #include <sys/mcache.h>
114 #include <sys/sysctl.h>
115 #include <kern/zalloc.h>
116
117 #include <pexpert/pexpert.h>
118
119 #include <net/if.h>
120 #include <net/route.h>
121 #include <net/dlil.h>
122
123 #include <netinet/in.h>
124 #include <netinet/in_var.h>
125 #include <netinet/ip_var.h>
126 #include <netinet6/in6_var.h>
127 #include <netinet/ip6.h>
128 #include <netinet6/ip6protosw.h>
129 #include <netinet/icmp6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet/in_pcb.h>
132 #include <netinet6/nd6.h>
133 #include <netinet6/scope6_var.h>
134 #include <mach/sdt.h>
135
136 #if IPSEC
137 #include <netinet6/ipsec.h>
138 #if INET6
139 #include <netinet6/ipsec6.h>
140 #endif
141 #include <netkey/key.h>
142 extern int ipsec_bypass;
143 #endif /* IPSEC */
144
145 #if CONFIG_MACF_NET
146 #include <security/mac.h>
147 #endif /* CONFIG_MACF_NET */
148
149 #include <netinet6/ip6_fw.h>
150
151 #if DUMMYNET
152 #include <netinet/ip_fw.h>
153 #include <netinet/ip_dummynet.h>
154 #endif /* DUMMYNET */
155
156 #include <net/net_osdep.h>
157
158 #include <netinet/kpi_ipfilter_var.h>
159
160 #if PF
161 #include <net/pfvar.h>
162 #endif /* PF */
163
/*
 * Allocation type (non-Apple builds only) and forward declarations.
 *
 * ip6_raw_ctloutput() is the only prototype below with external linkage;
 * every other prototype is static, i.e. private to this file.
 */
164 #ifndef __APPLE__
/* Compiled only when __APPLE__ is not defined. */
165 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
166 #endif
167
168 int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
169 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
170 struct socket *, struct sockopt *sopt);
171 static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto);
172 static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt);
173 static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto);
174 static void im6o_trace(struct ip6_moptions *, int);
/*
 * Invoked through the MAKE_EXTHDR macro in ip6_output() with the address of
 * an ip6_exthdrs mbuf slot, the option header pointer, and a length computed
 * as (ip6e_len + 1) << 3.  A non-zero return is treated as an error.
 */
175 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
176 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
177 struct ip6_frag **);
/* Called by ip6_output() when the payload length exceeds IPV6_MAXPACKET. */
178 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
/*
 * Called by ip6_output() before extension headers are chained in; on success
 * the caller continues with exthdrs.ip6e_ip6 as the head of the packet.
 */
179 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
180 static int ip6_getpmtu (struct route_in6 *, struct route_in6 *,
181 struct ifnet *, struct in6_addr *, u_int32_t *, int *);
182
/* Number of entries in each caller-trace array of struct ip6_moptions_dbg. */
183 #define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
184
185 /* For gdb: exposes IM6O_TRACE_HIST_SIZE as a variable */
186 __private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
187
/*
 * Debugging variant of struct ip6_moptions.  The plain structure is the
 * first member; the members that follow record how many times a reference
 * was taken or dropped and keep caller traces for allocation, free, and
 * reference operations.
 */
188 struct ip6_moptions_dbg {
189 struct ip6_moptions im6o; /* ip6_moptions */
190 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF */
191 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF */
192 /*
193 * Alloc and free callers.
194 */
195 ctrace_t im6o_alloc;
196 ctrace_t im6o_free;
197 /*
198 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers.
 * Each array holds IM6O_TRACE_HIST_SIZE entries.
 * NOTE(review): presumably indexed by the counters above modulo the
 * history size -- confirm in im6o_trace().
199 */
200 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
201 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
202 };
203
/*
 * File-scope state for ip6_moptions allocation and debugging.
 * im6o_debug is initialized to 1 when DEBUG is set and is zero-initialized
 * otherwise.
 */
204 #if DEBUG
205 static unsigned int im6o_debug = 1; /* debugging (enabled) */
206 #else
207 static unsigned int im6o_debug; /* debugging (disabled) */
208 #endif /* !DEBUG */
209
210 static unsigned int im6o_size; /* size of zone element */
211 static struct zone *im6o_zone; /* zone for ip6_moptions */
212
213 #define IM6O_ZONE_MAX 64 /* maximum elements in zone */
214 #define IM6O_ZONE_NAME "ip6_moptions" /* zone name */
215
216 SYSCTL_DECL(_net_inet6_ip6);
217
/*
 * Integer tunable, initially 0, registered as the read-write sysctl
 * "maxchainsent" under the _net_inet6_ip6 node.
 */
218 static int ip6_maxchainsent = 0;
219 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent, CTLFLAG_RW | CTLFLAG_LOCKED,
220 &ip6_maxchainsent, 0, "use dlil_output_list");
221
222 /*
223 * XXX we don't handle mbuf chains yet in nd6_output() so ip6_output_list() only
224 * walks through the packet chain and sends each mbuf separately.
225 */
226 int
227 ip6_output_list(
228 struct mbuf *m0,
229 int packetlist,
230 struct ip6_pktopts *opt,
231 struct route_in6 *ro,
232 int flags,
233 struct ip6_moptions *im6o,
234 struct ifnet **ifpp, /* XXX: just for statistics */
235 struct ip6_out_args *ip6oap)
236 {
237 #pragma unused(packetlist)
238 struct mbuf *m = m0, *nextpkt;
239 int error = 0;
240
241 while (m) {
242 /*
243 * Break the chain before calling ip6_output() and free the
244 * mbufs if there was an error.
245 */
246 nextpkt = m->m_nextpkt;
247 m->m_nextpkt = NULL;
248 error = ip6_output(m, opt, ro, flags, im6o, ifpp, ip6oap);
249 if (error) {
250 if (nextpkt)
251 m_freem_list(nextpkt);
252 return (error);
253 }
254 m = nextpkt;
255 }
256
257 return (error);
258 }
259
260 /*
261 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
262 * header (with pri, len, nxt, hlim, src, dst).
263 * This function may modify ver and hlim only.
264 * The mbuf chain containing the packet will be freed.
265 * The mbuf opt, if present, will not be freed.
266 *
267 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
268 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
269 * which is rt_rmx.rmx_mtu.
270 */
271 int
272 ip6_output(
273 struct mbuf *m0,
274 struct ip6_pktopts *opt,
275 struct route_in6 *ro,
276 int flags,
277 struct ip6_moptions *im6o,
278 struct ifnet **ifpp, /* XXX: just for statistics */
279 struct ip6_out_args *ip6oap)
280 {
281 struct ip6_hdr *ip6, *mhip6;
282 struct ifnet *ifp = NULL, *origifp = NULL;
283 struct mbuf *m = m0;
284 int hlen, tlen, len, off;
285 struct route_in6 ip6route;
286 struct rtentry *rt = NULL;
287 struct sockaddr_in6 *dst, src_sa, dst_sa;
288 int error = 0;
289 struct in6_ifaddr *ia = NULL;
290 u_int32_t mtu;
291 int alwaysfrag = 0, dontfrag = 0;
292 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
293 struct ip6_exthdrs exthdrs;
294 struct in6_addr finaldst, src0, dst0;
295 u_int32_t zone;
296 struct route_in6 *ro_pmtu = NULL;
297 int hdrsplit = 0;
298 int needipsec = 0;
299 ipfilter_t inject_filter_ref;
300 int tso;
301 boolean_t select_srcif;
302 struct ipf_pktopts *ippo = NULL, ipf_pktopts;
303 struct ip6_out_args ip6oa = { IFSCOPE_NONE, { 0 }, 0 };
304 struct flowadv *adv = NULL;
305 u_int32_t ifmtu;
306 #if DUMMYNET
307 struct m_tag *tag;
308 struct route_in6 saved_route;
309 struct route_in6 saved_ro_pmtu;
310 struct ip_fw_args args;
311 struct sockaddr_in6 dst_buf;
312
313 bzero(&args, sizeof(struct ip_fw_args));
314 #endif /* DUMMYNET */
315
316 if ((flags & IPV6_OUTARGS) && ip6oap != NULL) {
317 ip6oa = *ip6oap;
318 adv = &ip6oap->ip6oa_flowadv;
319 adv->code = FADV_SUCCESS;
320 }
321
322 #if IPSEC
323 int needipsectun = 0;
324 struct socket *so = NULL;
325 struct secpolicy *sp = NULL;
326 struct route_in6 *ipsec_saved_route = NULL;
327 struct ipsec_output_state ipsec_state;
328
329 bzero(&ipsec_state, sizeof(ipsec_state));
330
331 /* for AH processing. stupid to have "socket" variable in IP layer... */
332 if (ipsec_bypass == 0)
333 {
334 so = ipsec_getsocket(m);
335 (void)ipsec_setsocket(m, NULL);
336 }
337 #endif /* IPSEC */
338
339 bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
340 ippo = &ipf_pktopts;
341
342 ip6 = mtod(m, struct ip6_hdr *);
343 inject_filter_ref = ipf_get_inject_filter(m);
344
345 /* Grab info from mtags prepended to the chain */
346 #if DUMMYNET
347 if ((tag = m_tag_locate(m0, KERNEL_MODULE_TAG_ID,
348 KERNEL_TAG_TYPE_DUMMYNET, NULL)) != NULL) {
349 struct dn_pkt_tag *dn_tag;
350
351 dn_tag = (struct dn_pkt_tag *)(tag+1);
352 args.fwa_pf_rule = dn_tag->dn_pf_rule;
353
354 bcopy(&dn_tag->dn_dst6, &dst_buf, sizeof(dst_buf));
355 dst = &dst_buf;
356 ifp = dn_tag->dn_ifp;
357 if (ifp)
358 ifnet_reference(ifp);
359 flags = dn_tag->dn_flags;
360 if (dn_tag->dn_flags & IPV6_OUTARGS)
361 ip6oa = dn_tag->dn_ip6oa;
362
363 saved_route = dn_tag->dn_ro6;
364 ro = &saved_route;
365 saved_ro_pmtu = dn_tag->dn_ro6_pmtu;
366 ro_pmtu = &saved_ro_pmtu;
367 origifp = dn_tag->dn_origifp;
368 if (origifp)
369 ifnet_reference(origifp);
370 mtu = dn_tag->dn_mtu;
371 alwaysfrag = dn_tag->dn_alwaysfrag;
372 unfragpartlen = dn_tag->dn_unfragpartlen;
373
374 bcopy(&dn_tag->dn_exthdrs, &exthdrs, sizeof(exthdrs));
375
376 m_tag_delete(m0, tag);
377 }
378 #endif /* DUMMYNET */
379
380 finaldst = ip6->ip6_dst;
381
382 if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
383 if ((select_srcif = (!(flags & (IPV6_FORWARDING |
384 IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL)) &&
385 (ip6oa.ip6oa_flags & IP6OAF_SELECT_SRCIF))))
386 ipf_pktopts.ippo_flags |= IPPOF_SELECT_SRCIF;
387
388 if ((ip6oa.ip6oa_flags & IP6OAF_BOUND_IF) &&
389 ip6oa.ip6oa_boundif != IFSCOPE_NONE) {
390 ipf_pktopts.ippo_flags |= (IPPOF_BOUND_IF |
391 (ip6oa.ip6oa_boundif << IPPOF_SHIFT_IFSCOPE));
392 }
393
394 if (ip6oa.ip6oa_flags & IP6OAF_BOUND_SRCADDR)
395 ipf_pktopts.ippo_flags |= IPPOF_BOUND_SRCADDR;
396 } else {
397 select_srcif = FALSE;
398 ip6oa.ip6oa_boundif = IFSCOPE_NONE;
399 ip6oa.ip6oa_flags &= ~(IP6OAF_SELECT_SRCIF | IP6OAF_BOUND_IF |
400 IP6OAF_BOUND_SRCADDR);
401 }
402
403 if ((flags & IPV6_OUTARGS) && (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR))
404 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
405
406 #if DUMMYNET
407 if (args.fwa_pf_rule) {
408 ip6 = mtod(m, struct ip6_hdr *);
409
410 goto check_with_pf;
411 }
412 #endif /* DUMMYNET */
413
414 #define MAKE_EXTHDR(hp, mp) \
415 do { \
416 if (hp) { \
417 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
418 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
419 ((eh)->ip6e_len + 1) << 3); \
420 if (error) \
421 goto freehdrs; \
422 } \
423 } while (0)
424
425 bzero(&exthdrs, sizeof(exthdrs));
426
427 if (opt) {
428 /* Hop-by-Hop options header */
429 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
430 /* Destination options header(1st part) */
431 if (opt->ip6po_rthdr) {
432 /*
433 * Destination options header(1st part)
434 * This only makes sense with a routing header.
435 * See Section 9.2 of RFC 3542.
436 * Disabling this part just for MIP6 convenience is
437 * a bad idea. We need to think carefully about a
438 * way to make the advanced API coexist with MIP6
439 * options, which might automatically be inserted in
440 * the kernel.
441 */
442 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
443 }
444 /* Routing header */
445 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
446 /* Destination options header(2nd part) */
447 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
448 }
449
450 #if IPSEC
451 if (ipsec_bypass != 0)
452 goto skip_ipsec;
453
454 /* get a security policy for this packet */
455 if (so == NULL)
456 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
457 else
458 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
459
460 if (sp == NULL) {
461 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
462 goto freehdrs;
463 }
464
465 error = 0;
466
467 /* check policy */
468 switch (sp->policy) {
469 case IPSEC_POLICY_DISCARD:
470 case IPSEC_POLICY_GENERATE:
471 /*
472 * This packet is just discarded.
473 */
474 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
475 goto freehdrs;
476
477 case IPSEC_POLICY_BYPASS:
478 case IPSEC_POLICY_NONE:
479 /* no need to do IPsec. */
480 needipsec = 0;
481 break;
482
483 case IPSEC_POLICY_IPSEC:
484 if (sp->req == NULL) {
485 /* acquire a policy */
486 error = key_spdacquire(sp);
487 goto freehdrs;
488 }
489 needipsec = 1;
490 break;
491
492 case IPSEC_POLICY_ENTRUST:
493 default:
494 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
495 }
496 skip_ipsec:
497 #endif /* IPSEC */
498
499 /*
500 * Calculate the total length of the extension header chain.
501 * Keep the length of the unfragmentable part for fragmentation.
502 */
503 optlen = 0;
504 if (exthdrs.ip6e_hbh)
505 optlen += exthdrs.ip6e_hbh->m_len;
506 if (exthdrs.ip6e_dest1)
507 optlen += exthdrs.ip6e_dest1->m_len;
508 if (exthdrs.ip6e_rthdr)
509 optlen += exthdrs.ip6e_rthdr->m_len;
510 unfragpartlen = optlen + sizeof(struct ip6_hdr);
511
512 /* NOTE: we don't add AH/ESP length here. do that later. */
513 if (exthdrs.ip6e_dest2)
514 optlen += exthdrs.ip6e_dest2->m_len;
515
516
517 if (needipsec &&
518 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
519 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
520 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
521 }
522
523 /*
524 * If we need IPsec, or there is at least one extension header,
525 * separate IP6 header from the payload.
526 */
527 if ((needipsec || optlen) && !hdrsplit) {
528 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
529 m = NULL;
530 goto freehdrs;
531 }
532 m = exthdrs.ip6e_ip6;
533 hdrsplit++;
534 }
535
536 /* adjust pointer */
537 ip6 = mtod(m, struct ip6_hdr *);
538
539 /* adjust mbuf packet header length */
540 m->m_pkthdr.len += optlen;
541 plen = m->m_pkthdr.len - sizeof(*ip6);
542
543 /* If this is a jumbo payload, insert a jumbo payload option. */
544 if (plen > IPV6_MAXPACKET) {
545 if (!hdrsplit) {
546 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
547 m = NULL;
548 goto freehdrs;
549 }
550 m = exthdrs.ip6e_ip6;
551 hdrsplit++;
552 }
553 /* adjust pointer */
554 ip6 = mtod(m, struct ip6_hdr *);
555 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
556 goto freehdrs;
557 ip6->ip6_plen = 0;
558 } else
559 ip6->ip6_plen = htons(plen);
560
561 /*
562 * Concatenate headers and fill in next header fields.
563 * Here we have, on "m"
564 * IPv6 payload
565 * and we insert headers accordingly. Finally, we should be getting:
566 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
567 *
568 * during the header composing process, "m" points to IPv6 header.
569 * "mprev" points to an extension header prior to esp.
570 */
571 {
572 u_char *nexthdrp = &ip6->ip6_nxt;
573 struct mbuf *mprev = m;
574
575 /*
576 * we treat dest2 specially. this makes IPsec processing
577 * much easier. the goal here is to make mprev point the
578 * mbuf prior to dest2.
579 *
580 * result: IPv6 dest2 payload
581 * m and mprev will point to IPv6 header.
582 */
583 if (exthdrs.ip6e_dest2) {
584 if (!hdrsplit)
585 panic("assumption failed: hdr not split");
586 exthdrs.ip6e_dest2->m_next = m->m_next;
587 m->m_next = exthdrs.ip6e_dest2;
588 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
589 ip6->ip6_nxt = IPPROTO_DSTOPTS;
590 }
591
592 #define MAKE_CHAIN(m, mp, p, i)\
593 do {\
594 if (m) {\
595 if (!hdrsplit) \
596 panic("assumption failed: hdr not split"); \
597 *mtod((m), u_char *) = *(p);\
598 *(p) = (i);\
599 p = mtod((m), u_char *);\
600 (m)->m_next = (mp)->m_next;\
601 (mp)->m_next = (m);\
602 (mp) = (m);\
603 }\
604 } while (0)
605 /*
606 * result: IPv6 hbh dest1 rthdr dest2 payload
607 * m will point to IPv6 header. mprev will point to the
608 * extension header prior to dest2 (rthdr in the above case).
609 */
610 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
611 nexthdrp, IPPROTO_HOPOPTS);
612 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
613 nexthdrp, IPPROTO_DSTOPTS);
614 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
615 nexthdrp, IPPROTO_ROUTING);
616
617 if (!TAILQ_EMPTY(&ipv6_filters)) {
618 struct ipfilter *filter;
619 int seen = (inject_filter_ref == 0);
620 int fixscope = 0;
621
622 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
623 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
624 IM6O_LOCK(im6o);
625 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
626 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
627 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
628 IM6O_UNLOCK(im6o);
629 }
630
631 /* Hack: embed the scope_id in the destination */
632 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
633 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
634 fixscope = 1;
635 ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id);
636 }
637 {
638 ipf_ref();
639 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
640 /*
641 * No need to process packet twice if we've
642 * already seen it
643 */
644 if (seen == 0) {
645 if ((struct ipfilter *)inject_filter_ref == filter)
646 seen = 1;
647 } else if (filter->ipf_filter.ipf_output) {
648 errno_t result;
649
650 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
651 if (result == EJUSTRETURN) {
652 ipf_unref();
653 goto done;
654 }
655 if (result != 0) {
656 ipf_unref();
657 goto bad;
658 }
659 }
660 }
661 ipf_unref();
662 }
663 ip6 = mtod(m, struct ip6_hdr *);
664 /* Hack: cleanup embedded scope_id if we put it there */
665 if (fixscope)
666 ip6->ip6_dst.s6_addr16[1] = 0;
667 }
668
669 #if IPSEC
670 if (!needipsec)
671 goto skip_ipsec2;
672
673 /*
674 * pointers after IPsec headers are not valid any more.
675 * other pointers need a great care too.
676 * (IPsec routines should not mangle mbufs prior to AH/ESP)
677 */
678 exthdrs.ip6e_dest2 = NULL;
679
680 {
681 struct ip6_rthdr *rh = NULL;
682 int segleft_org = 0;
683
684 if (exthdrs.ip6e_rthdr) {
685 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
686 segleft_org = rh->ip6r_segleft;
687 rh->ip6r_segleft = 0;
688 }
689
690 ipsec_state.m = m;
691 error = ipsec6_output_trans(&ipsec_state, nexthdrp, mprev, sp, flags,
692 &needipsectun);
693 m = ipsec_state.m;
694 if (error) {
695 /* mbuf is already reclaimed in ipsec6_output_trans. */
696 m = NULL;
697 switch (error) {
698 case EHOSTUNREACH:
699 case ENETUNREACH:
700 case EMSGSIZE:
701 case ENOBUFS:
702 case ENOMEM:
703 break;
704 default:
705 printf("ip6_output (ipsec): error code %d\n", error);
706 /* fall through */
707 case ENOENT:
708 /* don't show these error codes to the user */
709 error = 0;
710 break;
711 }
712 goto bad;
713 }
714 if (exthdrs.ip6e_rthdr) {
715 /* ah6_output doesn't modify mbuf chain */
716 rh->ip6r_segleft = segleft_org;
717 }
718 }
719 }
720 skip_ipsec2:
721 #endif
722
723 /*
724 * If there is a routing header, replace the destination address field
725 * with the first hop of the routing header.
726 */
727 if (exthdrs.ip6e_rthdr) {
728 struct ip6_rthdr *rh =
729 (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
730 struct ip6_rthdr *));
731 struct ip6_rthdr0 *rh0;
732 struct in6_addr *addr;
733 struct sockaddr_in6 sa;
734
735 switch (rh->ip6r_type) {
736 case IPV6_RTHDR_TYPE_0:
737 rh0 = (struct ip6_rthdr0 *)rh;
738 addr = (struct in6_addr *)(void *)(rh0 + 1);
739
740 /*
741 * construct a sockaddr_in6 form of
742 * the first hop.
743 *
744 * XXX: we may not have enough
745 * information about its scope zone;
746 * there is no standard API to pass
747 * the information from the
748 * application.
749 */
750 bzero(&sa, sizeof(sa));
751 sa.sin6_family = AF_INET6;
752 sa.sin6_len = sizeof(sa);
753 sa.sin6_addr = addr[0];
754 if ((error = sa6_embedscope(&sa,
755 ip6_use_defzone)) != 0) {
756 goto bad;
757 }
758 ip6->ip6_dst = sa.sin6_addr;
759 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
760 * (rh0->ip6r0_segleft - 1));
761 addr[rh0->ip6r0_segleft - 1] = finaldst;
762 /* XXX */
763 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
764 break;
765 default: /* is it possible? */
766 error = EINVAL;
767 goto bad;
768 }
769 }
770
771 /* Source address validation */
772 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
773 (flags & IPV6_UNSPECSRC) == 0) {
774 error = EOPNOTSUPP;
775 ip6stat.ip6s_badscope++;
776 goto bad;
777 }
778 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
779 error = EOPNOTSUPP;
780 ip6stat.ip6s_badscope++;
781 goto bad;
782 }
783
784 ip6stat.ip6s_localout++;
785
786 /*
787 * Route packet.
788 */
789 if (ro == 0) {
790 ro = &ip6route;
791 bzero((caddr_t)ro, sizeof(*ro));
792 }
793 ro_pmtu = ro;
794 if (opt && opt->ip6po_rthdr)
795 ro = &opt->ip6po_route;
796 dst = (struct sockaddr_in6 *)&ro->ro_dst;
797
798 if (ro && ro->ro_rt)
799 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
800 /*
801 * if specified, try to fill in the traffic class field.
802 * do not override if a non-zero value is already set.
803 * we check the diffserv field and the ecn field separately.
804 */
805 if (opt && opt->ip6po_tclass >= 0) {
806 int mask = 0;
807
808 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
809 mask |= 0xfc;
810 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
811 mask |= 0x03;
812 if (mask != 0)
813 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
814 }
815
816 /* fill in or override the hop limit field, if necessary. */
817 if (opt && opt->ip6po_hlim != -1)
818 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
819 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
820 if (im6o != NULL) {
821 IM6O_LOCK(im6o);
822 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
823 IM6O_UNLOCK(im6o);
824 } else {
825 ip6->ip6_hlim = ip6_defmcasthlim;
826 }
827 }
828
829 /*
830 * If there is a cached route, check that it is to the same
831 * destination and is still up. If not, free it and try again.
832 * Test rt_flags without holding rt_lock for performance reasons;
833 * if the route is down it will hopefully be caught by the layer
834 * below (since it uses this route as a hint) or during the
835 * next transmit.
836 */
837 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
838 dst->sin6_family != AF_INET6 ||
839 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) ||
840 ro->ro_rt->generation_id != route_generation)) {
841 rtfree(ro->ro_rt);
842 ro->ro_rt = NULL;
843 }
844 if (ro->ro_rt == NULL) {
845 bzero(dst, sizeof(*dst));
846 dst->sin6_family = AF_INET6;
847 dst->sin6_len = sizeof(struct sockaddr_in6);
848 dst->sin6_addr = ip6->ip6_dst;
849 }
850 #if IPSEC
851 if (needipsec && needipsectun) {
852 #if CONFIG_DTRACE
853 struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
854 #endif
855 /*
856 * All the extension headers will become inaccessible
857 * (since they can be encrypted).
858 * Don't panic, we need no more updates to extension headers
859 * on inner IPv6 packet (since they are now encapsulated).
860 *
861 * IPv6 [ESP|AH] IPv6 [extension headers] payload
862 */
863 bzero(&exthdrs, sizeof(exthdrs));
864 exthdrs.ip6e_ip6 = m;
865
866 ipsec_state.m = m;
867 route_copyout(&ipsec_state.ro, (struct route *)ro, sizeof(ipsec_state.ro));
868 ipsec_state.dst = (struct sockaddr *)dst;
869
870 /* Added a trace here so that we can see packets inside a tunnel */
871 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
872 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
873 struct ip *, NULL, struct ip6_hdr *, ip6);
874
875 error = ipsec6_output_tunnel(&ipsec_state, sp, flags);
876 if (ipsec_state.tunneled == 4) /* tunneled in IPv4 - packet is gone */
877 goto done;
878 m = ipsec_state.m;
879 ipsec_saved_route = ro;
880 ro = (struct route_in6 *)&ipsec_state.ro;
881 dst = (struct sockaddr_in6 *)(void *)ipsec_state.dst;
882 if (error) {
883 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
884 m0 = m = NULL;
885 m = NULL;
886 switch (error) {
887 case EHOSTUNREACH:
888 case ENETUNREACH:
889 case EMSGSIZE:
890 case ENOBUFS:
891 case ENOMEM:
892 break;
893 default:
894 printf("ip6_output (ipsec): error code %d\n", error);
895 /* fall through */
896 case ENOENT:
897 /* don't show these error codes to the user */
898 error = 0;
899 break;
900 }
901 goto bad;
902 }
903 /*
904 * The packet has been encapsulated so the ifscope is no longer valid
905 * since it does not apply to the outer address: ignore the ifscope.
906 */
907 ip6oa.ip6oa_boundif = IFSCOPE_NONE;
908 ip6oa.ip6oa_flags &= ~IP6OAF_BOUND_IF;
909 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
910 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
911 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
912 }
913 exthdrs.ip6e_ip6 = m;
914 }
915 #endif /* IPSEC */
916
917 /* for safety */
918 if (ifp != NULL) {
919 ifnet_release(ifp);
920 ifp = NULL;
921 }
922
923 /* adjust pointer */
924 ip6 = mtod(m, struct ip6_hdr *);
925
926 if (select_srcif) {
927 bzero(&src_sa, sizeof(src_sa));
928 src_sa.sin6_family = AF_INET6;
929 src_sa.sin6_len = sizeof(src_sa);
930 src_sa.sin6_addr = ip6->ip6_src;
931 }
932 bzero(&dst_sa, sizeof(dst_sa));
933 dst_sa.sin6_family = AF_INET6;
934 dst_sa.sin6_len = sizeof(dst_sa);
935 dst_sa.sin6_addr = ip6->ip6_dst;
936
937 /*
938 * in6_selectroute() might return an ifp with its reference held
939 * even in the error case, so make sure to release its reference.
940 */
941 if ((error = in6_selectroute(select_srcif ? &src_sa : NULL,
942 &dst_sa, opt, im6o, ro, &ifp, &rt, 0, &ip6oa)) != 0) {
943 switch (error) {
944 case EHOSTUNREACH:
945 ip6stat.ip6s_noroute++;
946 break;
947 case EADDRNOTAVAIL:
948 default:
949 break; /* XXX statistics? */
950 }
951 if (ifp != NULL)
952 in6_ifstat_inc(ifp, ifs6_out_discard);
953 /* ifp (if non-NULL) will be released at the end */
954 goto bad;
955 }
956 if (rt == NULL) {
957 /*
958 * If in6_selectroute() does not return a route entry,
959 * dst may not have been updated.
960 */
961 *dst = dst_sa; /* XXX */
962 }
963
964 /*
965 * then rt (for unicast) and ifp must be non-NULL valid values.
966 */
967 if ((flags & IPV6_FORWARDING) == 0) {
968 /* XXX: the FORWARDING flag can be set for mrouting. */
969 in6_ifstat_inc(ifp, ifs6_out_request);
970 }
971 if (rt != NULL) {
972 RT_LOCK(rt);
973 ia = (struct in6_ifaddr *)(rt->rt_ifa);
974 if (ia != NULL)
975 IFA_ADDREF(&ia->ia_ifa);
976 rt->rt_use++;
977 RT_UNLOCK(rt);
978 }
979
980 /*
981 * The outgoing interface must be in the zone of source and
982 * destination addresses. We should use ia_ifp to support the
983 * case of sending packets to an address of our own.
984 */
985 if (ia != NULL && ia->ia_ifp) {
986 ifnet_reference(ia->ia_ifp); /* for origifp */
987 if (origifp != NULL)
988 ifnet_release(origifp);
989 origifp = ia->ia_ifp;
990 } else {
991 if (ifp != NULL)
992 ifnet_reference(ifp); /* for origifp */
993 if (origifp != NULL)
994 ifnet_release(origifp);
995 origifp = ifp;
996 }
997 src0 = ip6->ip6_src;
998 if (in6_setscope(&src0, origifp, &zone))
999 goto badscope;
1000 bzero(&src_sa, sizeof(src_sa));
1001 src_sa.sin6_family = AF_INET6;
1002 src_sa.sin6_len = sizeof(src_sa);
1003 src_sa.sin6_addr = ip6->ip6_src;
1004 if (sa6_recoverscope(&src_sa, TRUE) || zone != src_sa.sin6_scope_id)
1005 goto badscope;
1006
1007 dst0 = ip6->ip6_dst;
1008 if (in6_setscope(&dst0, origifp, &zone))
1009 goto badscope;
1010 /* re-initialize to be sure */
1011 bzero(&dst_sa, sizeof(dst_sa));
1012 dst_sa.sin6_family = AF_INET6;
1013 dst_sa.sin6_len = sizeof(dst_sa);
1014 dst_sa.sin6_addr = ip6->ip6_dst;
1015 if (sa6_recoverscope(&dst_sa, TRUE) || zone != dst_sa.sin6_scope_id) {
1016 goto badscope;
1017 }
1018
1019 /* scope check is done. */
1020 goto routefound;
1021
1022 badscope:
1023 ip6stat.ip6s_badscope++;
1024 in6_ifstat_inc(origifp, ifs6_out_discard);
1025 if (error == 0)
1026 error = EHOSTUNREACH; /* XXX */
1027 goto bad;
1028
1029 routefound:
1030 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1031 if (opt && opt->ip6po_nextroute.ro_rt) {
1032 /*
1033 * The nexthop is explicitly specified by the
1034 * application. We assume the next hop is an IPv6
1035 * address.
1036 */
1037 dst = (struct sockaddr_in6 *)(void *)opt->ip6po_nexthop;
1038 }
1039 else if ((rt->rt_flags & RTF_GATEWAY))
1040 dst = (struct sockaddr_in6 *)(void *)rt->rt_gateway;
1041 }
1042
1043 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
1044 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
1045 } else {
1046 struct in6_multi *in6m;
1047
1048 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
1049
1050 in6_ifstat_inc(ifp, ifs6_out_mcast);
1051
1052 /*
1053 * Confirm that the outgoing interface supports multicast.
1054 */
1055 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
1056 ip6stat.ip6s_noroute++;
1057 in6_ifstat_inc(ifp, ifs6_out_discard);
1058 error = ENETUNREACH;
1059 goto bad;
1060 }
1061 in6_multihead_lock_shared();
1062 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
1063 in6_multihead_lock_done();
1064 if (im6o != NULL)
1065 IM6O_LOCK(im6o);
1066 if (in6m != NULL &&
1067 (im6o == NULL || im6o->im6o_multicast_loop)) {
1068 if (im6o != NULL)
1069 IM6O_UNLOCK(im6o);
1070 /*
1071 * If we belong to the destination multicast group
1072 * on the outgoing interface, and the caller did not
1073 * forbid loopback, loop back a copy.
1074 */
1075 ip6_mloopback(ifp, m, dst);
1076 } else {
1077 if (im6o != NULL)
1078 IM6O_UNLOCK(im6o);
1079 /*
1080 * If we are acting as a multicast router, perform
1081 * multicast forwarding as if the packet had just
1082 * arrived on the interface to which we are about
1083 * to send. The multicast forwarding function
1084 * recursively calls this function, using the
1085 * IPV6_FORWARDING flag to prevent infinite recursion.
1086 *
1087 * Multicasts that are looped back by ip6_mloopback(),
1088 * above, will be forwarded by the ip6_input() routine,
1089 * if necessary.
1090 */
1091 #if MROUTING
1092 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
1093 /*
1094 * XXX: ip6_mforward expects that rcvif is NULL
1095 * when it is called from the originating path.
1096 * However, it is not always the case, since
1097 * some versions of MGETHDR() does not
1098 * initialize the field.
1099 */
1100 m->m_pkthdr.rcvif = NULL;
1101 if (ip6_mforward(ip6, ifp, m) != 0) {
1102 m_freem(m);
1103 if (in6m != NULL)
1104 IN6M_REMREF(in6m);
1105 goto done;
1106 }
1107 }
1108 #endif
1109 }
1110 if (in6m != NULL)
1111 IN6M_REMREF(in6m);
1112 /*
1113 * Multicasts with a hoplimit of zero may be looped back,
1114 * above, but must not be transmitted on a network.
1115 * Also, multicasts addressed to the loopback interface
1116 * are not sent -- the above call to ip6_mloopback() will
1117 * loop back a copy if this host actually belongs to the
1118 * destination group on the loopback interface.
1119 */
1120 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1121 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1122 m_freem(m);
1123 goto done;
1124 }
1125 }
1126
1127 /*
1128 * Fill the outgoing inteface to tell the upper layer
1129 * to increment per-interface statistics.
1130 */
1131 if (ifpp != NULL) {
1132 ifnet_reference(ifp); /* for caller */
1133 if (*ifpp != NULL)
1134 ifnet_release(*ifpp);
1135 *ifpp = ifp;
1136 }
1137
1138 /* Determine path MTU. */
1139 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1140 &alwaysfrag)) != 0)
1141 goto bad;
1142
1143 /*
1144 * The caller of this function may specify to use the minimum MTU
1145 * in some cases.
1146 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1147 * setting. The logic is a bit complicated; by default, unicast
1148 * packets will follow path MTU while multicast packets will be sent at
1149 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1150 * including unicast ones will be sent at the minimum MTU. Multicast
1151 * packets will always be sent at the minimum MTU unless
1152 * IP6PO_MINMTU_DISABLE is explicitly specified.
1153 * See RFC 3542 for more details.
1154 */
1155 if (mtu > IPV6_MMTU) {
1156 if ((flags & IPV6_MINMTU))
1157 mtu = IPV6_MMTU;
1158 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
1159 mtu = IPV6_MMTU;
1160 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1161 (opt == NULL ||
1162 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1163 mtu = IPV6_MMTU;
1164 }
1165 }
1166
1167 /*
1168 * clear embedded scope identifiers if necessary.
1169 * in6_clearscope will touch the addresses only when necessary.
1170 */
1171 in6_clearscope(&ip6->ip6_src);
1172 in6_clearscope(&ip6->ip6_dst);
1173
1174 #if IPFW2
1175 /*
1176 * Check with the firewall...
1177 */
1178 if (ip6_fw_enable && ip6_fw_chk_ptr) {
1179 u_short port = 0;
1180 m->m_pkthdr.rcvif = NULL; /* XXX */
1181 /* If ipfw says divert, we have to just drop packet */
1182 if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
1183 m_freem(m);
1184 goto done;
1185 }
1186 if (!m) {
1187 error = EACCES;
1188 goto done;
1189 }
1190 }
1191 #endif
1192
1193 /*
1194 * If the outgoing packet contains a hop-by-hop options header,
1195 * it must be examined and processed even by the source node.
1196 * (RFC 2460, section 4.)
1197 */
1198 if (exthdrs.ip6e_hbh) {
1199 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1200 u_int32_t dummy; /* XXX unused */
1201
1202 #if DIAGNOSTIC
1203 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1204 panic("ip6e_hbh is not continuous");
1205 #endif
1206 /*
1207 * XXX: if we have to send an ICMPv6 error to the sender,
1208 * we need the M_LOOP flag since icmp6_error() expects
1209 * the IPv6 and the hop-by-hop options header are
1210 * continuous unless the flag is set.
1211 */
1212 m->m_flags |= M_LOOP;
1213 m->m_pkthdr.rcvif = ifp;
1214 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1215 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1216 &dummy, &plen) < 0) {
1217 /* m was already freed at this point */
1218 error = EINVAL;/* better error? */
1219 goto done;
1220 }
1221 m->m_flags &= ~M_LOOP; /* XXX */
1222 m->m_pkthdr.rcvif = NULL;
1223 }
1224
1225 #if DUMMYNET
1226 check_with_pf:
1227 #endif
1228 #if PF
1229 if (PF_IS_ENABLED) {
1230 #if DUMMYNET
1231 /*
1232 * TBD: Need to save opt->ip6po_flags for reinjection rdar://10434993
1233 */
1234 args.fwa_m = m;
1235 args.fwa_oif = ifp;
1236 args.fwa_oflags = flags;
1237 if ((flags & IPV6_OUTARGS))
1238 args.fwa_ip6oa = &ip6oa;
1239 args.fwa_ro6 = ro;
1240 args.fwa_dst6 = dst;
1241 args.fwa_ro6_pmtu = ro_pmtu;
1242 args.fwa_origifp = origifp;
1243 args.fwa_mtu = mtu;
1244 args.fwa_alwaysfrag = alwaysfrag;
1245 args.fwa_unfragpartlen = unfragpartlen;
1246 args.fwa_exthdrs = &exthdrs;
1247 /* Invoke outbound packet filter */
1248 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, &args);
1249 #else
1250 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE, NULL);
1251 #endif /* DUMMYNET */
1252
1253 if (error != 0 || m == NULL) {
1254 /*
1255 * Note that if we ever handle packet chain, we will
1256 * have to restore the linkage from the previous
1257 * packet to the next like in ip_outout_list()
1258 */
1259 if (m != NULL) {
1260 panic("%s: unexpected packet %p\n", __func__, m);
1261 /* NOTREACHED */
1262 }
1263 /* Already freed by callee */
1264 goto done;
1265 }
1266 ip6 = mtod(m, struct ip6_hdr *);
1267 }
1268 #endif /* PF */
1269
1270 /*
1271 * Send the packet to the outgoing interface.
1272 * If necessary, do IPv6 fragmentation before sending.
1273 *
1274 * the logic here is rather complex:
1275 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1276 * 1-a: send as is if tlen <= path mtu
1277 * 1-b: fragment if tlen > path mtu
1278 *
1279 * 2: if user asks us not to fragment (dontfrag == 1)
1280 * 2-a: send as is if tlen <= interface mtu
1281 * 2-b: error if tlen > interface mtu
1282 *
1283 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1284 * always fragment
1285 *
1286 * 4: if dontfrag == 1 && alwaysfrag == 1
1287 * error, as we cannot handle this conflicting request
1288 */
1289 tlen = m->m_pkthdr.len;
1290
1291 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
1292 dontfrag = 1;
1293 else
1294 dontfrag = 0;
1295 if (dontfrag && alwaysfrag) { /* case 4 */
1296 /* conflicting request - can't transmit */
1297 error = EMSGSIZE;
1298 goto bad;
1299 }
1300
1301 lck_rw_lock_shared(nd_if_rwlock);
1302 /* Access without acquiring nd_ifinfo lock for performance */
1303 ifmtu = IN6_LINKMTU(ifp);
1304 lck_rw_done(nd_if_rwlock);
1305
1306 if (dontfrag && tlen > ifmtu) { /* case 2-b */
1307 /*
1308 * Even if the DONTFRAG option is specified, we cannot send the
1309 * packet when the data length is larger than the MTU of the
1310 * outgoing interface.
1311 * Notify the error by sending IPV6_PATHMTU ancillary data as
1312 * well as returning an error code (the latter is not described
1313 * in the API spec.)
1314 */
1315 u_int32_t mtu32;
1316 struct ip6ctlparam ip6cp;
1317
1318 mtu32 = (u_int32_t)mtu;
1319 bzero(&ip6cp, sizeof(ip6cp));
1320 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1321 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1322 (void *)&ip6cp);
1323
1324 error = EMSGSIZE;
1325 goto bad;
1326 }
1327
1328 /*
1329 * transmit packet without fragmentation
1330 */
1331 tso = (ifp->if_hwassist & IFNET_TSO_IPV6) &&
1332 (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6);
1333 if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
1334 (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1335 int sw_csum;
1336
1337 ip6 = mtod(m, struct ip6_hdr *);
1338 #ifdef IPSEC
1339 /* clean ipsec history once it goes out of the node */
1340 ipsec_delaux(m);
1341 #endif
1342
1343 if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */
1344 sw_csum = m->m_pkthdr.csum_flags;
1345 else
1346 sw_csum = m->m_pkthdr.csum_flags &
1347 ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1348
1349 if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) {
1350 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1351 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1352 }
1353 if (ro->ro_rt)
1354 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1355 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt, adv);
1356 goto done;
1357 }
1358
1359 /*
1360 * try to fragment the packet. case 1-b and 3
1361 */
1362 if (mtu < IPV6_MMTU) {
1363 /* path MTU cannot be less than IPV6_MMTU */
1364 error = EMSGSIZE;
1365 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1366 goto bad;
1367 } else if (ip6->ip6_plen == 0) {
1368 /* jumbo payload cannot be fragmented */
1369 error = EMSGSIZE;
1370 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1371 goto bad;
1372 } else {
1373 struct mbuf **mnext, *m_frgpart;
1374 struct ip6_frag *ip6f;
1375 u_int32_t id = htonl(ip6_randomid());
1376 u_char nextproto;
1377
1378 /*
1379 * Too large for the destination or interface;
1380 * fragment if possible.
1381 * Must be able to put at least 8 bytes per fragment.
1382 */
1383 hlen = unfragpartlen;
1384 if (mtu > IPV6_MAXPACKET)
1385 mtu = IPV6_MAXPACKET;
1386
1387 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1388 if (len < 8) {
1389 error = EMSGSIZE;
1390 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1391 goto bad;
1392 }
1393
1394 mnext = &m->m_nextpkt;
1395
1396 /*
1397 * Change the next header field of the last header in the
1398 * unfragmentable part.
1399 */
1400 if (exthdrs.ip6e_rthdr) {
1401 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1402 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1403 } else if (exthdrs.ip6e_dest1) {
1404 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1405 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1406 } else if (exthdrs.ip6e_hbh) {
1407 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1408 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1409 } else {
1410 nextproto = ip6->ip6_nxt;
1411 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1412 }
1413
1414 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
1415 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1416 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1417 }
1418
1419 /*
1420 * Loop through length of segment after first fragment,
1421 * make new header and copy data of each part and link onto
1422 * chain.
1423 */
1424 m0 = m;
1425 for (off = hlen; off < tlen; off += len) {
1426 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1427 if (!m) {
1428 error = ENOBUFS;
1429 ip6stat.ip6s_odropped++;
1430 goto sendorfree;
1431 }
1432 m->m_pkthdr.rcvif = NULL;
1433 m->m_flags = m0->m_flags & M_COPYFLAGS;
1434 *mnext = m;
1435 mnext = &m->m_nextpkt;
1436 m->m_data += max_linkhdr;
1437 mhip6 = mtod(m, struct ip6_hdr *);
1438 *mhip6 = *ip6;
1439 m->m_len = sizeof(*mhip6);
1440 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1441 if (error) {
1442 ip6stat.ip6s_odropped++;
1443 goto sendorfree;
1444 }
1445 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1446 if (off + len >= tlen)
1447 len = tlen - off;
1448 else
1449 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1450 mhip6->ip6_plen = htons((u_short)(len + hlen +
1451 sizeof(*ip6f) -
1452 sizeof(struct ip6_hdr)));
1453 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1454 error = ENOBUFS;
1455 ip6stat.ip6s_odropped++;
1456 goto sendorfree;
1457 }
1458 m_cat(m, m_frgpart);
1459 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1460 m->m_pkthdr.rcvif = 0;
1461 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1462
1463 M_COPY_PFTAG(m, m0);
1464 m_set_service_class(m, m0->m_pkthdr.svc);
1465
1466 #ifdef __darwin8_notyet
1467 #if CONFIG_MACF_NET
1468 mac_create_fragment(m0, m);
1469 #endif
1470 #endif
1471 ip6f->ip6f_reserved = 0;
1472 ip6f->ip6f_ident = id;
1473 ip6f->ip6f_nxt = nextproto;
1474 ip6stat.ip6s_ofragments++;
1475 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1476 }
1477
1478 in6_ifstat_inc(ifp, ifs6_out_fragok);
1479 }
1480
1481 /*
1482 * Remove leading garbages.
1483 */
1484 sendorfree:
1485 m = m0->m_nextpkt;
1486 m0->m_nextpkt = 0;
1487 m_freem(m0);
1488 for (m0 = m; m; m = m0) {
1489 m0 = m->m_nextpkt;
1490 m->m_nextpkt = 0;
1491 if (error == 0) {
1492 /* Record statistics for this interface address. */
1493 if (ia) {
1494 #ifndef __APPLE__
1495 ia->ia_ifa.if_opackets++;
1496 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1497 #endif
1498 }
1499 #if IPSEC
1500 /* clean ipsec history once it goes out of the node */
1501 ipsec_delaux(m);
1502 #endif
1503 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt,
1504 adv);
1505
1506 } else
1507 m_freem(m);
1508 }
1509
1510 if (error == 0)
1511 ip6stat.ip6s_fragmented++;
1512
1513 done:
1514 #if IPSEC
1515 if (ipsec_saved_route) {
1516 ro = ipsec_saved_route;
1517 if (ipsec_state.ro.ro_rt) {
1518 rtfree(ipsec_state.ro.ro_rt);
1519 }
1520 }
1521 #endif /* IPSEC */
1522 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */
1523 rtfree(ro->ro_rt);
1524 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1525 rtfree(ro_pmtu->ro_rt);
1526 }
1527
1528 #if IPSEC
1529 if (sp != NULL)
1530 key_freesp(sp, KEY_SADB_UNLOCKED);
1531 #endif /* IPSEC */
1532
1533 if (ia != NULL)
1534 IFA_REMREF(&ia->ia_ifa);
1535 if (ifp != NULL)
1536 ifnet_release(ifp);
1537 if (origifp != NULL)
1538 ifnet_release(origifp);
1539 return (error);
1540
1541 freehdrs:
1542 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1543 m_freem(exthdrs.ip6e_dest1);
1544 m_freem(exthdrs.ip6e_rthdr);
1545 m_freem(exthdrs.ip6e_dest2);
1546 /* fall through */
1547 bad:
1548 m_freem(m);
1549 goto done;
1550 }
1551
1552 static int
1553 ip6_copyexthdr(mp, hdr, hlen)
1554 struct mbuf **mp;
1555 caddr_t hdr;
1556 int hlen;
1557 {
1558 struct mbuf *m;
1559
1560 if (hlen > MCLBYTES)
1561 return(ENOBUFS); /* XXX */
1562
1563 MGET(m, M_DONTWAIT, MT_DATA);
1564 if (!m)
1565 return(ENOBUFS);
1566
1567 if (hlen > MLEN) {
1568 MCLGET(m, M_DONTWAIT);
1569 if ((m->m_flags & M_EXT) == 0) {
1570 m_free(m);
1571 return (ENOBUFS);
1572 }
1573 }
1574 m->m_len = hlen;
1575 if (hdr)
1576 bcopy(hdr, mtod(m, caddr_t), hlen);
1577
1578 *mp = m;
1579 return (0);
1580 }
1581
1582 /*
1583 * Process a delayed payload checksum calculation.
1584 */
1585 void
1586 in6_delayed_cksum(struct mbuf *m, uint16_t offset)
1587 {
1588 uint16_t csum;
1589
1590 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1591 if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) {
1592 csum = 0xffff;
1593 }
1594
1595 offset += (m->m_pkthdr.csum_data & 0xffff);
1596 if ((offset + sizeof(csum)) > m->m_len) {
1597 m_copyback(m, offset, sizeof(csum), &csum);
1598 } else if (IP6_HDR_ALIGNED_P(mtod(m, char *))) {
1599 *(uint16_t *)(void *)(mtod(m, char *) + offset) = csum;
1600 } else {
1601 bcopy(&csum, (mtod(m, char *) + offset), sizeof (csum));
1602 }
1603 }
1604 /*
1605 * Insert jumbo payload option.
1606 */
1607 static int
1608 ip6_insert_jumboopt(exthdrs, plen)
1609 struct ip6_exthdrs *exthdrs;
1610 u_int32_t plen;
1611 {
1612 struct mbuf *mopt;
1613 u_char *optbuf;
1614 u_int32_t v;
1615
1616 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1617
1618 /*
1619 * If there is no hop-by-hop options header, allocate new one.
1620 * If there is one but it doesn't have enough space to store the
1621 * jumbo payload option, allocate a cluster to store the whole options.
1622 * Otherwise, use it to store the options.
1623 */
1624 if (exthdrs->ip6e_hbh == 0) {
1625 MGET(mopt, M_DONTWAIT, MT_DATA);
1626 if (mopt == 0)
1627 return (ENOBUFS);
1628 mopt->m_len = JUMBOOPTLEN;
1629 optbuf = mtod(mopt, u_char *);
1630 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1631 exthdrs->ip6e_hbh = mopt;
1632 } else {
1633 struct ip6_hbh *hbh;
1634
1635 mopt = exthdrs->ip6e_hbh;
1636 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1637 /*
1638 * XXX assumption:
1639 * - exthdrs->ip6e_hbh is not referenced from places
1640 * other than exthdrs.
1641 * - exthdrs->ip6e_hbh is not an mbuf chain.
1642 */
1643 u_int32_t oldoptlen = mopt->m_len;
1644 struct mbuf *n;
1645
1646 /*
1647 * XXX: give up if the whole (new) hbh header does
1648 * not fit even in an mbuf cluster.
1649 */
1650 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1651 return (ENOBUFS);
1652
1653 /*
1654 * As a consequence, we must always prepare a cluster
1655 * at this point.
1656 */
1657 MGET(n, M_DONTWAIT, MT_DATA);
1658 if (n) {
1659 MCLGET(n, M_DONTWAIT);
1660 if ((n->m_flags & M_EXT) == 0) {
1661 m_freem(n);
1662 n = NULL;
1663 }
1664 }
1665 if (!n)
1666 return (ENOBUFS);
1667 n->m_len = oldoptlen + JUMBOOPTLEN;
1668 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1669 oldoptlen);
1670 optbuf = mtod(n, u_char *) + oldoptlen;
1671 m_freem(mopt);
1672 mopt = exthdrs->ip6e_hbh = n;
1673 } else {
1674 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1675 mopt->m_len += JUMBOOPTLEN;
1676 }
1677 optbuf[0] = IP6OPT_PADN;
1678 optbuf[1] = 1;
1679
1680 /*
1681 * Adjust the header length according to the pad and
1682 * the jumbo payload option.
1683 */
1684 hbh = mtod(mopt, struct ip6_hbh *);
1685 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1686 }
1687
1688 /* fill in the option. */
1689 optbuf[2] = IP6OPT_JUMBO;
1690 optbuf[3] = 4;
1691 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1692 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1693
1694 /* finally, adjust the packet header length */
1695 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1696
1697 return (0);
1698 #undef JUMBOOPTLEN
1699 }
1700
1701 /*
1702 * Insert fragment header and copy unfragmentable header portions.
1703 */
1704 static int
1705 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1706 struct mbuf *m0, *m;
1707 int hlen;
1708 struct ip6_frag **frghdrp;
1709 {
1710 struct mbuf *n, *mlast;
1711
1712 if (hlen > sizeof(struct ip6_hdr)) {
1713 n = m_copym(m0, sizeof(struct ip6_hdr),
1714 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1715 if (n == 0)
1716 return (ENOBUFS);
1717 m->m_next = n;
1718 } else
1719 n = m;
1720
1721 /* Search for the last mbuf of unfragmentable part. */
1722 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1723 ;
1724
1725 if ((mlast->m_flags & M_EXT) == 0 &&
1726 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1727 /* use the trailing space of the last mbuf for the fragment hdr */
1728 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1729 mlast->m_len);
1730 mlast->m_len += sizeof(struct ip6_frag);
1731 m->m_pkthdr.len += sizeof(struct ip6_frag);
1732 } else {
1733 /* allocate a new mbuf for the fragment header */
1734 struct mbuf *mfrg;
1735
1736 MGET(mfrg, M_DONTWAIT, MT_DATA);
1737 if (mfrg == 0)
1738 return (ENOBUFS);
1739 mfrg->m_len = sizeof(struct ip6_frag);
1740 *frghdrp = mtod(mfrg, struct ip6_frag *);
1741 mlast->m_next = mfrg;
1742 }
1743
1744 return (0);
1745 }
1746
1747 extern int load_ipfw(void);
1748 static int
1749 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1750 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
1751 int *alwaysfragp)
1752 {
1753 u_int32_t mtu = 0;
1754 int alwaysfrag = 0;
1755 int error = 0;
1756
1757 if (ro_pmtu != ro) {
1758 /* The first hop and the final destination may differ. */
1759 struct sockaddr_in6 *sa6_dst =
1760 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1761 if (ro_pmtu->ro_rt &&
1762 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1763 ro_pmtu->ro_rt->generation_id != route_generation ||
1764 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1765 rtfree(ro_pmtu->ro_rt);
1766 ro_pmtu->ro_rt = (struct rtentry *)NULL;
1767 }
1768 if (ro_pmtu->ro_rt == NULL) {
1769 bzero(sa6_dst, sizeof(*sa6_dst));
1770 sa6_dst->sin6_family = AF_INET6;
1771 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1772 sa6_dst->sin6_addr = *dst;
1773
1774 rtalloc_scoped((struct route *)ro_pmtu,
1775 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
1776 }
1777 }
1778
1779
1780 if (ro_pmtu->ro_rt != NULL) {
1781 u_int32_t ifmtu;
1782
1783 lck_rw_lock_shared(nd_if_rwlock);
1784 /* Access without acquiring nd_ifinfo lock for performance */
1785 ifmtu = IN6_LINKMTU(ifp);
1786 lck_rw_done(nd_if_rwlock);
1787
1788 RT_LOCK_SPIN(ro_pmtu->ro_rt);
1789 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1790 if (mtu > ifmtu || mtu == 0) {
1791 /*
1792 * The MTU on the route is larger than the MTU on
1793 * the interface! This shouldn't happen, unless the
1794 * MTU of the interface has been changed after the
1795 * interface was brought up. Change the MTU in the
1796 * route to match the interface MTU (as long as the
1797 * field isn't locked).
1798 *
1799 * if MTU on the route is 0, we need to fix the MTU.
1800 * this case happens with path MTU discovery timeouts.
1801 */
1802 mtu = ifmtu;
1803 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
1804 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
1805 }
1806 else if (mtu < IPV6_MMTU) {
1807 /*
1808 * RFC2460 section 5, last paragraph:
1809 * if we record ICMPv6 too big message with
1810 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1811 * or smaller, with framgent header attached.
1812 * (fragment header is needed regardless from the
1813 * packet size, for translators to identify packets)
1814 */
1815 alwaysfrag = 1;
1816 mtu = IPV6_MMTU;
1817 }
1818 RT_UNLOCK(ro_pmtu->ro_rt);
1819 } else {
1820 if (ifp) {
1821 lck_rw_lock_shared(nd_if_rwlock);
1822 /* Don't hold nd_ifinfo lock for performance */
1823 mtu = IN6_LINKMTU(ifp);
1824 lck_rw_done(nd_if_rwlock);
1825 } else
1826 error = EHOSTUNREACH; /* XXX */
1827 }
1828
1829 *mtup = mtu;
1830 if (alwaysfragp)
1831 *alwaysfragp = alwaysfrag;
1832 return (error);
1833 }
1834
1835 /*
1836 * IP6 socket option processing.
1837 */
1838 int
1839 ip6_ctloutput(so, sopt)
1840 struct socket *so;
1841 struct sockopt *sopt;
1842 {
1843 int optdatalen, uproto;
1844 void *optdata;
1845 int privileged;
1846 struct inpcb *in6p = sotoinpcb(so);
1847 int error = 0, optval = 0;
1848 int level, op = -1, optname = 0;
1849 int optlen = 0;
1850 struct proc *p;
1851
1852 if (sopt == NULL) {
1853 panic("ip6_ctloutput: arg soopt is NULL");
1854 /* NOTREACHED */
1855 }
1856 level = sopt->sopt_level;
1857 op = sopt->sopt_dir;
1858 optname = sopt->sopt_name;
1859 optlen = sopt->sopt_valsize;
1860 p = sopt->sopt_p;
1861 uproto = (int)so->so_proto->pr_protocol;
1862
1863 privileged = (proc_suser(p) == 0);
1864
1865 if (level == IPPROTO_IPV6) {
1866 switch (op) {
1867
1868 case SOPT_SET:
1869 switch (optname) {
1870 case IPV6_2292PKTOPTIONS:
1871 {
1872 struct mbuf *m;
1873
1874 error = soopt_getm(sopt, &m); /* XXX */
1875 if (error != 0)
1876 break;
1877 error = soopt_mcopyin(sopt, m); /* XXX */
1878 if (error != 0)
1879 break;
1880 error = ip6_pcbopts(&in6p->in6p_outputopts,
1881 m, so, sopt);
1882 m_freem(m); /* XXX */
1883 break;
1884 }
1885
1886 /*
1887 * Use of some Hop-by-Hop options or some
1888 * Destination options, might require special
1889 * privilege. That is, normal applications
1890 * (without special privilege) might be forbidden
1891 * from setting certain options in outgoing packets,
1892 * and might never see certain options in received
1893 * packets. [RFC 2292 Section 6]
1894 * KAME specific note:
1895 * KAME prevents non-privileged users from sending or
1896 * receiving ANY hbh/dst options in order to avoid
1897 * overhead of parsing options in the kernel.
1898 */
1899 case IPV6_RECVHOPOPTS:
1900 case IPV6_RECVDSTOPTS:
1901 case IPV6_RECVRTHDRDSTOPTS:
1902 if (!privileged)
1903 break;
1904 /* FALLTHROUGH */
1905 case IPV6_UNICAST_HOPS:
1906 case IPV6_HOPLIMIT:
1907
1908 case IPV6_RECVPKTINFO:
1909 case IPV6_RECVHOPLIMIT:
1910 case IPV6_RECVRTHDR:
1911 case IPV6_RECVPATHMTU:
1912 case IPV6_RECVTCLASS:
1913 case IPV6_V6ONLY:
1914 case IPV6_AUTOFLOWLABEL:
1915 if (optlen != sizeof(int)) {
1916 error = EINVAL;
1917 break;
1918 }
1919 error = sooptcopyin(sopt, &optval,
1920 sizeof optval, sizeof optval);
1921 if (error)
1922 break;
1923 switch (optname) {
1924
1925 case IPV6_UNICAST_HOPS:
1926 if (optval < -1 || optval >= 256)
1927 error = EINVAL;
1928 else {
1929 /* -1 = kernel default */
1930 in6p->in6p_hops = optval;
1931 if ((in6p->inp_vflag &
1932 INP_IPV4) != 0)
1933 in6p->inp_ip_ttl = optval;
1934 }
1935 break;
1936 #define OPTSET(bit) \
1937 do { \
1938 if (optval) \
1939 in6p->inp_flags |= (bit); \
1940 else \
1941 in6p->inp_flags &= ~(bit); \
1942 } while (/*CONSTCOND*/ 0)
1943 #define OPTSET2292(bit) \
1944 do { \
1945 in6p->inp_flags |= IN6P_RFC2292; \
1946 if (optval) \
1947 in6p->inp_flags |= (bit); \
1948 else \
1949 in6p->inp_flags &= ~(bit); \
1950 } while (/*CONSTCOND*/ 0)
1951 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1952
1953 case IPV6_RECVPKTINFO:
1954 /* cannot mix with RFC2292 */
1955 if (OPTBIT(IN6P_RFC2292)) {
1956 error = EINVAL;
1957 break;
1958 }
1959 OPTSET(IN6P_PKTINFO);
1960 break;
1961
1962 case IPV6_HOPLIMIT:
1963 {
1964 struct ip6_pktopts **optp;
1965
1966 /* cannot mix with RFC2292 */
1967 if (OPTBIT(IN6P_RFC2292)) {
1968 error = EINVAL;
1969 break;
1970 }
1971 optp = &in6p->in6p_outputopts;
1972 error = ip6_pcbopt(IPV6_HOPLIMIT,
1973 (u_char *)&optval, sizeof(optval),
1974 optp, uproto);
1975 break;
1976 }
1977
1978 case IPV6_RECVHOPLIMIT:
1979 /* cannot mix with RFC2292 */
1980 if (OPTBIT(IN6P_RFC2292)) {
1981 error = EINVAL;
1982 break;
1983 }
1984 OPTSET(IN6P_HOPLIMIT);
1985 break;
1986
1987 case IPV6_RECVHOPOPTS:
1988 /* cannot mix with RFC2292 */
1989 if (OPTBIT(IN6P_RFC2292)) {
1990 error = EINVAL;
1991 break;
1992 }
1993 OPTSET(IN6P_HOPOPTS);
1994 break;
1995
1996 case IPV6_RECVDSTOPTS:
1997 /* cannot mix with RFC2292 */
1998 if (OPTBIT(IN6P_RFC2292)) {
1999 error = EINVAL;
2000 break;
2001 }
2002 OPTSET(IN6P_DSTOPTS);
2003 break;
2004
2005 case IPV6_RECVRTHDRDSTOPTS:
2006 /* cannot mix with RFC2292 */
2007 if (OPTBIT(IN6P_RFC2292)) {
2008 error = EINVAL;
2009 break;
2010 }
2011 OPTSET(IN6P_RTHDRDSTOPTS);
2012 break;
2013
2014 case IPV6_RECVRTHDR:
2015 /* cannot mix with RFC2292 */
2016 if (OPTBIT(IN6P_RFC2292)) {
2017 error = EINVAL;
2018 break;
2019 }
2020 OPTSET(IN6P_RTHDR);
2021 break;
2022
2023 case IPV6_RECVPATHMTU:
2024 /*
2025 * We ignore this option for TCP
2026 * sockets.
2027 * (RFC3542 leaves this case
2028 * unspecified.)
2029 */
2030 if (uproto != IPPROTO_TCP)
2031 OPTSET(IN6P_MTU);
2032 break;
2033
2034 case IPV6_V6ONLY:
2035 /*
2036 * make setsockopt(IPV6_V6ONLY)
2037 * available only prior to bind(2).
2038 * see ipng mailing list, Jun 22 2001.
2039 */
2040 if (in6p->inp_lport ||
2041 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
2042 error = EINVAL;
2043 break;
2044 }
2045 OPTSET(IN6P_IPV6_V6ONLY);
2046 if (optval)
2047 in6p->inp_vflag &= ~INP_IPV4;
2048 else
2049 in6p->inp_vflag |= INP_IPV4;
2050 break;
2051 case IPV6_RECVTCLASS:
2052 /* we can mix with RFC2292 */
2053 OPTSET(IN6P_TCLASS);
2054 break;
2055 case IPV6_AUTOFLOWLABEL:
2056 OPTSET(IN6P_AUTOFLOWLABEL);
2057 break;
2058
2059 }
2060 break;
2061
2062 case IPV6_TCLASS:
2063 case IPV6_DONTFRAG:
2064 case IPV6_USE_MIN_MTU:
2065 case IPV6_PREFER_TEMPADDR:
2066 if (optlen != sizeof(optval)) {
2067 error = EINVAL;
2068 break;
2069 }
2070 error = sooptcopyin(sopt, &optval,
2071 sizeof optval, sizeof optval);
2072 if (error)
2073 break;
2074 {
2075 struct ip6_pktopts **optp;
2076 optp = &in6p->in6p_outputopts;
2077 error = ip6_pcbopt(optname,
2078 (u_char *)&optval, sizeof(optval),
2079 optp, uproto);
2080 break;
2081 }
2082
2083 case IPV6_2292PKTINFO:
2084 case IPV6_2292HOPLIMIT:
2085 case IPV6_2292HOPOPTS:
2086 case IPV6_2292DSTOPTS:
2087 case IPV6_2292RTHDR:
2088 /* RFC 2292 */
2089 if (optlen != sizeof(int)) {
2090 error = EINVAL;
2091 break;
2092 }
2093 error = sooptcopyin(sopt, &optval,
2094 sizeof optval, sizeof optval);
2095 if (error)
2096 break;
2097 switch (optname) {
2098 case IPV6_2292PKTINFO:
2099 OPTSET2292(IN6P_PKTINFO);
2100 break;
2101 case IPV6_2292HOPLIMIT:
2102 OPTSET2292(IN6P_HOPLIMIT);
2103 break;
2104 case IPV6_2292HOPOPTS:
2105 /*
2106 * Check super-user privilege.
2107 * See comments for IPV6_RECVHOPOPTS.
2108 */
2109 if (!privileged)
2110 return(EPERM);
2111 OPTSET2292(IN6P_HOPOPTS);
2112 break;
2113 case IPV6_2292DSTOPTS:
2114 if (!privileged)
2115 return(EPERM);
2116 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
2117 break;
2118 case IPV6_2292RTHDR:
2119 OPTSET2292(IN6P_RTHDR);
2120 break;
2121 }
2122 break;
2123 case IPV6_3542PKTINFO:
2124 case IPV6_3542HOPOPTS:
2125 case IPV6_3542RTHDR:
2126 case IPV6_3542DSTOPTS:
2127 case IPV6_RTHDRDSTOPTS:
2128 case IPV6_3542NEXTHOP:
2129 {
2130 struct ip6_pktopts **optp;
2131 /* new advanced API (RFC3542) */
2132 struct mbuf *m;
2133
2134 /* cannot mix with RFC2292 */
2135 if (OPTBIT(IN6P_RFC2292)) {
2136 error = EINVAL;
2137 break;
2138 }
2139 error = soopt_getm(sopt, &m);
2140 if (error != 0)
2141 break;
2142 error = soopt_mcopyin(sopt, m);
2143 if (error) {
2144 m_freem(m);
2145 break;
2146 }
2147 optp = &in6p->in6p_outputopts;
2148 error = ip6_pcbopt(optname, mtod(m, u_char *),
2149 m->m_len, optp, uproto);
2150 m_freem(m);
2151 break;
2152 }
2153 #undef OPTSET
2154
2155 case IPV6_MULTICAST_IF:
2156 case IPV6_MULTICAST_HOPS:
2157 case IPV6_MULTICAST_LOOP:
2158 case IPV6_JOIN_GROUP:
2159 case IPV6_LEAVE_GROUP:
2160 case IPV6_MSFILTER:
2161 case MCAST_BLOCK_SOURCE:
2162 case MCAST_UNBLOCK_SOURCE:
2163 case MCAST_JOIN_GROUP:
2164 case MCAST_LEAVE_GROUP:
2165 case MCAST_JOIN_SOURCE_GROUP:
2166 case MCAST_LEAVE_SOURCE_GROUP:
2167 error = ip6_setmoptions(in6p, sopt);
2168 break;
2169
2170 case IPV6_PORTRANGE:
2171 error = sooptcopyin(sopt, &optval,
2172 sizeof optval, sizeof optval);
2173 if (error)
2174 break;
2175
2176 switch (optval) {
2177 case IPV6_PORTRANGE_DEFAULT:
2178 in6p->inp_flags &= ~(INP_LOWPORT);
2179 in6p->inp_flags &= ~(INP_HIGHPORT);
2180 break;
2181
2182 case IPV6_PORTRANGE_HIGH:
2183 in6p->inp_flags &= ~(INP_LOWPORT);
2184 in6p->inp_flags |= INP_HIGHPORT;
2185 break;
2186
2187 case IPV6_PORTRANGE_LOW:
2188 in6p->inp_flags &= ~(INP_HIGHPORT);
2189 in6p->inp_flags |= INP_LOWPORT;
2190 break;
2191
2192 default:
2193 error = EINVAL;
2194 break;
2195 }
2196 break;
2197
2198 #if IPSEC
2199 case IPV6_IPSEC_POLICY:
2200 {
2201 caddr_t req = NULL;
2202 size_t len = 0;
2203 struct mbuf *m;
2204
2205 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2206 break;
2207 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2208 break;
2209 if (m) {
2210 req = mtod(m, caddr_t);
2211 len = m->m_len;
2212 }
2213 error = ipsec6_set_policy(in6p, optname, req,
2214 len, privileged);
2215 m_freem(m);
2216 }
2217 break;
2218 #endif /* KAME IPSEC */
2219
2220 #if IPFIREWALL
2221 case IPV6_FW_ADD:
2222 case IPV6_FW_DEL:
2223 case IPV6_FW_FLUSH:
2224 case IPV6_FW_ZERO:
2225 {
2226 if (ip6_fw_ctl_ptr == NULL)
2227 load_ip6fw();
2228 if (ip6_fw_ctl_ptr != NULL)
2229 error = (*ip6_fw_ctl_ptr)(sopt);
2230 else
2231 return ENOPROTOOPT;
2232 }
2233 break;
2234 #endif /* IPFIREWALL */
2235
2236 /*
2237 * IPv6 variant of IP_BOUND_IF; for details see
2238 * comments on IP_BOUND_IF in ip_ctloutput().
2239 */
2240 case IPV6_BOUND_IF:
2241 /* This option is settable only on IPv6 */
2242 if (!(in6p->inp_vflag & INP_IPV6)) {
2243 error = EINVAL;
2244 break;
2245 }
2246
2247 error = sooptcopyin(sopt, &optval,
2248 sizeof (optval), sizeof (optval));
2249
2250 if (error)
2251 break;
2252
2253 error = inp_bindif(in6p, optval);
2254 break;
2255
2256 case IPV6_NO_IFT_CELLULAR:
2257 /* This option is settable only for IPv6 */
2258 if (!(in6p->inp_vflag & INP_IPV6)) {
2259 error = EINVAL;
2260 break;
2261 }
2262
2263 error = sooptcopyin(sopt, &optval,
2264 sizeof (optval), sizeof (optval));
2265
2266 if (error)
2267 break;
2268
2269 error = inp_nocellular(in6p, optval);
2270 break;
2271
2272 case IPV6_OUT_IF:
2273 /* This option is not settable */
2274 error = EINVAL;
2275 break;
2276
2277 default:
2278 error = ENOPROTOOPT;
2279 break;
2280 }
2281 break;
2282
2283 case SOPT_GET:
2284 switch (optname) {
2285
2286 case IPV6_2292PKTOPTIONS:
2287 /*
2288 * RFC3542 (effectively) deprecated the
2289 * semantics of the 2292-style pktoptions.
2290 * Since it was not reliable in nature (i.e.,
2291 * applications had to expect the lack of some
2292 * information after all), it would make sense
2293 * to simplify this part by always returning
2294 * empty data.
2295 */
2296 sopt->sopt_valsize = 0;
2297 break;
2298
2299 case IPV6_RECVHOPOPTS:
2300 case IPV6_RECVDSTOPTS:
2301 case IPV6_RECVRTHDRDSTOPTS:
2302 case IPV6_UNICAST_HOPS:
2303 case IPV6_RECVPKTINFO:
2304 case IPV6_RECVHOPLIMIT:
2305 case IPV6_RECVRTHDR:
2306 case IPV6_RECVPATHMTU:
2307
2308 case IPV6_V6ONLY:
2309 case IPV6_PORTRANGE:
2310 case IPV6_RECVTCLASS:
2311 case IPV6_AUTOFLOWLABEL:
2312 switch (optname) {
2313
2314 case IPV6_RECVHOPOPTS:
2315 optval = OPTBIT(IN6P_HOPOPTS);
2316 break;
2317
2318 case IPV6_RECVDSTOPTS:
2319 optval = OPTBIT(IN6P_DSTOPTS);
2320 break;
2321
2322 case IPV6_RECVRTHDRDSTOPTS:
2323 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2324 break;
2325
2326 case IPV6_UNICAST_HOPS:
2327 optval = in6p->in6p_hops;
2328 break;
2329
2330 case IPV6_RECVPKTINFO:
2331 optval = OPTBIT(IN6P_PKTINFO);
2332 break;
2333
2334 case IPV6_RECVHOPLIMIT:
2335 optval = OPTBIT(IN6P_HOPLIMIT);
2336 break;
2337
2338 case IPV6_RECVRTHDR:
2339 optval = OPTBIT(IN6P_RTHDR);
2340 break;
2341
2342 case IPV6_RECVPATHMTU:
2343 optval = OPTBIT(IN6P_MTU);
2344 break;
2345
2346 case IPV6_V6ONLY:
2347 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2348 break;
2349
2350 case IPV6_PORTRANGE:
2351 {
2352 int flags;
2353 flags = in6p->inp_flags;
2354 if (flags & INP_HIGHPORT)
2355 optval = IPV6_PORTRANGE_HIGH;
2356 else if (flags & INP_LOWPORT)
2357 optval = IPV6_PORTRANGE_LOW;
2358 else
2359 optval = 0;
2360 break;
2361 }
2362 case IPV6_RECVTCLASS:
2363 optval = OPTBIT(IN6P_TCLASS);
2364 break;
2365
2366 case IPV6_AUTOFLOWLABEL:
2367 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2368 break;
2369 }
2370 if (error)
2371 break;
2372 error = sooptcopyout(sopt, &optval,
2373 sizeof optval);
2374 break;
2375
2376 case IPV6_PATHMTU:
2377 {
2378 u_int32_t pmtu = 0;
2379 struct ip6_mtuinfo mtuinfo;
2380 struct route_in6 sro;
2381
2382 bzero(&sro, sizeof(sro));
2383
2384 if (!(so->so_state & SS_ISCONNECTED))
2385 return (ENOTCONN);
2386 /*
2387 * XXX: we dot not consider the case of source
2388 * routing, or optional information to specify
2389 * the outgoing interface.
2390 */
2391 error = ip6_getpmtu(&sro, NULL, NULL,
2392 &in6p->in6p_faddr, &pmtu, NULL);
2393 if (sro.ro_rt)
2394 rtfree(sro.ro_rt);
2395 if (error)
2396 break;
2397 if (pmtu > IPV6_MAXPACKET)
2398 pmtu = IPV6_MAXPACKET;
2399
2400 bzero(&mtuinfo, sizeof(mtuinfo));
2401 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2402 optdata = (void *)&mtuinfo;
2403 optdatalen = sizeof(mtuinfo);
2404 error = sooptcopyout(sopt, optdata,
2405 optdatalen);
2406 break;
2407 }
2408
2409 case IPV6_2292PKTINFO:
2410 case IPV6_2292HOPLIMIT:
2411 case IPV6_2292HOPOPTS:
2412 case IPV6_2292RTHDR:
2413 case IPV6_2292DSTOPTS:
2414 switch (optname) {
2415 case IPV6_2292PKTINFO:
2416 optval = OPTBIT(IN6P_PKTINFO);
2417 break;
2418 case IPV6_2292HOPLIMIT:
2419 optval = OPTBIT(IN6P_HOPLIMIT);
2420 break;
2421 case IPV6_2292HOPOPTS:
2422 optval = OPTBIT(IN6P_HOPOPTS);
2423 break;
2424 case IPV6_2292RTHDR:
2425 optval = OPTBIT(IN6P_RTHDR);
2426 break;
2427 case IPV6_2292DSTOPTS:
2428 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2429 break;
2430 }
2431 error = sooptcopyout(sopt, &optval,
2432 sizeof optval);
2433 break;
2434 case IPV6_PKTINFO:
2435 case IPV6_HOPOPTS:
2436 case IPV6_RTHDR:
2437 case IPV6_DSTOPTS:
2438 case IPV6_RTHDRDSTOPTS:
2439 case IPV6_NEXTHOP:
2440 case IPV6_TCLASS:
2441 case IPV6_DONTFRAG:
2442 case IPV6_USE_MIN_MTU:
2443 case IPV6_PREFER_TEMPADDR:
2444 error = ip6_getpcbopt(in6p->in6p_outputopts,
2445 optname, sopt);
2446 break;
2447
2448 case IPV6_MULTICAST_IF:
2449 case IPV6_MULTICAST_HOPS:
2450 case IPV6_MULTICAST_LOOP:
2451 case IPV6_MSFILTER:
2452 error = ip6_getmoptions(in6p, sopt);
2453 break;
2454
2455 #if IPSEC
2456 case IPV6_IPSEC_POLICY:
2457 {
2458 caddr_t req = NULL;
2459 size_t len = 0;
2460 struct mbuf *m = NULL;
2461 struct mbuf **mp = &m;
2462
2463 error = soopt_getm(sopt, &m); /* XXX */
2464 if (error != 0)
2465 break;
2466 error = soopt_mcopyin(sopt, m); /* XXX */
2467 if (error != 0)
2468 break;
2469 if (m) {
2470 req = mtod(m, caddr_t);
2471 len = m->m_len;
2472 }
2473 error = ipsec6_get_policy(in6p, req, len, mp);
2474 if (error == 0)
2475 error = soopt_mcopyout(sopt, m); /*XXX*/
2476 if (error == 0 && m)
2477 m_freem(m);
2478 break;
2479 }
2480 #endif /* KAME IPSEC */
2481
2482 #if IPFIREWALL
2483 case IPV6_FW_GET:
2484 {
2485 if (ip6_fw_ctl_ptr == NULL)
2486 load_ip6fw();
2487 if (ip6_fw_ctl_ptr != NULL)
2488 error = (*ip6_fw_ctl_ptr)(sopt);
2489 else
2490 return ENOPROTOOPT;
2491 }
2492 break;
2493 #endif /* IPFIREWALL */
2494
2495 case IPV6_BOUND_IF:
2496 if (in6p->inp_flags & INP_BOUND_IF)
2497 optval = in6p->inp_boundifp->if_index;
2498 error = sooptcopyout(sopt, &optval,
2499 sizeof (optval));
2500 break;
2501
2502 case IPV6_NO_IFT_CELLULAR:
2503 optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
2504 ? 1 : 0;
2505 error = sooptcopyout(sopt, &optval,
2506 sizeof (optval));
2507 break;
2508
2509 case IPV6_OUT_IF:
2510 optval = (in6p->in6p_last_outifp != NULL) ?
2511 in6p->in6p_last_outifp->if_index : 0;
2512 error = sooptcopyout(sopt, &optval,
2513 sizeof (optval));
2514 break;
2515
2516 default:
2517 error = ENOPROTOOPT;
2518 break;
2519 }
2520 break;
2521 }
2522 } else {
2523 error = EINVAL;
2524 }
2525 return(error);
2526 }
2527
2528 int
2529 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2530 {
2531 int error = 0, optval, optlen;
2532 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2533 struct inpcb *in6p = sotoinpcb(so);
2534 int level, op, optname;
2535
2536 level = sopt->sopt_level;
2537 op = sopt->sopt_dir;
2538 optname = sopt->sopt_name;
2539 optlen = sopt->sopt_valsize;
2540
2541 if (level != IPPROTO_IPV6) {
2542 return (EINVAL);
2543 }
2544
2545 switch (optname) {
2546 case IPV6_CHECKSUM:
2547 /*
2548 * For ICMPv6 sockets, no modification allowed for checksum
2549 * offset, permit "no change" values to help existing apps.
2550 *
2551 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2552 * for an ICMPv6 socket will fail."
2553 * The current behavior does not meet RFC3542.
2554 */
2555 switch (op) {
2556 case SOPT_SET:
2557 if (optlen != sizeof(int)) {
2558 error = EINVAL;
2559 break;
2560 }
2561 error = sooptcopyin(sopt, &optval, sizeof(optval),
2562 sizeof(optval));
2563 if (error)
2564 break;
2565 if ((optval % 2) != 0) {
2566 /* the API assumes even offset values */
2567 error = EINVAL;
2568 } else if (so->so_proto->pr_protocol ==
2569 IPPROTO_ICMPV6) {
2570 if (optval != icmp6off)
2571 error = EINVAL;
2572 } else
2573 in6p->in6p_cksum = optval;
2574 break;
2575
2576 case SOPT_GET:
2577 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2578 optval = icmp6off;
2579 else
2580 optval = in6p->in6p_cksum;
2581
2582 error = sooptcopyout(sopt, &optval, sizeof(optval));
2583 break;
2584
2585 default:
2586 error = EINVAL;
2587 break;
2588 }
2589 break;
2590
2591 default:
2592 error = ENOPROTOOPT;
2593 break;
2594 }
2595
2596 return (error);
2597 }
2598
2599 /*
2600 * Set up IP6 options in pcb for insertion in output packets or
2601 * specifying behavior of outgoing packets.
2602 */
2603 static int
2604 ip6_pcbopts(
2605 struct ip6_pktopts **pktopt,
2606 struct mbuf *m,
2607 __unused struct socket *so,
2608 __unused struct sockopt *sopt)
2609 {
2610 struct ip6_pktopts *opt = *pktopt;
2611 int error = 0;
2612
2613 /* turn off any old options. */
2614 if (opt) {
2615 #if DIAGNOSTIC
2616 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2617 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2618 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2619 printf("ip6_pcbopts: all specified options are cleared.\n");
2620 #endif
2621 ip6_clearpktopts(opt, -1);
2622 } else {
2623 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2624 if (opt == NULL)
2625 return ENOBUFS;
2626 }
2627 *pktopt = NULL;
2628
2629 if (!m || m->m_len == 0) {
2630 /*
2631 * Only turning off any previous options, regardless of
2632 * whether the opt is just created or given.
2633 */
2634 if (opt)
2635 FREE(opt, M_IP6OPT);
2636 return(0);
2637 }
2638
2639 /* set options specified by user. */
2640 if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) {
2641 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2642 FREE(opt, M_IP6OPT);
2643 return(error);
2644 }
2645 *pktopt = opt;
2646 return(0);
2647 }
2648
2649 /*
2650 * initialize ip6_pktopts. beware that there are non-zero default values in
2651 * the struct.
2652 */
2653 void
2654 ip6_initpktopts(struct ip6_pktopts *opt)
2655 {
2656
2657 bzero(opt, sizeof(*opt));
2658 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2659 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2660 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2661 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2662 }
2663
2664 static int
2665 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
2666 int uproto)
2667 {
2668 struct ip6_pktopts *opt;
2669
2670 opt = *pktopt;
2671 if (opt == NULL) {
2672 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2673 if (opt == NULL)
2674 return(ENOBUFS);
2675 ip6_initpktopts(opt);
2676 *pktopt = opt;
2677 }
2678
2679 return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
2680 }
2681
2682 static int
2683 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2684 {
2685 void *optdata = NULL;
2686 int optdatalen = 0;
2687 struct ip6_ext *ip6e;
2688 int error = 0;
2689 struct in6_pktinfo null_pktinfo;
2690 int deftclass = 0, on;
2691 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2692 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2693
2694
2695 switch (optname) {
2696 case IPV6_PKTINFO:
2697 if (pktopt && pktopt->ip6po_pktinfo)
2698 optdata = (void *)pktopt->ip6po_pktinfo;
2699 else {
2700 /* XXX: we don't have to do this every time... */
2701 bzero(&null_pktinfo, sizeof(null_pktinfo));
2702 optdata = (void *)&null_pktinfo;
2703 }
2704 optdatalen = sizeof(struct in6_pktinfo);
2705 break;
2706 case IPV6_TCLASS:
2707 if (pktopt && pktopt->ip6po_tclass >= 0)
2708 optdata = (void *)&pktopt->ip6po_tclass;
2709 else
2710 optdata = (void *)&deftclass;
2711 optdatalen = sizeof(int);
2712 break;
2713 case IPV6_HOPOPTS:
2714 if (pktopt && pktopt->ip6po_hbh) {
2715 optdata = (void *)pktopt->ip6po_hbh;
2716 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2717 optdatalen = (ip6e->ip6e_len + 1) << 3;
2718 }
2719 break;
2720 case IPV6_RTHDR:
2721 if (pktopt && pktopt->ip6po_rthdr) {
2722 optdata = (void *)pktopt->ip6po_rthdr;
2723 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2724 optdatalen = (ip6e->ip6e_len + 1) << 3;
2725 }
2726 break;
2727 case IPV6_RTHDRDSTOPTS:
2728 if (pktopt && pktopt->ip6po_dest1) {
2729 optdata = (void *)pktopt->ip6po_dest1;
2730 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2731 optdatalen = (ip6e->ip6e_len + 1) << 3;
2732 }
2733 break;
2734 case IPV6_DSTOPTS:
2735 if (pktopt && pktopt->ip6po_dest2) {
2736 optdata = (void *)pktopt->ip6po_dest2;
2737 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2738 optdatalen = (ip6e->ip6e_len + 1) << 3;
2739 }
2740 break;
2741 case IPV6_NEXTHOP:
2742 if (pktopt && pktopt->ip6po_nexthop) {
2743 optdata = (void *)pktopt->ip6po_nexthop;
2744 optdatalen = pktopt->ip6po_nexthop->sa_len;
2745 }
2746 break;
2747 case IPV6_USE_MIN_MTU:
2748 if (pktopt)
2749 optdata = (void *)&pktopt->ip6po_minmtu;
2750 else
2751 optdata = (void *)&defminmtu;
2752 optdatalen = sizeof(int);
2753 break;
2754 case IPV6_DONTFRAG:
2755 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2756 on = 1;
2757 else
2758 on = 0;
2759 optdata = (void *)&on;
2760 optdatalen = sizeof(on);
2761 break;
2762 case IPV6_PREFER_TEMPADDR:
2763 if (pktopt)
2764 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2765 else
2766 optdata = (void *)&defpreftemp;
2767 optdatalen = sizeof(int);
2768 break;
2769 default: /* should not happen */
2770 #ifdef DIAGNOSTIC
2771 panic("ip6_getpcbopt: unexpected option\n");
2772 #endif
2773 return (ENOPROTOOPT);
2774 }
2775
2776 error = sooptcopyout(sopt, optdata, optdatalen);
2777
2778 return (error);
2779 }
2780
2781 void
2782 ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
2783 {
2784 if (pktopt == NULL)
2785 return;
2786
2787 if (optname == -1 || optname == IPV6_PKTINFO) {
2788 if (pktopt->ip6po_pktinfo)
2789 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
2790 pktopt->ip6po_pktinfo = NULL;
2791 }
2792 if (optname == -1 || optname == IPV6_HOPLIMIT)
2793 pktopt->ip6po_hlim = -1;
2794 if (optname == -1 || optname == IPV6_TCLASS)
2795 pktopt->ip6po_tclass = -1;
2796 if (optname == -1 || optname == IPV6_NEXTHOP) {
2797 if (pktopt->ip6po_nextroute.ro_rt) {
2798 rtfree(pktopt->ip6po_nextroute.ro_rt);
2799 pktopt->ip6po_nextroute.ro_rt = NULL;
2800 }
2801 if (pktopt->ip6po_nexthop)
2802 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
2803 pktopt->ip6po_nexthop = NULL;
2804 }
2805 if (optname == -1 || optname == IPV6_HOPOPTS) {
2806 if (pktopt->ip6po_hbh)
2807 FREE(pktopt->ip6po_hbh, M_IP6OPT);
2808 pktopt->ip6po_hbh = NULL;
2809 }
2810 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2811 if (pktopt->ip6po_dest1)
2812 FREE(pktopt->ip6po_dest1, M_IP6OPT);
2813 pktopt->ip6po_dest1 = NULL;
2814 }
2815 if (optname == -1 || optname == IPV6_RTHDR) {
2816 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2817 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2818 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2819 if (pktopt->ip6po_route.ro_rt) {
2820 rtfree(pktopt->ip6po_route.ro_rt);
2821 pktopt->ip6po_route.ro_rt = NULL;
2822 }
2823 }
2824 if (optname == -1 || optname == IPV6_DSTOPTS) {
2825 if (pktopt->ip6po_dest2)
2826 FREE(pktopt->ip6po_dest2, M_IP6OPT);
2827 pktopt->ip6po_dest2 = NULL;
2828 }
2829 }
2830
2831 #define PKTOPT_EXTHDRCPY(type) \
2832 do {\
2833 if (src->type) {\
2834 int hlen =\
2835 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2836 dst->type = _MALLOC(hlen, M_IP6OPT, canwait);\
2837 if (dst->type == NULL && canwait == M_NOWAIT)\
2838 goto bad;\
2839 bcopy(src->type, dst->type, hlen);\
2840 }\
2841 } while (0)
2842
2843 static int
2844 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2845 {
2846 if (dst == NULL || src == NULL) {
2847 printf("copypktopts: invalid argument\n");
2848 return (EINVAL);
2849 }
2850
2851 dst->ip6po_hlim = src->ip6po_hlim;
2852 dst->ip6po_tclass = src->ip6po_tclass;
2853 dst->ip6po_flags = src->ip6po_flags;
2854 if (src->ip6po_pktinfo) {
2855 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
2856 M_IP6OPT, canwait);
2857 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2858 goto bad;
2859 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2860 }
2861 if (src->ip6po_nexthop) {
2862 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
2863 M_IP6OPT, canwait);
2864 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2865 goto bad;
2866 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2867 src->ip6po_nexthop->sa_len);
2868 }
2869 PKTOPT_EXTHDRCPY(ip6po_hbh);
2870 PKTOPT_EXTHDRCPY(ip6po_dest1);
2871 PKTOPT_EXTHDRCPY(ip6po_dest2);
2872 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2873 return (0);
2874
2875 bad:
2876 ip6_clearpktopts(dst, -1);
2877 return (ENOBUFS);
2878 }
2879 #undef PKTOPT_EXTHDRCPY
2880
2881 struct ip6_pktopts *
2882 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2883 {
2884 int error;
2885 struct ip6_pktopts *dst;
2886
2887 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
2888 if (dst == NULL)
2889 return (NULL);
2890 ip6_initpktopts(dst);
2891
2892 if ((error = copypktopts(dst, src, canwait)) != 0) {
2893 FREE(dst, M_IP6OPT);
2894 return (NULL);
2895 }
2896
2897 return (dst);
2898 }
2899
2900 void
2901 ip6_freepcbopts(struct ip6_pktopts *pktopt)
2902 {
2903 if (pktopt == NULL)
2904 return;
2905
2906 ip6_clearpktopts(pktopt, -1);
2907
2908 FREE(pktopt, M_IP6OPT);
2909 }
2910
2911 void
2912 ip6_moptions_init(void)
2913 {
2914 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
2915
2916 im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
2917 sizeof (struct ip6_moptions_dbg);
2918
2919 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
2920 IM6O_ZONE_NAME);
2921 if (im6o_zone == NULL) {
2922 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
2923 /* NOTREACHED */
2924 }
2925 zone_change(im6o_zone, Z_EXPAND, TRUE);
2926 }
2927
2928 void
2929 im6o_addref(struct ip6_moptions *im6o, int locked)
2930 {
2931 if (!locked)
2932 IM6O_LOCK(im6o);
2933 else
2934 IM6O_LOCK_ASSERT_HELD(im6o);
2935
2936 if (++im6o->im6o_refcnt == 0) {
2937 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
2938 /* NOTREACHED */
2939 } else if (im6o->im6o_trace != NULL) {
2940 (*im6o->im6o_trace)(im6o, TRUE);
2941 }
2942
2943 if (!locked)
2944 IM6O_UNLOCK(im6o);
2945 }
2946
2947 void
2948 im6o_remref(struct ip6_moptions *im6o)
2949 {
2950 int i;
2951
2952 IM6O_LOCK(im6o);
2953 if (im6o->im6o_refcnt == 0) {
2954 panic("%s: im6o %p negative refcnt", __func__, im6o);
2955 /* NOTREACHED */
2956 } else if (im6o->im6o_trace != NULL) {
2957 (*im6o->im6o_trace)(im6o, FALSE);
2958 }
2959
2960 --im6o->im6o_refcnt;
2961 if (im6o->im6o_refcnt > 0) {
2962 IM6O_UNLOCK(im6o);
2963 return;
2964 }
2965
2966 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
2967 struct in6_mfilter *imf;
2968
2969 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
2970 if (imf != NULL)
2971 im6f_leave(imf);
2972
2973 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
2974
2975 if (imf != NULL)
2976 im6f_purge(imf);
2977
2978 IN6M_REMREF(im6o->im6o_membership[i]);
2979 im6o->im6o_membership[i] = NULL;
2980 }
2981 im6o->im6o_num_memberships = 0;
2982 if (im6o->im6o_mfilters != NULL) {
2983 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
2984 im6o->im6o_mfilters = NULL;
2985 }
2986 if (im6o->im6o_membership != NULL) {
2987 FREE(im6o->im6o_membership, M_IP6MOPTS);
2988 im6o->im6o_membership = NULL;
2989 }
2990 IM6O_UNLOCK(im6o);
2991
2992 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
2993
2994 if (!(im6o->im6o_debug & IFD_ALLOC)) {
2995 panic("%s: im6o %p cannot be freed", __func__, im6o);
2996 /* NOTREACHED */
2997 }
2998 zfree(im6o_zone, im6o);
2999 }
3000
3001 static void
3002 im6o_trace(struct ip6_moptions *im6o, int refhold)
3003 {
3004 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
3005 ctrace_t *tr;
3006 u_int32_t idx;
3007 u_int16_t *cnt;
3008
3009 if (!(im6o->im6o_debug & IFD_DEBUG)) {
3010 panic("%s: im6o %p has no debug structure", __func__, im6o);
3011 /* NOTREACHED */
3012 }
3013 if (refhold) {
3014 cnt = &im6o_dbg->im6o_refhold_cnt;
3015 tr = im6o_dbg->im6o_refhold;
3016 } else {
3017 cnt = &im6o_dbg->im6o_refrele_cnt;
3018 tr = im6o_dbg->im6o_refrele;
3019 }
3020
3021 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
3022 ctrace_record(&tr[idx]);
3023 }
3024
3025 struct ip6_moptions *
3026 ip6_allocmoptions(int how)
3027 {
3028 struct ip6_moptions *im6o;
3029
3030 im6o = (how == M_WAITOK) ?
3031 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
3032 if (im6o != NULL) {
3033 bzero(im6o, im6o_size);
3034 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
3035 im6o->im6o_debug |= IFD_ALLOC;
3036 if (im6o_debug != 0) {
3037 im6o->im6o_debug |= IFD_DEBUG;
3038 im6o->im6o_trace = im6o_trace;
3039 }
3040 IM6O_ADDREF(im6o);
3041 }
3042
3043 return (im6o);
3044 }
3045
3046 /*
3047 * Set IPv6 outgoing packet options based on advanced API.
3048 */
3049 int
3050 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
3051 struct ip6_pktopts *stickyopt, int uproto)
3052 {
3053 struct cmsghdr *cm = 0;
3054
3055 if (control == NULL || opt == NULL)
3056 return (EINVAL);
3057
3058 ip6_initpktopts(opt);
3059 if (stickyopt) {
3060 int error;
3061
3062 /*
3063 * If stickyopt is provided, make a local copy of the options
3064 * for this particular packet, then override them by ancillary
3065 * objects.
3066 * XXX: copypktopts() does not copy the cached route to a next
3067 * hop (if any). This is not very good in terms of efficiency,
3068 * but we can allow this since this option should be rarely
3069 * used.
3070 */
3071 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
3072 return (error);
3073 }
3074
3075 /*
3076 * XXX: Currently, we assume all the optional information is stored
3077 * in a single mbuf.
3078 */
3079 if (control->m_next)
3080 return (EINVAL);
3081
3082 if (control->m_len < CMSG_LEN(0))
3083 return (EINVAL);
3084
3085 for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
3086 int error;
3087
3088 if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
3089 return (EINVAL);
3090 if (cm->cmsg_level != IPPROTO_IPV6)
3091 continue;
3092
3093 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
3094 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
3095 if (error)
3096 return (error);
3097 }
3098
3099 return (0);
3100 }
3101 /*
3102 * Set a particular packet option, as a sticky option or an ancillary data
3103 * item. "len" can be 0 only when it's a sticky option.
3104 * We have 4 cases of combination of "sticky" and "cmsg":
3105 * "sticky=0, cmsg=0": impossible
3106 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
3107 * "sticky=1, cmsg=0": RFC3542 socket option
3108 * "sticky=1, cmsg=1": RFC2292 socket option
3109 */
3110 static int
3111 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
3112 int sticky, int cmsg, int uproto)
3113 {
3114 int minmtupolicy, preftemp;
3115 int error;
3116
3117 if (!sticky && !cmsg) {
3118 #ifdef DIAGNOSTIC
3119 printf("ip6_setpktopt: impossible case\n");
3120 #endif
3121 return (EINVAL);
3122 }
3123
3124 /*
3125 * Caller must have ensured that the buffer is at least
3126 * aligned on 32-bit boundary.
3127 */
3128 VERIFY(IS_P2ALIGNED(buf, sizeof (u_int32_t)));
3129
3130 /*
3131 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
3132 * not be specified in the context of RFC3542. Conversely,
3133 * RFC3542 types should not be specified in the context of RFC2292.
3134 */
3135 if (!cmsg) {
3136 switch (optname) {
3137 case IPV6_2292PKTINFO:
3138 case IPV6_2292HOPLIMIT:
3139 case IPV6_2292NEXTHOP:
3140 case IPV6_2292HOPOPTS:
3141 case IPV6_2292DSTOPTS:
3142 case IPV6_2292RTHDR:
3143 case IPV6_2292PKTOPTIONS:
3144 return (ENOPROTOOPT);
3145 }
3146 }
3147 if (sticky && cmsg) {
3148 switch (optname) {
3149 case IPV6_PKTINFO:
3150 case IPV6_HOPLIMIT:
3151 case IPV6_NEXTHOP:
3152 case IPV6_HOPOPTS:
3153 case IPV6_DSTOPTS:
3154 case IPV6_RTHDRDSTOPTS:
3155 case IPV6_RTHDR:
3156 case IPV6_USE_MIN_MTU:
3157 case IPV6_DONTFRAG:
3158 case IPV6_TCLASS:
3159 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3160 return (ENOPROTOOPT);
3161 }
3162 }
3163
3164 switch (optname) {
3165 case IPV6_2292PKTINFO:
3166 case IPV6_PKTINFO:
3167 {
3168 struct ifnet *ifp = NULL;
3169 struct in6_pktinfo *pktinfo;
3170
3171 if (len != sizeof(struct in6_pktinfo))
3172 return (EINVAL);
3173
3174 pktinfo = (struct in6_pktinfo *)(void *)buf;
3175
3176 /*
3177 * An application can clear any sticky IPV6_PKTINFO option by
3178 * doing a "regular" setsockopt with ipi6_addr being
3179 * in6addr_any and ipi6_ifindex being zero.
3180 * [RFC 3542, Section 6]
3181 */
3182 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3183 pktinfo->ipi6_ifindex == 0 &&
3184 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3185 ip6_clearpktopts(opt, optname);
3186 break;
3187 }
3188
3189 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3190 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3191 return (EINVAL);
3192 }
3193
3194 /* validate the interface index if specified. */
3195 ifnet_head_lock_shared();
3196
3197 if (pktinfo->ipi6_ifindex > if_index) {
3198 ifnet_head_done();
3199 return (ENXIO);
3200 }
3201
3202 if (pktinfo->ipi6_ifindex) {
3203 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3204 if (ifp == NULL) {
3205 ifnet_head_done();
3206 return (ENXIO);
3207 }
3208 }
3209
3210 ifnet_head_done();
3211
3212 /*
3213 * We store the address anyway, and let in6_selectsrc()
3214 * validate the specified address. This is because ipi6_addr
3215 * may not have enough information about its scope zone, and
3216 * we may need additional information (such as outgoing
3217 * interface or the scope zone of a destination address) to
3218 * disambiguate the scope.
3219 * XXX: the delay of the validation may confuse the
3220 * application when it is used as a sticky option.
3221 */
3222 if (opt->ip6po_pktinfo == NULL) {
3223 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
3224 M_IP6OPT, M_NOWAIT);
3225 if (opt->ip6po_pktinfo == NULL)
3226 return (ENOBUFS);
3227 }
3228 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3229 break;
3230 }
3231
3232 case IPV6_2292HOPLIMIT:
3233 case IPV6_HOPLIMIT:
3234 {
3235 int *hlimp;
3236
3237 /*
3238 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3239 * to simplify the ordering among hoplimit options.
3240 */
3241 if (optname == IPV6_HOPLIMIT && sticky)
3242 return (ENOPROTOOPT);
3243
3244 if (len != sizeof(int))
3245 return (EINVAL);
3246 hlimp = (int *)(void *)buf;
3247 if (*hlimp < -1 || *hlimp > 255)
3248 return (EINVAL);
3249
3250 opt->ip6po_hlim = *hlimp;
3251 break;
3252 }
3253
3254 case IPV6_TCLASS:
3255 {
3256 int tclass;
3257
3258 if (len != sizeof(int))
3259 return (EINVAL);
3260 tclass = *(int *)(void *)buf;
3261 if (tclass < -1 || tclass > 255)
3262 return (EINVAL);
3263
3264 opt->ip6po_tclass = tclass;
3265 break;
3266 }
3267
3268 case IPV6_2292NEXTHOP:
3269 case IPV6_NEXTHOP:
3270 error = suser(kauth_cred_get(), 0);
3271 if (error)
3272 return (EACCES);
3273
3274 if (len == 0) { /* just remove the option */
3275 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3276 break;
3277 }
3278
3279 /* check if cmsg_len is large enough for sa_len */
3280 if (len < sizeof(struct sockaddr) || len < *buf)
3281 return (EINVAL);
3282
3283 switch (((struct sockaddr *)buf)->sa_family) {
3284 case AF_INET6:
3285 {
3286 struct sockaddr_in6 *sa6 =
3287 (struct sockaddr_in6 *)(void *)buf;
3288
3289 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3290 return (EINVAL);
3291
3292 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3293 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3294 return (EINVAL);
3295 }
3296 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3297 != 0) {
3298 return (error);
3299 }
3300 break;
3301 }
3302 case AF_LINK: /* should eventually be supported */
3303 default:
3304 return (EAFNOSUPPORT);
3305 }
3306
3307 /* turn off the previous option, then set the new option. */
3308 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3309 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3310 if (opt->ip6po_nexthop == NULL)
3311 return (ENOBUFS);
3312 bcopy(buf, opt->ip6po_nexthop, *buf);
3313 break;
3314
3315 case IPV6_2292HOPOPTS:
3316 case IPV6_HOPOPTS:
3317 {
3318 struct ip6_hbh *hbh;
3319 int hbhlen;
3320
3321 /*
3322 * XXX: We don't allow a non-privileged user to set ANY HbH
3323 * options, since per-option restriction has too much
3324 * overhead.
3325 */
3326 error = suser(kauth_cred_get(), 0);
3327 if (error)
3328 return (EACCES);
3329
3330 if (len == 0) {
3331 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3332 break; /* just remove the option */
3333 }
3334
3335 /* message length validation */
3336 if (len < sizeof(struct ip6_hbh))
3337 return (EINVAL);
3338 hbh = (struct ip6_hbh *)(void *)buf;
3339 hbhlen = (hbh->ip6h_len + 1) << 3;
3340 if (len != hbhlen)
3341 return (EINVAL);
3342
3343 /* turn off the previous option, then set the new option. */
3344 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3345 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3346 if (opt->ip6po_hbh == NULL)
3347 return (ENOBUFS);
3348 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3349
3350 break;
3351 }
3352
3353 case IPV6_2292DSTOPTS:
3354 case IPV6_DSTOPTS:
3355 case IPV6_RTHDRDSTOPTS:
3356 {
3357 struct ip6_dest *dest, **newdest = NULL;
3358 int destlen;
3359
3360 error = suser(kauth_cred_get(), 0);
3361 if (error)
3362 return (EACCES);
3363
3364 if (len == 0) {
3365 ip6_clearpktopts(opt, optname);
3366 break; /* just remove the option */
3367 }
3368
3369 /* message length validation */
3370 if (len < sizeof(struct ip6_dest))
3371 return (EINVAL);
3372 dest = (struct ip6_dest *)(void *)buf;
3373 destlen = (dest->ip6d_len + 1) << 3;
3374 if (len != destlen)
3375 return (EINVAL);
3376
3377 /*
3378 * Determine the position that the destination options header
3379 * should be inserted; before or after the routing header.
3380 */
3381 switch (optname) {
3382 case IPV6_2292DSTOPTS:
3383 /*
3384 * The old advacned API is ambiguous on this point.
3385 * Our approach is to determine the position based
3386 * according to the existence of a routing header.
3387 * Note, however, that this depends on the order of the
3388 * extension headers in the ancillary data; the 1st
3389 * part of the destination options header must appear
3390 * before the routing header in the ancillary data,
3391 * too.
3392 * RFC3542 solved the ambiguity by introducing
3393 * separate ancillary data or option types.
3394 */
3395 if (opt->ip6po_rthdr == NULL)
3396 newdest = &opt->ip6po_dest1;
3397 else
3398 newdest = &opt->ip6po_dest2;
3399 break;
3400 case IPV6_RTHDRDSTOPTS:
3401 newdest = &opt->ip6po_dest1;
3402 break;
3403 case IPV6_DSTOPTS:
3404 newdest = &opt->ip6po_dest2;
3405 break;
3406 }
3407
3408 /* turn off the previous option, then set the new option. */
3409 ip6_clearpktopts(opt, optname);
3410 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3411 if (*newdest == NULL)
3412 return (ENOBUFS);
3413 bcopy(dest, *newdest, destlen);
3414
3415 break;
3416 }
3417
3418 case IPV6_2292RTHDR:
3419 case IPV6_RTHDR:
3420 {
3421 struct ip6_rthdr *rth;
3422 int rthlen;
3423
3424 if (len == 0) {
3425 ip6_clearpktopts(opt, IPV6_RTHDR);
3426 break; /* just remove the option */
3427 }
3428
3429 /* message length validation */
3430 if (len < sizeof(struct ip6_rthdr))
3431 return (EINVAL);
3432 rth = (struct ip6_rthdr *)(void *)buf;
3433 rthlen = (rth->ip6r_len + 1) << 3;
3434 if (len != rthlen)
3435 return (EINVAL);
3436
3437 switch (rth->ip6r_type) {
3438 case IPV6_RTHDR_TYPE_0:
3439 if (rth->ip6r_len == 0) /* must contain one addr */
3440 return (EINVAL);
3441 if (rth->ip6r_len % 2) /* length must be even */
3442 return (EINVAL);
3443 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3444 return (EINVAL);
3445 break;
3446 default:
3447 return (EINVAL); /* not supported */
3448 }
3449
3450 /* turn off the previous option */
3451 ip6_clearpktopts(opt, IPV6_RTHDR);
3452 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3453 if (opt->ip6po_rthdr == NULL)
3454 return (ENOBUFS);
3455 bcopy(rth, opt->ip6po_rthdr, rthlen);
3456
3457 break;
3458 }
3459
3460 case IPV6_USE_MIN_MTU:
3461 if (len != sizeof(int))
3462 return (EINVAL);
3463 minmtupolicy = *(int *)(void *)buf;
3464 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3465 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3466 minmtupolicy != IP6PO_MINMTU_ALL) {
3467 return (EINVAL);
3468 }
3469 opt->ip6po_minmtu = minmtupolicy;
3470 break;
3471
3472 case IPV6_DONTFRAG:
3473 if (len != sizeof(int))
3474 return (EINVAL);
3475
3476 if (uproto == IPPROTO_TCP || *(int *)(void *)buf == 0) {
3477 /*
3478 * we ignore this option for TCP sockets.
3479 * (RFC3542 leaves this case unspecified.)
3480 */
3481 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3482 } else
3483 opt->ip6po_flags |= IP6PO_DONTFRAG;
3484 break;
3485
3486 case IPV6_PREFER_TEMPADDR:
3487 if (len != sizeof(int))
3488 return (EINVAL);
3489 preftemp = *(int *)(void *)buf;
3490 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3491 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3492 preftemp != IP6PO_TEMPADDR_PREFER) {
3493 return (EINVAL);
3494 }
3495 opt->ip6po_prefer_tempaddr = preftemp;
3496 break;
3497
3498 default:
3499 return (ENOPROTOOPT);
3500 } /* end of switch */
3501
3502 return (0);
3503 }
3504
3505 /*
3506 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3507 * packet to the input queue of a specified interface. Note that this
3508 * calls the output routine of the loopback "driver", but with an interface
3509 * pointer that might NOT be &loif -- easier than replicating that code here.
3510 */
3511 void
3512 ip6_mloopback(
3513 struct ifnet *ifp,
3514 struct mbuf *m,
3515 struct sockaddr_in6 *dst)
3516 {
3517 struct mbuf *copym;
3518 struct ip6_hdr *ip6;
3519
3520 copym = m_copy(m, 0, M_COPYALL);
3521 if (copym == NULL)
3522 return;
3523
3524 /*
3525 * Make sure to deep-copy IPv6 header portion in case the data
3526 * is in an mbuf cluster, so that we can safely override the IPv6
3527 * header portion later.
3528 */
3529 if ((copym->m_flags & M_EXT) != 0 ||
3530 copym->m_len < sizeof(struct ip6_hdr)) {
3531 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3532 if (copym == NULL)
3533 return;
3534 }
3535
3536 #if DIAGNOSTIC
3537 if (copym->m_len < sizeof(*ip6)) {
3538 m_freem(copym);
3539 return;
3540 }
3541 #endif
3542
3543 ip6 = mtod(copym, struct ip6_hdr *);
3544 /*
3545 * clear embedded scope identifiers if necessary.
3546 * in6_clearscope will touch the addresses only when necessary.
3547 */
3548 in6_clearscope(&ip6->ip6_src);
3549 in6_clearscope(&ip6->ip6_dst);
3550
3551 #ifdef __APPLE__
3552
3553 /* Makes sure the HW checksum flags are cleaned before sending the packet */
3554
3555 if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
3556 in6_delayed_cksum(copym, sizeof(struct ip6_hdr));
3557 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
3558 }
3559 copym->m_pkthdr.rcvif = 0;
3560 copym->m_pkthdr.csum_data = 0;
3561 copym->m_pkthdr.csum_flags = 0;
3562
3563 if (lo_ifp) {
3564 copym->m_pkthdr.rcvif = ifp;
3565 dlil_output(lo_ifp, PF_INET6, copym, 0,
3566 (struct sockaddr *)dst, 0, NULL);
3567 } else
3568 m_free(copym);
3569 #else
3570 (void)if_simloop(ifp, copym, dst->sin6_family, NULL);
3571 #endif
3572 }
3573
3574 /*
3575 * Chop IPv6 header off from the payload.
3576 */
3577 static int
3578 ip6_splithdr(m, exthdrs)
3579 struct mbuf *m;
3580 struct ip6_exthdrs *exthdrs;
3581 {
3582 struct mbuf *mh;
3583 struct ip6_hdr *ip6;
3584
3585 ip6 = mtod(m, struct ip6_hdr *);
3586 if (m->m_len > sizeof(*ip6)) {
3587 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
3588 if (mh == 0) {
3589 m_freem(m);
3590 return ENOBUFS;
3591 }
3592 M_COPY_PKTHDR(mh, m);
3593 MH_ALIGN(mh, sizeof(*ip6));
3594 m->m_flags &= ~M_PKTHDR;
3595 m->m_len -= sizeof(*ip6);
3596 m->m_data += sizeof(*ip6);
3597 mh->m_next = m;
3598 m = mh;
3599 m->m_len = sizeof(*ip6);
3600 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3601 }
3602 exthdrs->ip6e_ip6 = m;
3603 return 0;
3604 }
3605
3606 /*
3607 * Compute IPv6 extension header length.
3608 */
3609 int
3610 ip6_optlen(in6p)
3611 struct in6pcb *in6p;
3612 {
3613 int len;
3614
3615 if (!in6p->in6p_outputopts)
3616 return 0;
3617
3618 len = 0;
3619 #define elen(x) \
3620 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3621
3622 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3623 if (in6p->in6p_outputopts->ip6po_rthdr)
3624 /* dest1 is valid with rthdr only */
3625 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3626 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3627 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3628 return len;
3629 #undef elen
3630 }