]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/ip6_output.c
xnu-1699.22.81.tar.gz
[apple/xnu.git] / bsd / netinet6 / ip6_output.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/ip6_output.c,v 1.43 2002/10/31 19:45:48 ume Exp $ */
30 /* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * Copyright (c) 1982, 1986, 1988, 1990, 1993
63 * The Regents of the University of California. All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 * notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 * notice, this list of conditions and the following disclaimer in the
72 * documentation and/or other materials provided with the distribution.
73 * 3. All advertising materials mentioning features or use of this software
74 * must display the following acknowledgement:
75 * This product includes software developed by the University of
76 * California, Berkeley and its contributors.
77 * 4. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 * @(#)ip_output.c 8.3 (Berkeley) 1/21/94
94 */
95 /*
96 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
97 * support for mandatory and extensible security protections. This notice
98 * is included in support of clause 2.2 (b) of the Apple Public License,
99 * Version 2.0.
100 */
101
102 #include <sys/param.h>
103 #include <sys/malloc.h>
104 #include <sys/mbuf.h>
105 #include <sys/errno.h>
106 #include <sys/protosw.h>
107 #include <sys/socket.h>
108 #include <sys/socketvar.h>
109 #include <sys/systm.h>
110 #include <sys/kernel.h>
111 #include <sys/proc.h>
112 #include <sys/kauth.h>
113 #include <sys/mcache.h>
114 #include <sys/sysctl.h>
115 #include <kern/zalloc.h>
116
117 #include <pexpert/pexpert.h>
118
119 #include <net/if.h>
120 #include <net/route.h>
121 #include <net/dlil.h>
122
123 #include <netinet/in.h>
124 #include <netinet/in_var.h>
125 #include <netinet/ip_var.h>
126 #include <netinet6/in6_var.h>
127 #include <netinet/ip6.h>
128 #include <netinet6/ip6protosw.h>
129 #include <netinet/icmp6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet/in_pcb.h>
132 #include <netinet6/nd6.h>
133 #include <netinet6/scope6_var.h>
134 #include <mach/sdt.h>
135
136 #if IPSEC
137 #include <netinet6/ipsec.h>
138 #if INET6
139 #include <netinet6/ipsec6.h>
140 #endif
141 #include <netkey/key.h>
142 extern int ipsec_bypass;
143 #endif /* IPSEC */
144
145 #if CONFIG_MACF_NET
146 #include <security/mac.h>
147 #endif /* CONFIG_MACF_NET */
148
149 #include <netinet6/ip6_fw.h>
150
151 #include <net/net_osdep.h>
152
153 #include <netinet/kpi_ipfilter_var.h>
154
155 #if PF
156 #include <net/pfvar.h>
157 #endif /* PF */
158
159 #ifndef __APPLE__
160 static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options");
161 #endif
162
/*
 * Mbufs holding the IPv6 header and each extension header that
 * ip6_output() assembles into the outgoing packet chain.  ip6_output()
 * zeroes the whole structure first and treats a NULL member as
 * "this header is not present".
 */
163 struct ip6_exthdrs {
164 struct mbuf *ip6e_ip6; /* IPv6 header; ip6_output() takes this as "m" after ip6_splithdr() */
165 struct mbuf *ip6e_hbh; /* Hop-by-Hop options header (built from ip6po_hbh) */
166 struct mbuf *ip6e_dest1; /* Destination options, 1st part; only built when ip6po_rthdr is set */
167 struct mbuf *ip6e_rthdr; /* Routing header (built from ip6po_rthdr) */
168 struct mbuf *ip6e_dest2; /* Destination options, 2nd part (built from ip6po_dest2) */
169 };
170
/*
 * Prototypes.  ip6_raw_ctloutput() has external linkage; the static
 * ones are file-local helpers whose definitions are outside this part
 * of the file.
 */
171 int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt);
172 static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *,
173 struct socket *, struct sockopt *sopt);
174 static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto);
175 static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt);
176 static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, int sticky, int cmsg, int uproto);
177 static void im6o_trace(struct ip6_moptions *, int);
/*
 * ip6_output() calls this through MAKE_EXTHDR as
 * (address of the destination mbuf pointer, header bytes, header length).
 */
178 static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
179 static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int,
180 struct ip6_frag **);
/*
 * ip6_output() calls this with the payload length when that length
 * exceeds IPV6_MAXPACKET.
 */
181 static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
/*
 * On a non-zero return ip6_output() sets its own "m" to NULL before
 * bailing out -- presumably the chain has already been freed here;
 * TODO confirm against the definition.
 */
182 static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
/*
 * ip6_output() calls this as
 * (ro_pmtu, ro, ifp, &finaldst, &mtu, &alwaysfrag).
 */
183 static int ip6_getpmtu (struct route_in6 *, struct route_in6 *,
184 struct ifnet *, struct in6_addr *, u_int32_t *, int *);
185
/*
 * Number of entries in each of the im6o_refhold[] and im6o_refrele[]
 * arrays of struct ip6_moptions_dbg.
 */
186 #define IM6O_TRACE_HIST_SIZE 32 /* size of trace history */
187
188 /* For gdb: same value as the macro above, held in a variable (presumably so a debugger can read it) */
189 __private_extern__ unsigned int im6o_trace_hist_size = IM6O_TRACE_HIST_SIZE;
190
/*
 * struct ip6_moptions extended with reference-count bookkeeping:
 * counters for holds and releases, the alloc/free callers, and a
 * history of the most recent IM6O_TRACE_HIST_SIZE hold and release
 * callers.
 * NOTE(review): presumably used in place of a plain ip6_moptions when
 * im6o_debug is non-zero -- confirm at the allocation site.
 */
191 struct ip6_moptions_dbg {
192 struct ip6_moptions im6o; /* embedded ip6_moptions (first member) */
193 u_int16_t im6o_refhold_cnt; /* # of IM6O_ADDREF (16-bit counter) */
194 u_int16_t im6o_refrele_cnt; /* # of IM6O_REMREF (16-bit counter) */
195 /*
196 * Alloc and free callers.
197 */
198 ctrace_t im6o_alloc;
199 ctrace_t im6o_free;
200 /*
201 * Circular lists of IM6O_ADDREF and IM6O_REMREF callers,
 * IM6O_TRACE_HIST_SIZE entries each.
202 */
203 ctrace_t im6o_refhold[IM6O_TRACE_HIST_SIZE];
204 ctrace_t im6o_refrele[IM6O_TRACE_HIST_SIZE];
205 };
206
/*
 * File-local state for ip6_moptions allocation and debugging.
 * im6o_debug starts as 1 on DEBUG builds and as 0 (zero-initialized
 * static) otherwise.
 */
207 #if DEBUG
208 static unsigned int im6o_debug = 1; /* debugging (enabled) */
209 #else
210 static unsigned int im6o_debug; /* debugging (disabled) */
211 #endif /* !DEBUG */
212
/* Both are zero/NULL here; they are assigned outside this part of the file. */
213 static unsigned int im6o_size; /* size of zone element */
214 static struct zone *im6o_zone; /* zone for ip6_moptions */
215
216 #define IM6O_ZONE_MAX 64 /* maximum elements in zone */
217 #define IM6O_ZONE_NAME "ip6_moptions" /* zone name */
218
219
220 /*
221 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
222 * header (with pri, len, nxt, hlim, src, dst).
223 * This function may modify ver and hlim only.
224 * The mbuf chain containing the packet will be freed.
225 * The packet options structure opt, if present, will not be freed.
226 *
227 * type of "mtu": rt_rmx.rmx_mtu is u_int32_t, ifnet.ifr_mtu is int, and
228 * nd_ifinfo.linkmtu is u_int32_t. so we use u_int32_t to hold largest one,
229 * which is rt_rmx.rmx_mtu.
230 */
231 int
232 ip6_output(
233 struct mbuf *m0,
234 struct ip6_pktopts *opt,
235 struct route_in6 *ro,
236 int flags,
237 struct ip6_moptions *im6o,
238 struct ifnet **ifpp, /* XXX: just for statistics */
239 struct ip6_out_args *ip6oa)
240 {
241 struct ip6_hdr *ip6, *mhip6;
242 struct ifnet *ifp = NULL, *origifp = NULL;
243 struct mbuf *m = m0;
244 int hlen, tlen, len, off;
245 struct route_in6 ip6route;
246 struct rtentry *rt = NULL;
247 struct sockaddr_in6 *dst, src_sa, dst_sa;
248 int error = 0;
249 struct in6_ifaddr *ia = NULL;
250 u_int32_t mtu;
251 int alwaysfrag = 0, dontfrag = 0;
252 u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
253 struct ip6_exthdrs exthdrs;
254 struct in6_addr finaldst, src0, dst0;
255 u_int32_t zone;
256 struct route_in6 *ro_pmtu = NULL;
257 int hdrsplit = 0;
258 int needipsec = 0;
259 ipfilter_t inject_filter_ref;
260 int tso;
261 unsigned int ifscope;
262 unsigned int nocell;
263 boolean_t select_srcif;
264 struct ipf_pktopts *ippo = NULL, ipf_pktopts;
265 u_int32_t ifmtu;
266
267 #if IPSEC
268 int needipsectun = 0;
269 struct socket *so = NULL;
270 struct secpolicy *sp = NULL;
271
272 /* for AH processing. stupid to have "socket" variable in IP layer... */
273 if (ipsec_bypass == 0)
274 {
275 so = ipsec_getsocket(m);
276 (void)ipsec_setsocket(m, NULL);
277 }
278 #endif /* IPSEC */
279
280 bzero(&ipf_pktopts, sizeof(struct ipf_pktopts));
281 ippo = &ipf_pktopts;
282
283 ip6 = mtod(m, struct ip6_hdr *);
284 inject_filter_ref = ipf_get_inject_filter(m);
285
286 finaldst = ip6->ip6_dst;
287
288 if (ip6_doscopedroute && (flags & IPV6_OUTARGS)) {
289 select_srcif = !(flags & (IPV6_FORWARDING | IPV6_UNSPECSRC | IPV6_FLAG_NOSRCIFSEL));
290 ifscope = ip6oa->ip6oa_boundif;
291 ipf_pktopts.ippo_flags = IPPOF_BOUND_IF;
292 ipf_pktopts.ippo_flags |= (ifscope << IPPOF_SHIFT_IFSCOPE);
293 } else {
294 select_srcif = FALSE;
295 ifscope = IFSCOPE_NONE;
296 }
297
298 if (flags & IPV6_OUTARGS) {
299 nocell = ip6oa->ip6oa_nocell;
300 if (nocell)
301 ipf_pktopts.ippo_flags |= IPPOF_NO_IFT_CELLULAR;
302 } else {
303 nocell = 0;
304 }
305
306 #define MAKE_EXTHDR(hp, mp) \
307 do { \
308 if (hp) { \
309 struct ip6_ext *eh = (struct ip6_ext *)(hp); \
310 error = ip6_copyexthdr((mp), (caddr_t)(hp), \
311 ((eh)->ip6e_len + 1) << 3); \
312 if (error) \
313 goto freehdrs; \
314 } \
315 } while (0)
316
317 bzero(&exthdrs, sizeof(exthdrs));
318
319 if (opt) {
320 /* Hop-by-Hop options header */
321 MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
322 /* Destination options header(1st part) */
323 if (opt->ip6po_rthdr) {
324 /*
325 * Destination options header(1st part)
326 * This only makes sense with a routing header.
327 * See Section 9.2 of RFC 3542.
328 * Disabling this part just for MIP6 convenience is
329 * a bad idea. We need to think carefully about a
330 * way to make the advanced API coexist with MIP6
331 * options, which might automatically be inserted in
332 * the kernel.
333 */
334 MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
335 }
336 /* Routing header */
337 MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
338 /* Destination options header(2nd part) */
339 MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
340 }
341
342 #if IPSEC
343 if (ipsec_bypass != 0)
344 goto skip_ipsec;
345
346 /* get a security policy for this packet */
347 if (so == NULL)
348 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error);
349 else
350 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
351
352 if (sp == NULL) {
353 IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
354 goto freehdrs;
355 }
356
357 error = 0;
358
359 /* check policy */
360 switch (sp->policy) {
361 case IPSEC_POLICY_DISCARD:
362 case IPSEC_POLICY_GENERATE:
363 /*
364 * This packet is just discarded.
365 */
366 IPSEC_STAT_INCREMENT(ipsec6stat.out_polvio);
367 goto freehdrs;
368
369 case IPSEC_POLICY_BYPASS:
370 case IPSEC_POLICY_NONE:
371 /* no need to do IPsec. */
372 needipsec = 0;
373 break;
374
375 case IPSEC_POLICY_IPSEC:
376 if (sp->req == NULL) {
377 /* acquire a policy */
378 error = key_spdacquire(sp);
379 goto freehdrs;
380 }
381 needipsec = 1;
382 break;
383
384 case IPSEC_POLICY_ENTRUST:
385 default:
386 printf("ip6_output: Invalid policy found. %d\n", sp->policy);
387 }
388 skip_ipsec:
389 #endif /* IPSEC */
390
391 /*
392 * Calculate the total length of the extension header chain.
393 * Keep the length of the unfragmentable part for fragmentation.
394 */
395 optlen = 0;
396 if (exthdrs.ip6e_hbh)
397 optlen += exthdrs.ip6e_hbh->m_len;
398 if (exthdrs.ip6e_dest1)
399 optlen += exthdrs.ip6e_dest1->m_len;
400 if (exthdrs.ip6e_rthdr)
401 optlen += exthdrs.ip6e_rthdr->m_len;
402 unfragpartlen = optlen + sizeof(struct ip6_hdr);
403
404 /* NOTE: we don't add AH/ESP length here. do that later. */
405 if (exthdrs.ip6e_dest2)
406 optlen += exthdrs.ip6e_dest2->m_len;
407
408
409 if (needipsec &&
410 (m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
411 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
412 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
413 }
414
415 /*
416 * If we need IPsec, or there is at least one extension header,
417 * separate IP6 header from the payload.
418 */
419 if ((needipsec || optlen) && !hdrsplit) {
420 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
421 m = NULL;
422 goto freehdrs;
423 }
424 m = exthdrs.ip6e_ip6;
425 hdrsplit++;
426 }
427
428 /* adjust pointer */
429 ip6 = mtod(m, struct ip6_hdr *);
430
431 /* adjust mbuf packet header length */
432 m->m_pkthdr.len += optlen;
433 plen = m->m_pkthdr.len - sizeof(*ip6);
434
435 /* If this is a jumbo payload, insert a jumbo payload option. */
436 if (plen > IPV6_MAXPACKET) {
437 if (!hdrsplit) {
438 if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
439 m = NULL;
440 goto freehdrs;
441 }
442 m = exthdrs.ip6e_ip6;
443 hdrsplit++;
444 }
445 /* adjust pointer */
446 ip6 = mtod(m, struct ip6_hdr *);
447 if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
448 goto freehdrs;
449 ip6->ip6_plen = 0;
450 } else
451 ip6->ip6_plen = htons(plen);
452
453 /*
454 * Concatenate headers and fill in next header fields.
455 * Here we have, on "m"
456 * IPv6 payload
457 * and we insert headers accordingly. Finally, we should be getting:
458 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
459 *
460 * during the header composing process, "m" points to IPv6 header.
461 * "mprev" points to an extension header prior to esp.
462 */
463 {
464 u_char *nexthdrp = &ip6->ip6_nxt;
465 struct mbuf *mprev = m;
466
467 /*
468 * we treat dest2 specially. this makes IPsec processing
469 * much easier. the goal here is to make mprev point the
470 * mbuf prior to dest2.
471 *
472 * result: IPv6 dest2 payload
473 * m and mprev will point to IPv6 header.
474 */
475 if (exthdrs.ip6e_dest2) {
476 if (!hdrsplit)
477 panic("assumption failed: hdr not split");
478 exthdrs.ip6e_dest2->m_next = m->m_next;
479 m->m_next = exthdrs.ip6e_dest2;
480 *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
481 ip6->ip6_nxt = IPPROTO_DSTOPTS;
482 }
483
484 #define MAKE_CHAIN(m, mp, p, i)\
485 do {\
486 if (m) {\
487 if (!hdrsplit) \
488 panic("assumption failed: hdr not split"); \
489 *mtod((m), u_char *) = *(p);\
490 *(p) = (i);\
491 p = mtod((m), u_char *);\
492 (m)->m_next = (mp)->m_next;\
493 (mp)->m_next = (m);\
494 (mp) = (m);\
495 }\
496 } while (0)
497 /*
498 * result: IPv6 hbh dest1 rthdr dest2 payload
499 * m will point to IPv6 header. mprev will point to the
500 * extension header prior to dest2 (rthdr in the above case).
501 */
502 MAKE_CHAIN(exthdrs.ip6e_hbh, mprev,
503 nexthdrp, IPPROTO_HOPOPTS);
504 MAKE_CHAIN(exthdrs.ip6e_dest1, mprev,
505 nexthdrp, IPPROTO_DSTOPTS);
506 MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev,
507 nexthdrp, IPPROTO_ROUTING);
508
509 if (!TAILQ_EMPTY(&ipv6_filters)) {
510 struct ipfilter *filter;
511 int seen = (inject_filter_ref == 0);
512 int fixscope = 0;
513
514 if (im6o != NULL && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
515 ippo->ippo_flags |= IPPOF_MCAST_OPTS;
516 IM6O_LOCK(im6o);
517 ippo->ippo_mcast_ifnet = im6o->im6o_multicast_ifp;
518 ippo->ippo_mcast_ttl = im6o->im6o_multicast_hlim;
519 ippo->ippo_mcast_loop = im6o->im6o_multicast_loop;
520 IM6O_UNLOCK(im6o);
521 }
522
523 /* Hack: embed the scope_id in the destination */
524 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) &&
525 (ip6->ip6_dst.s6_addr16[1] == 0) && (ro != NULL)) {
526 fixscope = 1;
527 ip6->ip6_dst.s6_addr16[1] = htons(ro->ro_dst.sin6_scope_id);
528 }
529 {
530 ipf_ref();
531 TAILQ_FOREACH(filter, &ipv6_filters, ipf_link) {
532 /*
533 * No need to proccess packet twice if we've
534 * already seen it
535 */
536 if (seen == 0) {
537 if ((struct ipfilter *)inject_filter_ref == filter)
538 seen = 1;
539 } else if (filter->ipf_filter.ipf_output) {
540 errno_t result;
541
542 result = filter->ipf_filter.ipf_output(filter->ipf_filter.cookie, (mbuf_t*)&m, ippo);
543 if (result == EJUSTRETURN) {
544 ipf_unref();
545 goto done;
546 }
547 if (result != 0) {
548 ipf_unref();
549 goto bad;
550 }
551 }
552 }
553 ipf_unref();
554 }
555 ip6 = mtod(m, struct ip6_hdr *);
556 /* Hack: cleanup embedded scope_id if we put it there */
557 if (fixscope)
558 ip6->ip6_dst.s6_addr16[1] = 0;
559 }
560
561 #if IPSEC
562 if (!needipsec)
563 goto skip_ipsec2;
564
565 /*
566 * pointers after IPsec headers are not valid any more.
567 * other pointers need a great care too.
568 * (IPsec routines should not mangle mbufs prior to AH/ESP)
569 */
570 exthdrs.ip6e_dest2 = NULL;
571
572 {
573 struct ip6_rthdr *rh = NULL;
574 int segleft_org = 0;
575 struct ipsec_output_state state;
576
577 if (exthdrs.ip6e_rthdr) {
578 rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
579 segleft_org = rh->ip6r_segleft;
580 rh->ip6r_segleft = 0;
581 }
582
583 bzero(&state, sizeof(state));
584 state.m = m;
585 error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
586 &needipsectun);
587 m = state.m;
588 if (error) {
589 /* mbuf is already reclaimed in ipsec6_output_trans. */
590 m = NULL;
591 switch (error) {
592 case EHOSTUNREACH:
593 case ENETUNREACH:
594 case EMSGSIZE:
595 case ENOBUFS:
596 case ENOMEM:
597 break;
598 default:
599 printf("ip6_output (ipsec): error code %d\n", error);
600 /* fall through */
601 case ENOENT:
602 /* don't show these error codes to the user */
603 error = 0;
604 break;
605 }
606 goto bad;
607 }
608 if (exthdrs.ip6e_rthdr) {
609 /* ah6_output doesn't modify mbuf chain */
610 rh->ip6r_segleft = segleft_org;
611 }
612 }
613 }
614 skip_ipsec2:
615 #endif
616
617 /*
618 * If there is a routing header, replace the destination address field
619 * with the first hop of the routing header.
620 */
621 if (exthdrs.ip6e_rthdr) {
622 struct ip6_rthdr *rh =
623 (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
624 struct ip6_rthdr *));
625 struct ip6_rthdr0 *rh0;
626 struct in6_addr *addr;
627 struct sockaddr_in6 sa;
628
629 switch (rh->ip6r_type) {
630 case IPV6_RTHDR_TYPE_0:
631 rh0 = (struct ip6_rthdr0 *)rh;
632 addr = (struct in6_addr *)(rh0 + 1);
633
634 /*
635 * construct a sockaddr_in6 form of
636 * the first hop.
637 *
638 * XXX: we may not have enough
639 * information about its scope zone;
640 * there is no standard API to pass
641 * the information from the
642 * application.
643 */
644 bzero(&sa, sizeof(sa));
645 sa.sin6_family = AF_INET6;
646 sa.sin6_len = sizeof(sa);
647 sa.sin6_addr = addr[0];
648 if ((error = sa6_embedscope(&sa,
649 ip6_use_defzone)) != 0) {
650 goto bad;
651 }
652 ip6->ip6_dst = sa.sin6_addr;
653 bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
654 * (rh0->ip6r0_segleft - 1));
655 addr[rh0->ip6r0_segleft - 1] = finaldst;
656 /* XXX */
657 in6_clearscope(addr + rh0->ip6r0_segleft - 1);
658 break;
659 default: /* is it possible? */
660 error = EINVAL;
661 goto bad;
662 }
663 }
664
665 /* Source address validation */
666 if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
667 (flags & IPV6_UNSPECSRC) == 0) {
668 error = EOPNOTSUPP;
669 ip6stat.ip6s_badscope++;
670 goto bad;
671 }
672 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
673 error = EOPNOTSUPP;
674 ip6stat.ip6s_badscope++;
675 goto bad;
676 }
677
678 ip6stat.ip6s_localout++;
679
680 /*
681 * Route packet.
682 */
683 if (ro == 0) {
684 ro = &ip6route;
685 bzero((caddr_t)ro, sizeof(*ro));
686 }
687 ro_pmtu = ro;
688 if (opt && opt->ip6po_rthdr)
689 ro = &opt->ip6po_route;
690 dst = (struct sockaddr_in6 *)&ro->ro_dst;
691
692 if (ro && ro->ro_rt)
693 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
694 /*
695 * if specified, try to fill in the traffic class field.
696 * do not override if a non-zero value is already set.
697 * we check the diffserv field and the ecn field separately.
698 */
699 if (opt && opt->ip6po_tclass >= 0) {
700 int mask = 0;
701
702 if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
703 mask |= 0xfc;
704 if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
705 mask |= 0x03;
706 if (mask != 0)
707 ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
708 }
709
710 /* fill in or override the hop limit field, if necessary. */
711 if (opt && opt->ip6po_hlim != -1)
712 ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
713 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
714 if (im6o != NULL) {
715 IM6O_LOCK(im6o);
716 ip6->ip6_hlim = im6o->im6o_multicast_hlim;
717 IM6O_UNLOCK(im6o);
718 } else {
719 ip6->ip6_hlim = ip6_defmcasthlim;
720 }
721 }
722
723 /*
724 * If there is a cached route, check that it is to the same
725 * destination and is still up. If not, free it and try again.
726 * Test rt_flags without holding rt_lock for performance reasons;
727 * if the route is down it will hopefully be caught by the layer
728 * below (since it uses this route as a hint) or during the
729 * next transmit.
730 */
731 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
732 dst->sin6_family != AF_INET6 ||
733 !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst) ||
734 ro->ro_rt->generation_id != route_generation)) {
735 rtfree(ro->ro_rt);
736 ro->ro_rt = NULL;
737 }
738 if (ro->ro_rt == NULL) {
739 bzero(dst, sizeof(*dst));
740 dst->sin6_family = AF_INET6;
741 dst->sin6_len = sizeof(struct sockaddr_in6);
742 dst->sin6_addr = ip6->ip6_dst;
743 }
744 #if IPSEC
745 if (needipsec && needipsectun) {
746 struct ipsec_output_state state;
747 int tunneledv4 = 0;
748 #if CONFIG_DTRACE
749 struct ifnet *trace_ifp = (ifpp != NULL) ? (*ifpp) : NULL;
750 #endif /* CONFIG_DTRACE */
751 /*
752 * All the extension headers will become inaccessible
753 * (since they can be encrypted).
754 * Don't panic, we need no more updates to extension headers
755 * on inner IPv6 packet (since they are now encapsulated).
756 *
757 * IPv6 [ESP|AH] IPv6 [extension headers] payload
758 */
759 bzero(&exthdrs, sizeof(exthdrs));
760 exthdrs.ip6e_ip6 = m;
761
762 bzero(&state, sizeof(state));
763 state.m = m;
764 state.ro = (struct route *)ro;
765 state.dst = (struct sockaddr *)dst;
766
767 /* Added a trace here so that we can see packets inside a tunnel */
768 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL,
769 struct ip6_hdr *, ip6, struct ifnet *, trace_ifp,
770 struct ip *, NULL, struct ip6_hdr *, ip6);
771
772 error = ipsec6_output_tunnel(&state, sp, flags, &tunneledv4);
773 if (tunneledv4) /* tunneled in IPv4 - packet is gone */
774 goto done;
775 m = state.m;
776 ro = (struct route_in6 *)state.ro;
777 dst = (struct sockaddr_in6 *)state.dst;
778 if (error) {
779 /* mbuf is already reclaimed in ipsec6_output_tunnel. */
780 m0 = m = NULL;
781 m = NULL;
782 switch (error) {
783 case EHOSTUNREACH:
784 case ENETUNREACH:
785 case EMSGSIZE:
786 case ENOBUFS:
787 case ENOMEM:
788 break;
789 default:
790 printf("ip6_output (ipsec): error code %d\n", error);
791 /* fall through */
792 case ENOENT:
793 /* don't show these error codes to the user */
794 error = 0;
795 break;
796 }
797 goto bad;
798 }
799 /*
800 * The packet has been encapsulated so the ifscope is no longer valid
801 * since it does not apply to the outer address: ignore the ifscope.
802 */
803 ifscope = IFSCOPE_NONE;
804 if (opt != NULL && opt->ip6po_pktinfo != NULL) {
805 if (opt->ip6po_pktinfo->ipi6_ifindex != IFSCOPE_NONE)
806 opt->ip6po_pktinfo->ipi6_ifindex = IFSCOPE_NONE;
807 }
808 exthdrs.ip6e_ip6 = m;
809 }
810 #endif /* IPSEC */
811
812 /* for safety */
813 if (ifp != NULL) {
814 ifnet_release(ifp);
815 ifp = NULL;
816 }
817
818 /* adjust pointer */
819 ip6 = mtod(m, struct ip6_hdr *);
820
821 if (select_srcif) {
822 bzero(&src_sa, sizeof(src_sa));
823 src_sa.sin6_family = AF_INET6;
824 src_sa.sin6_len = sizeof(src_sa);
825 src_sa.sin6_addr = ip6->ip6_src;
826 }
827 bzero(&dst_sa, sizeof(dst_sa));
828 dst_sa.sin6_family = AF_INET6;
829 dst_sa.sin6_len = sizeof(dst_sa);
830 dst_sa.sin6_addr = ip6->ip6_dst;
831
832 if ((error = in6_selectroute(select_srcif ? &src_sa : NULL,
833 &dst_sa, opt, im6o, ro, &ifp, &rt, 0, ifscope, nocell)) != 0) {
834 switch (error) {
835 case EHOSTUNREACH:
836 ip6stat.ip6s_noroute++;
837 break;
838 case EADDRNOTAVAIL:
839 default:
840 break; /* XXX statistics? */
841 }
842 if (ifp != NULL)
843 in6_ifstat_inc(ifp, ifs6_out_discard);
844 goto bad;
845 }
846 if (rt == NULL) {
847 /*
848 * If in6_selectroute() does not return a route entry,
849 * dst may not have been updated.
850 */
851 *dst = dst_sa; /* XXX */
852 }
853
854 /*
855 * then rt (for unicast) and ifp must be non-NULL valid values.
856 */
857 if ((flags & IPV6_FORWARDING) == 0) {
858 /* XXX: the FORWARDING flag can be set for mrouting. */
859 in6_ifstat_inc(ifp, ifs6_out_request);
860 }
861 if (rt != NULL) {
862 RT_LOCK(rt);
863 ia = (struct in6_ifaddr *)(rt->rt_ifa);
864 if (ia != NULL)
865 IFA_ADDREF(&ia->ia_ifa);
866 rt->rt_use++;
867 RT_UNLOCK(rt);
868 }
869
870 /*
871 * The outgoing interface must be in the zone of source and
872 * destination addresses. We should use ia_ifp to support the
873 * case of sending packets to an address of our own.
874 */
875 if (ia != NULL && ia->ia_ifp) {
876 ifnet_reference(ia->ia_ifp);
877 if (origifp != NULL)
878 ifnet_release(origifp);
879 origifp = ia->ia_ifp;
880 } else {
881 if (ifp != NULL)
882 ifnet_reference(ifp);
883 if (origifp != NULL)
884 ifnet_release(origifp);
885 origifp = ifp;
886 }
887 src0 = ip6->ip6_src;
888 if (in6_setscope(&src0, origifp, &zone))
889 goto badscope;
890 bzero(&src_sa, sizeof(src_sa));
891 src_sa.sin6_family = AF_INET6;
892 src_sa.sin6_len = sizeof(src_sa);
893 src_sa.sin6_addr = ip6->ip6_src;
894 if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
895 goto badscope;
896
897 dst0 = ip6->ip6_dst;
898 if (in6_setscope(&dst0, origifp, &zone))
899 goto badscope;
900 /* re-initialize to be sure */
901 bzero(&dst_sa, sizeof(dst_sa));
902 dst_sa.sin6_family = AF_INET6;
903 dst_sa.sin6_len = sizeof(dst_sa);
904 dst_sa.sin6_addr = ip6->ip6_dst;
905 if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
906 goto badscope;
907 }
908
909 /* scope check is done. */
910 goto routefound;
911
912 badscope:
913 ip6stat.ip6s_badscope++;
914 in6_ifstat_inc(origifp, ifs6_out_discard);
915 if (error == 0)
916 error = EHOSTUNREACH; /* XXX */
917 goto bad;
918
919 routefound:
920 if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
921 if (opt && opt->ip6po_nextroute.ro_rt) {
922 /*
923 * The nexthop is explicitly specified by the
924 * application. We assume the next hop is an IPv6
925 * address.
926 */
927 dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
928 }
929 else if ((rt->rt_flags & RTF_GATEWAY))
930 dst = (struct sockaddr_in6 *)rt->rt_gateway;
931 }
932
933 if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
934 m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
935 } else {
936 struct in6_multi *in6m;
937
938 m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
939
940 in6_ifstat_inc(ifp, ifs6_out_mcast);
941
942 /*
943 * Confirm that the outgoing interface supports multicast.
944 */
945 if ((ifp->if_flags & IFF_MULTICAST) == 0) {
946 ip6stat.ip6s_noroute++;
947 in6_ifstat_inc(ifp, ifs6_out_discard);
948 error = ENETUNREACH;
949 goto bad;
950 }
951 in6_multihead_lock_shared();
952 IN6_LOOKUP_MULTI(&ip6->ip6_dst, ifp, in6m);
953 in6_multihead_lock_done();
954 if (im6o != NULL)
955 IM6O_LOCK(im6o);
956 if (in6m != NULL &&
957 (im6o == NULL || im6o->im6o_multicast_loop)) {
958 if (im6o != NULL)
959 IM6O_UNLOCK(im6o);
960 /*
961 * If we belong to the destination multicast group
962 * on the outgoing interface, and the caller did not
963 * forbid loopback, loop back a copy.
964 */
965 ip6_mloopback(ifp, m, dst);
966 } else {
967 if (im6o != NULL)
968 IM6O_UNLOCK(im6o);
969 /*
970 * If we are acting as a multicast router, perform
971 * multicast forwarding as if the packet had just
972 * arrived on the interface to which we are about
973 * to send. The multicast forwarding function
974 * recursively calls this function, using the
975 * IPV6_FORWARDING flag to prevent infinite recursion.
976 *
977 * Multicasts that are looped back by ip6_mloopback(),
978 * above, will be forwarded by the ip6_input() routine,
979 * if necessary.
980 */
981 #if MROUTING
982 if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
983 /*
984 * XXX: ip6_mforward expects that rcvif is NULL
985 * when it is called from the originating path.
986 * However, it is not always the case, since
987 * some versions of MGETHDR() does not
988 * initialize the field.
989 */
990 m->m_pkthdr.rcvif = NULL;
991 if (ip6_mforward(ip6, ifp, m) != 0) {
992 m_freem(m);
993 if (in6m != NULL)
994 IN6M_REMREF(in6m);
995 goto done;
996 }
997 }
998 #endif
999 }
1000 if (in6m != NULL)
1001 IN6M_REMREF(in6m);
1002 /*
1003 * Multicasts with a hoplimit of zero may be looped back,
1004 * above, but must not be transmitted on a network.
1005 * Also, multicasts addressed to the loopback interface
1006 * are not sent -- the above call to ip6_mloopback() will
1007 * loop back a copy if this host actually belongs to the
1008 * destination group on the loopback interface.
1009 */
1010 if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
1011 IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
1012 m_freem(m);
1013 goto done;
1014 }
1015 }
1016
1017 /*
1018 * Fill the outgoing inteface to tell the upper layer
1019 * to increment per-interface statistics.
1020 */
1021 if (ifpp != NULL) {
1022 ifnet_reference(ifp); /* for caller */
1023 if (*ifpp != NULL)
1024 ifnet_release(*ifpp);
1025 *ifpp = ifp;
1026 }
1027
1028 /* Determine path MTU. */
1029 if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
1030 &alwaysfrag)) != 0)
1031 goto bad;
1032
1033 /*
1034 * The caller of this function may specify to use the minimum MTU
1035 * in some cases.
1036 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
1037 * setting. The logic is a bit complicated; by default, unicast
1038 * packets will follow path MTU while multicast packets will be sent at
1039 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
1040 * including unicast ones will be sent at the minimum MTU. Multicast
1041 * packets will always be sent at the minimum MTU unless
1042 * IP6PO_MINMTU_DISABLE is explicitly specified.
1043 * See RFC 3542 for more details.
1044 */
1045 if (mtu > IPV6_MMTU) {
1046 if ((flags & IPV6_MINMTU))
1047 mtu = IPV6_MMTU;
1048 else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
1049 mtu = IPV6_MMTU;
1050 else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
1051 (opt == NULL ||
1052 opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
1053 mtu = IPV6_MMTU;
1054 }
1055 }
1056
1057 /*
1058 * clear embedded scope identifiers if necessary.
1059 * in6_clearscope will touch the addresses only when necessary.
1060 */
1061 in6_clearscope(&ip6->ip6_src);
1062 in6_clearscope(&ip6->ip6_dst);
1063
1064 #if IPFW2
1065 /*
1066 * Check with the firewall...
1067 */
1068 if (ip6_fw_enable && ip6_fw_chk_ptr) {
1069 u_short port = 0;
1070 m->m_pkthdr.rcvif = NULL; /* XXX */
1071 /* If ipfw says divert, we have to just drop packet */
1072 if (ip6_fw_chk_ptr(&ip6, ifp, &port, &m)) {
1073 m_freem(m);
1074 goto done;
1075 }
1076 if (!m) {
1077 error = EACCES;
1078 goto done;
1079 }
1080 }
1081 #endif
1082
1083 /*
1084 * If the outgoing packet contains a hop-by-hop options header,
1085 * it must be examined and processed even by the source node.
1086 * (RFC 2460, section 4.)
1087 */
1088 if (exthdrs.ip6e_hbh) {
1089 struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
1090 u_int32_t dummy; /* XXX unused */
1091
1092 #if DIAGNOSTIC
1093 if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
1094 panic("ip6e_hbh is not continuous");
1095 #endif
1096 /*
1097 * XXX: if we have to send an ICMPv6 error to the sender,
1098 * we need the M_LOOP flag since icmp6_error() expects
1099 * the IPv6 and the hop-by-hop options header are
1100 * continuous unless the flag is set.
1101 */
1102 m->m_flags |= M_LOOP;
1103 m->m_pkthdr.rcvif = ifp;
1104 if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
1105 ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
1106 &dummy, &plen) < 0) {
1107 /* m was already freed at this point */
1108 error = EINVAL;/* better error? */
1109 goto done;
1110 }
1111 m->m_flags &= ~M_LOOP; /* XXX */
1112 m->m_pkthdr.rcvif = NULL;
1113 }
1114
1115 #if PF
1116 if (PF_IS_ENABLED) {
1117 /* Invoke outbound packet filter */
1118 error = pf_af_hook(ifp, NULL, &m, AF_INET6, FALSE);
1119
1120 if (error) {
1121 if (m != NULL) {
1122 panic("%s: unexpected packet %p\n", __func__, m);
1123 /* NOTREACHED */
1124 }
1125 /* Already freed by callee */
1126 goto done;
1127 }
1128 ip6 = mtod(m, struct ip6_hdr *);
1129 }
1130 #endif /* PF */
1131
1132 /*
1133 * Send the packet to the outgoing interface.
1134 * If necessary, do IPv6 fragmentation before sending.
1135 *
1136 * the logic here is rather complex:
1137 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
1138 * 1-a: send as is if tlen <= path mtu
1139 * 1-b: fragment if tlen > path mtu
1140 *
1141 * 2: if user asks us not to fragment (dontfrag == 1)
1142 * 2-a: send as is if tlen <= interface mtu
1143 * 2-b: error if tlen > interface mtu
1144 *
1145 * 3: if we always need to attach fragment header (alwaysfrag == 1)
1146 * always fragment
1147 *
1148 * 4: if dontfrag == 1 && alwaysfrag == 1
1149 * error, as we cannot handle this conflicting request
1150 */
1151 tlen = m->m_pkthdr.len;
1152
1153 if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
1154 dontfrag = 1;
1155 else
1156 dontfrag = 0;
1157 if (dontfrag && alwaysfrag) { /* case 4 */
1158 /* conflicting request - can't transmit */
1159 error = EMSGSIZE;
1160 goto bad;
1161 }
1162
1163 lck_rw_lock_shared(nd_if_rwlock);
1164 ifmtu = IN6_LINKMTU(ifp);
1165 lck_rw_done(nd_if_rwlock);
1166
1167 if (dontfrag && tlen > ifmtu) { /* case 2-b */
1168 /*
1169 * Even if the DONTFRAG option is specified, we cannot send the
1170 * packet when the data length is larger than the MTU of the
1171 * outgoing interface.
1172 * Notify the error by sending IPV6_PATHMTU ancillary data as
1173 * well as returning an error code (the latter is not described
1174 * in the API spec.)
1175 */
1176 u_int32_t mtu32;
1177 struct ip6ctlparam ip6cp;
1178
1179 mtu32 = (u_int32_t)mtu;
1180 bzero(&ip6cp, sizeof(ip6cp));
1181 ip6cp.ip6c_cmdarg = (void *)&mtu32;
1182 pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
1183 (void *)&ip6cp);
1184
1185 error = EMSGSIZE;
1186 goto bad;
1187 }
1188
1189 /*
1190 * transmit packet without fragmentation
1191 */
1192 tso = (ifp->if_hwassist & IFNET_TSO_IPV6) &&
1193 (m->m_pkthdr.csum_flags & CSUM_TSO_IPV6);
1194 if (dontfrag || (!alwaysfrag && /* case 1-a and 2-a */
1195 (tlen <= mtu || tso || (ifp->if_hwassist & CSUM_FRAGMENT_IPV6)))) {
1196 int sw_csum;
1197
1198 ip6 = mtod(m, struct ip6_hdr *);
1199 #ifdef IPSEC
1200 /* clean ipsec history once it goes out of the node */
1201 ipsec_delaux(m);
1202 #endif
1203
1204 if (apple_hwcksum_tx == 0) /* Do not let HW handle cksum */
1205 sw_csum = m->m_pkthdr.csum_flags;
1206 else
1207 sw_csum = m->m_pkthdr.csum_flags &
1208 ~IF_HWASSIST_CSUM_FLAGS(ifp->if_hwassist);
1209
1210 if ((sw_csum & CSUM_DELAY_IPV6_DATA) != 0) {
1211 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1212 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1213 }
1214 if (ro->ro_rt)
1215 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
1216 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1217 goto done;
1218 }
1219
1220 /*
1221 * try to fragment the packet. case 1-b and 3
1222 */
1223 if (mtu < IPV6_MMTU) {
1224 /* path MTU cannot be less than IPV6_MMTU */
1225 error = EMSGSIZE;
1226 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1227 goto bad;
1228 } else if (ip6->ip6_plen == 0) {
1229 /* jumbo payload cannot be fragmented */
1230 error = EMSGSIZE;
1231 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1232 goto bad;
1233 } else {
1234 struct mbuf **mnext, *m_frgpart;
1235 struct ip6_frag *ip6f;
1236 u_int32_t id = htonl(ip6_randomid());
1237 u_char nextproto;
1238
1239 /*
1240 * Too large for the destination or interface;
1241 * fragment if possible.
1242 * Must be able to put at least 8 bytes per fragment.
1243 */
1244 hlen = unfragpartlen;
1245 if (mtu > IPV6_MAXPACKET)
1246 mtu = IPV6_MAXPACKET;
1247
1248 len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
1249 if (len < 8) {
1250 error = EMSGSIZE;
1251 in6_ifstat_inc(ifp, ifs6_out_fragfail);
1252 goto bad;
1253 }
1254
1255 mnext = &m->m_nextpkt;
1256
1257 /*
1258 * Change the next header field of the last header in the
1259 * unfragmentable part.
1260 */
1261 if (exthdrs.ip6e_rthdr) {
1262 nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
1263 *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
1264 } else if (exthdrs.ip6e_dest1) {
1265 nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
1266 *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
1267 } else if (exthdrs.ip6e_hbh) {
1268 nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
1269 *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
1270 } else {
1271 nextproto = ip6->ip6_nxt;
1272 ip6->ip6_nxt = IPPROTO_FRAGMENT;
1273 }
1274
1275 if ((m->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
1276 in6_delayed_cksum(m, sizeof(struct ip6_hdr) + optlen);
1277 m->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
1278 }
1279
1280 /*
1281 * Loop through length of segment after first fragment,
1282 * make new header and copy data of each part and link onto
1283 * chain.
1284 */
1285 m0 = m;
1286 for (off = hlen; off < tlen; off += len) {
1287 MGETHDR(m, M_DONTWAIT, MT_HEADER); /* MAC-OK */
1288 if (!m) {
1289 error = ENOBUFS;
1290 ip6stat.ip6s_odropped++;
1291 goto sendorfree;
1292 }
1293 m->m_pkthdr.rcvif = NULL;
1294 m->m_flags = m0->m_flags & M_COPYFLAGS;
1295 *mnext = m;
1296 mnext = &m->m_nextpkt;
1297 m->m_data += max_linkhdr;
1298 mhip6 = mtod(m, struct ip6_hdr *);
1299 *mhip6 = *ip6;
1300 m->m_len = sizeof(*mhip6);
1301 error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
1302 if (error) {
1303 ip6stat.ip6s_odropped++;
1304 goto sendorfree;
1305 }
1306 ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
1307 if (off + len >= tlen)
1308 len = tlen - off;
1309 else
1310 ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
1311 mhip6->ip6_plen = htons((u_short)(len + hlen +
1312 sizeof(*ip6f) -
1313 sizeof(struct ip6_hdr)));
1314 if ((m_frgpart = m_copy(m0, off, len)) == 0) {
1315 error = ENOBUFS;
1316 ip6stat.ip6s_odropped++;
1317 goto sendorfree;
1318 }
1319 m_cat(m, m_frgpart);
1320 m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
1321 m->m_pkthdr.rcvif = 0;
1322 m->m_pkthdr.socket_id = m0->m_pkthdr.socket_id;
1323 #ifdef __darwin8_notyet
1324 #if CONFIG_MACF_NET
1325 mac_create_fragment(m0, m);
1326 #endif
1327 #endif
1328 ip6f->ip6f_reserved = 0;
1329 ip6f->ip6f_ident = id;
1330 ip6f->ip6f_nxt = nextproto;
1331 ip6stat.ip6s_ofragments++;
1332 in6_ifstat_inc(ifp, ifs6_out_fragcreat);
1333 }
1334
1335 in6_ifstat_inc(ifp, ifs6_out_fragok);
1336 }
1337
1338 /*
1339 * Remove leading garbages.
1340 */
1341 sendorfree:
1342 m = m0->m_nextpkt;
1343 m0->m_nextpkt = 0;
1344 m_freem(m0);
1345 for (m0 = m; m; m = m0) {
1346 m0 = m->m_nextpkt;
1347 m->m_nextpkt = 0;
1348 if (error == 0) {
1349 /* Record statistics for this interface address. */
1350 if (ia) {
1351 #ifndef __APPLE__
1352 ia->ia_ifa.if_opackets++;
1353 ia->ia_ifa.if_obytes += m->m_pkthdr.len;
1354 #endif
1355 }
1356 #if IPSEC
1357 /* clean ipsec history once it goes out of the node */
1358 ipsec_delaux(m);
1359 #endif
1360 error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
1361
1362 } else
1363 m_freem(m);
1364 }
1365
1366 if (error == 0)
1367 ip6stat.ip6s_fragmented++;
1368
1369 done:
1370 if (ro == &ip6route && ro->ro_rt) { /* brace necessary for rtfree */
1371 rtfree(ro->ro_rt);
1372 } else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
1373 rtfree(ro_pmtu->ro_rt);
1374 }
1375
1376 #if IPSEC
1377 if (sp != NULL)
1378 key_freesp(sp, KEY_SADB_UNLOCKED);
1379 #endif /* IPSEC */
1380
1381 if (ia != NULL)
1382 IFA_REMREF(&ia->ia_ifa);
1383 if (ifp != NULL)
1384 ifnet_release(ifp);
1385 if (origifp != NULL)
1386 ifnet_release(origifp);
1387 return (error);
1388
1389 freehdrs:
1390 m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
1391 m_freem(exthdrs.ip6e_dest1);
1392 m_freem(exthdrs.ip6e_rthdr);
1393 m_freem(exthdrs.ip6e_dest2);
1394 /* fall through */
1395 bad:
1396 m_freem(m);
1397 goto done;
1398 }
1399
1400 static int
1401 ip6_copyexthdr(mp, hdr, hlen)
1402 struct mbuf **mp;
1403 caddr_t hdr;
1404 int hlen;
1405 {
1406 struct mbuf *m;
1407
1408 if (hlen > MCLBYTES)
1409 return(ENOBUFS); /* XXX */
1410
1411 MGET(m, M_DONTWAIT, MT_DATA);
1412 if (!m)
1413 return(ENOBUFS);
1414
1415 if (hlen > MLEN) {
1416 MCLGET(m, M_DONTWAIT);
1417 if ((m->m_flags & M_EXT) == 0) {
1418 m_free(m);
1419 return (ENOBUFS);
1420 }
1421 }
1422 m->m_len = hlen;
1423 if (hdr)
1424 bcopy(hdr, mtod(m, caddr_t), hlen);
1425
1426 *mp = m;
1427 return (0);
1428 }
1429
1430 /*
1431 * Process a delayed payload checksum calculation.
1432 */
1433 void
1434 in6_delayed_cksum(struct mbuf *m, uint16_t offset)
1435 {
1436 uint16_t csum;
1437
1438 csum = in6_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1439 if (csum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDPIPV6) != 0) {
1440 csum = 0xffff;
1441 }
1442
1443 offset += (m->m_pkthdr.csum_data & 0xffff);
1444 if ((offset + sizeof(csum)) > m->m_len) {
1445 m_copyback(m, offset, sizeof(csum), &csum);
1446 } else {
1447 *(uint16_t *)(mtod(m, char *) + offset) = csum;
1448 }
1449 }
1450 /*
1451 * Insert jumbo payload option.
1452 */
1453 static int
1454 ip6_insert_jumboopt(exthdrs, plen)
1455 struct ip6_exthdrs *exthdrs;
1456 u_int32_t plen;
1457 {
1458 struct mbuf *mopt;
1459 u_char *optbuf;
1460 u_int32_t v;
1461
1462 #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */
1463
1464 /*
1465 * If there is no hop-by-hop options header, allocate new one.
1466 * If there is one but it doesn't have enough space to store the
1467 * jumbo payload option, allocate a cluster to store the whole options.
1468 * Otherwise, use it to store the options.
1469 */
1470 if (exthdrs->ip6e_hbh == 0) {
1471 MGET(mopt, M_DONTWAIT, MT_DATA);
1472 if (mopt == 0)
1473 return (ENOBUFS);
1474 mopt->m_len = JUMBOOPTLEN;
1475 optbuf = mtod(mopt, u_char *);
1476 optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
1477 exthdrs->ip6e_hbh = mopt;
1478 } else {
1479 struct ip6_hbh *hbh;
1480
1481 mopt = exthdrs->ip6e_hbh;
1482 if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
1483 /*
1484 * XXX assumption:
1485 * - exthdrs->ip6e_hbh is not referenced from places
1486 * other than exthdrs.
1487 * - exthdrs->ip6e_hbh is not an mbuf chain.
1488 */
1489 u_int32_t oldoptlen = mopt->m_len;
1490 struct mbuf *n;
1491
1492 /*
1493 * XXX: give up if the whole (new) hbh header does
1494 * not fit even in an mbuf cluster.
1495 */
1496 if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
1497 return (ENOBUFS);
1498
1499 /*
1500 * As a consequence, we must always prepare a cluster
1501 * at this point.
1502 */
1503 MGET(n, M_DONTWAIT, MT_DATA);
1504 if (n) {
1505 MCLGET(n, M_DONTWAIT);
1506 if ((n->m_flags & M_EXT) == 0) {
1507 m_freem(n);
1508 n = NULL;
1509 }
1510 }
1511 if (!n)
1512 return (ENOBUFS);
1513 n->m_len = oldoptlen + JUMBOOPTLEN;
1514 bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
1515 oldoptlen);
1516 optbuf = mtod(n, u_char *) + oldoptlen;
1517 m_freem(mopt);
1518 mopt = exthdrs->ip6e_hbh = n;
1519 } else {
1520 optbuf = mtod(mopt, u_char *) + mopt->m_len;
1521 mopt->m_len += JUMBOOPTLEN;
1522 }
1523 optbuf[0] = IP6OPT_PADN;
1524 optbuf[1] = 1;
1525
1526 /*
1527 * Adjust the header length according to the pad and
1528 * the jumbo payload option.
1529 */
1530 hbh = mtod(mopt, struct ip6_hbh *);
1531 hbh->ip6h_len += (JUMBOOPTLEN >> 3);
1532 }
1533
1534 /* fill in the option. */
1535 optbuf[2] = IP6OPT_JUMBO;
1536 optbuf[3] = 4;
1537 v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
1538 bcopy(&v, &optbuf[4], sizeof(u_int32_t));
1539
1540 /* finally, adjust the packet header length */
1541 exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;
1542
1543 return (0);
1544 #undef JUMBOOPTLEN
1545 }
1546
1547 /*
1548 * Insert fragment header and copy unfragmentable header portions.
1549 */
1550 static int
1551 ip6_insertfraghdr(m0, m, hlen, frghdrp)
1552 struct mbuf *m0, *m;
1553 int hlen;
1554 struct ip6_frag **frghdrp;
1555 {
1556 struct mbuf *n, *mlast;
1557
1558 if (hlen > sizeof(struct ip6_hdr)) {
1559 n = m_copym(m0, sizeof(struct ip6_hdr),
1560 hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
1561 if (n == 0)
1562 return (ENOBUFS);
1563 m->m_next = n;
1564 } else
1565 n = m;
1566
1567 /* Search for the last mbuf of unfragmentable part. */
1568 for (mlast = n; mlast->m_next; mlast = mlast->m_next)
1569 ;
1570
1571 if ((mlast->m_flags & M_EXT) == 0 &&
1572 M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
1573 /* use the trailing space of the last mbuf for the fragment hdr */
1574 *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
1575 mlast->m_len);
1576 mlast->m_len += sizeof(struct ip6_frag);
1577 m->m_pkthdr.len += sizeof(struct ip6_frag);
1578 } else {
1579 /* allocate a new mbuf for the fragment header */
1580 struct mbuf *mfrg;
1581
1582 MGET(mfrg, M_DONTWAIT, MT_DATA);
1583 if (mfrg == 0)
1584 return (ENOBUFS);
1585 mfrg->m_len = sizeof(struct ip6_frag);
1586 *frghdrp = mtod(mfrg, struct ip6_frag *);
1587 mlast->m_next = mfrg;
1588 }
1589
1590 return (0);
1591 }
1592
1593 extern int load_ipfw(void);
1594 static int
1595 ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
1596 struct ifnet *ifp, struct in6_addr *dst, u_int32_t *mtup,
1597 int *alwaysfragp)
1598 {
1599 u_int32_t mtu = 0;
1600 int alwaysfrag = 0;
1601 int error = 0;
1602
1603 if (ro_pmtu != ro) {
1604 /* The first hop and the final destination may differ. */
1605 struct sockaddr_in6 *sa6_dst =
1606 (struct sockaddr_in6 *)&ro_pmtu->ro_dst;
1607 if (ro_pmtu->ro_rt &&
1608 ((ro_pmtu->ro_rt->rt_flags & RTF_UP) == 0 ||
1609 ro_pmtu->ro_rt->generation_id != route_generation ||
1610 !IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst))) {
1611 rtfree(ro_pmtu->ro_rt);
1612 ro_pmtu->ro_rt = (struct rtentry *)NULL;
1613 }
1614 if (ro_pmtu->ro_rt == NULL) {
1615 bzero(sa6_dst, sizeof(*sa6_dst));
1616 sa6_dst->sin6_family = AF_INET6;
1617 sa6_dst->sin6_len = sizeof(struct sockaddr_in6);
1618 sa6_dst->sin6_addr = *dst;
1619
1620 rtalloc_scoped((struct route *)ro_pmtu,
1621 ifp != NULL ? ifp->if_index : IFSCOPE_NONE);
1622 }
1623 }
1624
1625
1626 if (ro_pmtu->ro_rt != NULL) {
1627 u_int32_t ifmtu;
1628
1629 lck_rw_lock_shared(nd_if_rwlock);
1630 ifmtu = IN6_LINKMTU(ifp);
1631 lck_rw_done(nd_if_rwlock);
1632
1633 RT_LOCK_SPIN(ro_pmtu->ro_rt);
1634 mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
1635 if (mtu > ifmtu || mtu == 0) {
1636 /*
1637 * The MTU on the route is larger than the MTU on
1638 * the interface! This shouldn't happen, unless the
1639 * MTU of the interface has been changed after the
1640 * interface was brought up. Change the MTU in the
1641 * route to match the interface MTU (as long as the
1642 * field isn't locked).
1643 *
1644 * if MTU on the route is 0, we need to fix the MTU.
1645 * this case happens with path MTU discovery timeouts.
1646 */
1647 mtu = ifmtu;
1648 if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
1649 ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */
1650 }
1651 else if (mtu < IPV6_MMTU) {
1652 /*
1653 * RFC2460 section 5, last paragraph:
1654 * if we record ICMPv6 too big message with
1655 * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU
1656 * or smaller, with framgent header attached.
1657 * (fragment header is needed regardless from the
1658 * packet size, for translators to identify packets)
1659 */
1660 alwaysfrag = 1;
1661 mtu = IPV6_MMTU;
1662 }
1663 RT_UNLOCK(ro_pmtu->ro_rt);
1664 } else {
1665 if (ifp) {
1666 lck_rw_lock_shared(nd_if_rwlock);
1667 mtu = IN6_LINKMTU(ifp);
1668 lck_rw_done(nd_if_rwlock);
1669 } else
1670 error = EHOSTUNREACH; /* XXX */
1671 }
1672
1673 *mtup = mtu;
1674 if (alwaysfragp)
1675 *alwaysfragp = alwaysfrag;
1676 return (error);
1677 }
1678
1679 /*
1680 * IP6 socket option processing.
1681 */
1682 int
1683 ip6_ctloutput(so, sopt)
1684 struct socket *so;
1685 struct sockopt *sopt;
1686 {
1687 int optdatalen, uproto;
1688 void *optdata;
1689 int privileged;
1690 struct inpcb *in6p = sotoinpcb(so);
1691 int error = 0, optval = 0;
1692 int level, op = -1, optname = 0;
1693 int optlen = 0;
1694 struct proc *p;
1695
1696 if (sopt == NULL) {
1697 panic("ip6_ctloutput: arg soopt is NULL");
1698 /* NOTREACHED */
1699 }
1700 level = sopt->sopt_level;
1701 op = sopt->sopt_dir;
1702 optname = sopt->sopt_name;
1703 optlen = sopt->sopt_valsize;
1704 p = sopt->sopt_p;
1705 uproto = (int)so->so_proto->pr_protocol;
1706
1707 privileged = (proc_suser(p) == 0);
1708
1709 if (level == IPPROTO_IPV6) {
1710 switch (op) {
1711
1712 case SOPT_SET:
1713 switch (optname) {
1714 case IPV6_2292PKTOPTIONS:
1715 {
1716 struct mbuf *m;
1717
1718 error = soopt_getm(sopt, &m); /* XXX */
1719 if (error != 0)
1720 break;
1721 error = soopt_mcopyin(sopt, m); /* XXX */
1722 if (error != 0)
1723 break;
1724 error = ip6_pcbopts(&in6p->in6p_outputopts,
1725 m, so, sopt);
1726 m_freem(m); /* XXX */
1727 break;
1728 }
1729
1730 /*
1731 * Use of some Hop-by-Hop options or some
1732 * Destination options, might require special
1733 * privilege. That is, normal applications
1734 * (without special privilege) might be forbidden
1735 * from setting certain options in outgoing packets,
1736 * and might never see certain options in received
1737 * packets. [RFC 2292 Section 6]
1738 * KAME specific note:
1739 * KAME prevents non-privileged users from sending or
1740 * receiving ANY hbh/dst options in order to avoid
1741 * overhead of parsing options in the kernel.
1742 */
1743 case IPV6_RECVHOPOPTS:
1744 case IPV6_RECVDSTOPTS:
1745 case IPV6_RECVRTHDRDSTOPTS:
1746 if (!privileged)
1747 break;
1748 /* FALLTHROUGH */
1749 case IPV6_UNICAST_HOPS:
1750 case IPV6_HOPLIMIT:
1751 case IPV6_FAITH:
1752
1753 case IPV6_RECVPKTINFO:
1754 case IPV6_RECVHOPLIMIT:
1755 case IPV6_RECVRTHDR:
1756 case IPV6_RECVPATHMTU:
1757 case IPV6_RECVTCLASS:
1758 case IPV6_V6ONLY:
1759 case IPV6_AUTOFLOWLABEL:
1760 if (optlen != sizeof(int)) {
1761 error = EINVAL;
1762 break;
1763 }
1764 error = sooptcopyin(sopt, &optval,
1765 sizeof optval, sizeof optval);
1766 if (error)
1767 break;
1768 switch (optname) {
1769
1770 case IPV6_UNICAST_HOPS:
1771 if (optval < -1 || optval >= 256)
1772 error = EINVAL;
1773 else {
1774 /* -1 = kernel default */
1775 in6p->in6p_hops = optval;
1776 if ((in6p->inp_vflag &
1777 INP_IPV4) != 0)
1778 in6p->inp_ip_ttl = optval;
1779 }
1780 break;
1781 #define OPTSET(bit) \
1782 do { \
1783 if (optval) \
1784 in6p->inp_flags |= (bit); \
1785 else \
1786 in6p->inp_flags &= ~(bit); \
1787 } while (/*CONSTCOND*/ 0)
1788 #define OPTSET2292(bit) \
1789 do { \
1790 in6p->inp_flags |= IN6P_RFC2292; \
1791 if (optval) \
1792 in6p->inp_flags |= (bit); \
1793 else \
1794 in6p->inp_flags &= ~(bit); \
1795 } while (/*CONSTCOND*/ 0)
1796 #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0)
1797
1798 case IPV6_RECVPKTINFO:
1799 /* cannot mix with RFC2292 */
1800 if (OPTBIT(IN6P_RFC2292)) {
1801 error = EINVAL;
1802 break;
1803 }
1804 OPTSET(IN6P_PKTINFO);
1805 break;
1806
1807 case IPV6_HOPLIMIT:
1808 {
1809 struct ip6_pktopts **optp;
1810
1811 /* cannot mix with RFC2292 */
1812 if (OPTBIT(IN6P_RFC2292)) {
1813 error = EINVAL;
1814 break;
1815 }
1816 optp = &in6p->in6p_outputopts;
1817 error = ip6_pcbopt(IPV6_HOPLIMIT,
1818 (u_char *)&optval, sizeof(optval),
1819 optp, uproto);
1820 break;
1821 }
1822
1823 case IPV6_RECVHOPLIMIT:
1824 /* cannot mix with RFC2292 */
1825 if (OPTBIT(IN6P_RFC2292)) {
1826 error = EINVAL;
1827 break;
1828 }
1829 OPTSET(IN6P_HOPLIMIT);
1830 break;
1831
1832 case IPV6_RECVHOPOPTS:
1833 /* cannot mix with RFC2292 */
1834 if (OPTBIT(IN6P_RFC2292)) {
1835 error = EINVAL;
1836 break;
1837 }
1838 OPTSET(IN6P_HOPOPTS);
1839 break;
1840
1841 case IPV6_RECVDSTOPTS:
1842 /* cannot mix with RFC2292 */
1843 if (OPTBIT(IN6P_RFC2292)) {
1844 error = EINVAL;
1845 break;
1846 }
1847 OPTSET(IN6P_DSTOPTS);
1848 break;
1849
1850 case IPV6_RECVRTHDRDSTOPTS:
1851 /* cannot mix with RFC2292 */
1852 if (OPTBIT(IN6P_RFC2292)) {
1853 error = EINVAL;
1854 break;
1855 }
1856 OPTSET(IN6P_RTHDRDSTOPTS);
1857 break;
1858
1859 case IPV6_RECVRTHDR:
1860 /* cannot mix with RFC2292 */
1861 if (OPTBIT(IN6P_RFC2292)) {
1862 error = EINVAL;
1863 break;
1864 }
1865 OPTSET(IN6P_RTHDR);
1866 break;
1867
1868 case IPV6_FAITH:
1869 OPTSET(INP_FAITH);
1870 break;
1871
1872 case IPV6_RECVPATHMTU:
1873 /*
1874 * We ignore this option for TCP
1875 * sockets.
1876 * (RFC3542 leaves this case
1877 * unspecified.)
1878 */
1879 if (uproto != IPPROTO_TCP)
1880 OPTSET(IN6P_MTU);
1881 break;
1882
1883 case IPV6_V6ONLY:
1884 /*
1885 * make setsockopt(IPV6_V6ONLY)
1886 * available only prior to bind(2).
1887 * see ipng mailing list, Jun 22 2001.
1888 */
1889 if (in6p->inp_lport ||
1890 !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
1891 error = EINVAL;
1892 break;
1893 }
1894 OPTSET(IN6P_IPV6_V6ONLY);
1895 if (optval)
1896 in6p->inp_vflag &= ~INP_IPV4;
1897 else
1898 in6p->inp_vflag |= INP_IPV4;
1899 break;
1900 case IPV6_RECVTCLASS:
1901 /* we can mix with RFC2292 */
1902 OPTSET(IN6P_TCLASS);
1903 break;
1904 case IPV6_AUTOFLOWLABEL:
1905 OPTSET(IN6P_AUTOFLOWLABEL);
1906 break;
1907
1908 }
1909 break;
1910
1911 case IPV6_TCLASS:
1912 case IPV6_DONTFRAG:
1913 case IPV6_USE_MIN_MTU:
1914 case IPV6_PREFER_TEMPADDR:
1915 if (optlen != sizeof(optval)) {
1916 error = EINVAL;
1917 break;
1918 }
1919 error = sooptcopyin(sopt, &optval,
1920 sizeof optval, sizeof optval);
1921 if (error)
1922 break;
1923 {
1924 struct ip6_pktopts **optp;
1925 optp = &in6p->in6p_outputopts;
1926 error = ip6_pcbopt(optname,
1927 (u_char *)&optval, sizeof(optval),
1928 optp, uproto);
1929 break;
1930 }
1931
1932 case IPV6_2292PKTINFO:
1933 case IPV6_2292HOPLIMIT:
1934 case IPV6_2292HOPOPTS:
1935 case IPV6_2292DSTOPTS:
1936 case IPV6_2292RTHDR:
1937 /* RFC 2292 */
1938 if (optlen != sizeof(int)) {
1939 error = EINVAL;
1940 break;
1941 }
1942 error = sooptcopyin(sopt, &optval,
1943 sizeof optval, sizeof optval);
1944 if (error)
1945 break;
1946 switch (optname) {
1947 case IPV6_2292PKTINFO:
1948 OPTSET2292(IN6P_PKTINFO);
1949 break;
1950 case IPV6_2292HOPLIMIT:
1951 OPTSET2292(IN6P_HOPLIMIT);
1952 break;
1953 case IPV6_2292HOPOPTS:
1954 /*
1955 * Check super-user privilege.
1956 * See comments for IPV6_RECVHOPOPTS.
1957 */
1958 if (!privileged)
1959 return(EPERM);
1960 OPTSET2292(IN6P_HOPOPTS);
1961 break;
1962 case IPV6_2292DSTOPTS:
1963 if (!privileged)
1964 return(EPERM);
1965 OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
1966 break;
1967 case IPV6_2292RTHDR:
1968 OPTSET2292(IN6P_RTHDR);
1969 break;
1970 }
1971 break;
1972 case IPV6_3542PKTINFO:
1973 case IPV6_3542HOPOPTS:
1974 case IPV6_3542RTHDR:
1975 case IPV6_3542DSTOPTS:
1976 case IPV6_RTHDRDSTOPTS:
1977 case IPV6_3542NEXTHOP:
1978 {
1979 /* new advanced API (RFC3542) */
1980 struct mbuf *m;
1981
1982 /* cannot mix with RFC2292 */
1983 if (OPTBIT(IN6P_RFC2292)) {
1984 error = EINVAL;
1985 break;
1986 }
1987 error = soopt_getm(sopt, &m);
1988 if (error != 0)
1989 break;
1990 error = soopt_mcopyin(sopt, m);
1991 if (error) {
1992 m_freem(m);
1993 break;
1994 }
1995 error = ip6_pcbopt(optname, mtod(m, u_char *),
1996 m->m_len, &in6p->in6p_outputopts, uproto);
1997 m_freem(m);
1998 break;
1999 }
2000 #undef OPTSET
2001
2002 case IPV6_MULTICAST_IF:
2003 case IPV6_MULTICAST_HOPS:
2004 case IPV6_MULTICAST_LOOP:
2005 case IPV6_JOIN_GROUP:
2006 case IPV6_LEAVE_GROUP:
2007 case IPV6_MSFILTER:
2008 case MCAST_BLOCK_SOURCE:
2009 case MCAST_UNBLOCK_SOURCE:
2010 case MCAST_JOIN_GROUP:
2011 case MCAST_LEAVE_GROUP:
2012 case MCAST_JOIN_SOURCE_GROUP:
2013 case MCAST_LEAVE_SOURCE_GROUP:
2014 error = ip6_setmoptions(in6p, sopt);
2015 break;
2016
2017 case IPV6_PORTRANGE:
2018 error = sooptcopyin(sopt, &optval,
2019 sizeof optval, sizeof optval);
2020 if (error)
2021 break;
2022
2023 switch (optval) {
2024 case IPV6_PORTRANGE_DEFAULT:
2025 in6p->inp_flags &= ~(INP_LOWPORT);
2026 in6p->inp_flags &= ~(INP_HIGHPORT);
2027 break;
2028
2029 case IPV6_PORTRANGE_HIGH:
2030 in6p->inp_flags &= ~(INP_LOWPORT);
2031 in6p->inp_flags |= INP_HIGHPORT;
2032 break;
2033
2034 case IPV6_PORTRANGE_LOW:
2035 in6p->inp_flags &= ~(INP_HIGHPORT);
2036 in6p->inp_flags |= INP_LOWPORT;
2037 break;
2038
2039 default:
2040 error = EINVAL;
2041 break;
2042 }
2043 break;
2044
2045 #if IPSEC
2046 case IPV6_IPSEC_POLICY:
2047 {
2048 caddr_t req = NULL;
2049 size_t len = 0;
2050 struct mbuf *m;
2051
2052 if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
2053 break;
2054 if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
2055 break;
2056 if (m) {
2057 req = mtod(m, caddr_t);
2058 len = m->m_len;
2059 }
2060 error = ipsec6_set_policy(in6p, optname, req,
2061 len, privileged);
2062 m_freem(m);
2063 }
2064 break;
2065 #endif /* KAME IPSEC */
2066
2067 #if IPFIREWALL
2068 case IPV6_FW_ADD:
2069 case IPV6_FW_DEL:
2070 case IPV6_FW_FLUSH:
2071 case IPV6_FW_ZERO:
2072 {
2073 if (ip6_fw_ctl_ptr == NULL)
2074 load_ip6fw();
2075 if (ip6_fw_ctl_ptr != NULL)
2076 error = (*ip6_fw_ctl_ptr)(sopt);
2077 else
2078 return ENOPROTOOPT;
2079 }
2080 break;
2081 #endif /* IPFIREWALL */
2082
2083 /*
2084 * IPv6 variant of IP_BOUND_IF; for details see
2085 * comments on IP_BOUND_IF in ip_ctloutput().
2086 */
2087 case IPV6_BOUND_IF:
2088 /* This option is settable only on IPv6 */
2089 if (!(in6p->inp_vflag & INP_IPV6)) {
2090 error = EINVAL;
2091 break;
2092 }
2093
2094 error = sooptcopyin(sopt, &optval,
2095 sizeof (optval), sizeof (optval));
2096
2097 if (error)
2098 break;
2099
2100 inp_bindif(in6p, optval);
2101 break;
2102
2103 case IPV6_NO_IFT_CELLULAR:
2104 /* This option is settable only for IPv6 */
2105 if (!(in6p->inp_vflag & INP_IPV6)) {
2106 error = EINVAL;
2107 break;
2108 }
2109
2110 error = sooptcopyin(sopt, &optval,
2111 sizeof (optval), sizeof (optval));
2112
2113 if (error)
2114 break;
2115
2116 error = inp_nocellular(in6p, optval);
2117 break;
2118
2119 case IPV6_OUT_IF:
2120 /* This option is not settable */
2121 error = EINVAL;
2122 break;
2123
2124 default:
2125 error = ENOPROTOOPT;
2126 break;
2127 }
2128 break;
2129
2130 case SOPT_GET:
2131 switch (optname) {
2132
2133 case IPV6_2292PKTOPTIONS:
2134 /*
2135 * RFC3542 (effectively) deprecated the
2136 * semantics of the 2292-style pktoptions.
2137 * Since it was not reliable in nature (i.e.,
2138 * applications had to expect the lack of some
2139 * information after all), it would make sense
2140 * to simplify this part by always returning
2141 * empty data.
2142 */
2143 sopt->sopt_valsize = 0;
2144 break;
2145
2146 case IPV6_RECVHOPOPTS:
2147 case IPV6_RECVDSTOPTS:
2148 case IPV6_RECVRTHDRDSTOPTS:
2149 case IPV6_UNICAST_HOPS:
2150 case IPV6_RECVPKTINFO:
2151 case IPV6_RECVHOPLIMIT:
2152 case IPV6_RECVRTHDR:
2153 case IPV6_RECVPATHMTU:
2154
2155 case IPV6_FAITH:
2156 case IPV6_V6ONLY:
2157 case IPV6_PORTRANGE:
2158 case IPV6_RECVTCLASS:
2159 case IPV6_AUTOFLOWLABEL:
2160 switch (optname) {
2161
2162 case IPV6_RECVHOPOPTS:
2163 optval = OPTBIT(IN6P_HOPOPTS);
2164 break;
2165
2166 case IPV6_RECVDSTOPTS:
2167 optval = OPTBIT(IN6P_DSTOPTS);
2168 break;
2169
2170 case IPV6_RECVRTHDRDSTOPTS:
2171 optval = OPTBIT(IN6P_RTHDRDSTOPTS);
2172 break;
2173
2174 case IPV6_UNICAST_HOPS:
2175 optval = in6p->in6p_hops;
2176 break;
2177
2178 case IPV6_RECVPKTINFO:
2179 optval = OPTBIT(IN6P_PKTINFO);
2180 break;
2181
2182 case IPV6_RECVHOPLIMIT:
2183 optval = OPTBIT(IN6P_HOPLIMIT);
2184 break;
2185
2186 case IPV6_RECVRTHDR:
2187 optval = OPTBIT(IN6P_RTHDR);
2188 break;
2189
2190 case IPV6_RECVPATHMTU:
2191 optval = OPTBIT(IN6P_MTU);
2192 break;
2193
2194 case IPV6_FAITH:
2195 optval = OPTBIT(INP_FAITH);
2196 break;
2197
2198 case IPV6_V6ONLY:
2199 optval = OPTBIT(IN6P_IPV6_V6ONLY);
2200 break;
2201
2202 case IPV6_PORTRANGE:
2203 {
2204 int flags;
2205 flags = in6p->inp_flags;
2206 if (flags & INP_HIGHPORT)
2207 optval = IPV6_PORTRANGE_HIGH;
2208 else if (flags & INP_LOWPORT)
2209 optval = IPV6_PORTRANGE_LOW;
2210 else
2211 optval = 0;
2212 break;
2213 }
2214 case IPV6_RECVTCLASS:
2215 optval = OPTBIT(IN6P_TCLASS);
2216 break;
2217
2218 case IPV6_AUTOFLOWLABEL:
2219 optval = OPTBIT(IN6P_AUTOFLOWLABEL);
2220 break;
2221 }
2222 if (error)
2223 break;
2224 error = sooptcopyout(sopt, &optval,
2225 sizeof optval);
2226 break;
2227
2228 case IPV6_PATHMTU:
2229 {
2230 u_int32_t pmtu = 0;
2231 struct ip6_mtuinfo mtuinfo;
2232 struct route_in6 sro;
2233
2234 bzero(&sro, sizeof(sro));
2235
2236 if (!(so->so_state & SS_ISCONNECTED))
2237 return (ENOTCONN);
2238 /*
2239 * XXX: we do not consider the case of source
2240 * routing, or optional information to specify
2241 * the outgoing interface.
2242 */
2243 error = ip6_getpmtu(&sro, NULL, NULL,
2244 &in6p->in6p_faddr, &pmtu, NULL);
2245 if (sro.ro_rt)
2246 rtfree(sro.ro_rt);
2247 if (error)
2248 break;
2249 if (pmtu > IPV6_MAXPACKET)
2250 pmtu = IPV6_MAXPACKET;
2251
2252 bzero(&mtuinfo, sizeof(mtuinfo));
2253 mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
2254 optdata = (void *)&mtuinfo;
2255 optdatalen = sizeof(mtuinfo);
2256 error = sooptcopyout(sopt, optdata,
2257 optdatalen);
2258 break;
2259 }
2260
2261 case IPV6_2292PKTINFO:
2262 case IPV6_2292HOPLIMIT:
2263 case IPV6_2292HOPOPTS:
2264 case IPV6_2292RTHDR:
2265 case IPV6_2292DSTOPTS:
2266 switch (optname) {
2267 case IPV6_2292PKTINFO:
2268 optval = OPTBIT(IN6P_PKTINFO);
2269 break;
2270 case IPV6_2292HOPLIMIT:
2271 optval = OPTBIT(IN6P_HOPLIMIT);
2272 break;
2273 case IPV6_2292HOPOPTS:
2274 optval = OPTBIT(IN6P_HOPOPTS);
2275 break;
2276 case IPV6_2292RTHDR:
2277 optval = OPTBIT(IN6P_RTHDR);
2278 break;
2279 case IPV6_2292DSTOPTS:
2280 optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
2281 break;
2282 }
2283 error = sooptcopyout(sopt, &optval,
2284 sizeof optval);
2285 break;
2286 case IPV6_PKTINFO:
2287 case IPV6_HOPOPTS:
2288 case IPV6_RTHDR:
2289 case IPV6_DSTOPTS:
2290 case IPV6_RTHDRDSTOPTS:
2291 case IPV6_NEXTHOP:
2292 case IPV6_TCLASS:
2293 case IPV6_DONTFRAG:
2294 case IPV6_USE_MIN_MTU:
2295 case IPV6_PREFER_TEMPADDR:
2296 error = ip6_getpcbopt(in6p->in6p_outputopts,
2297 optname, sopt);
2298 break;
2299
2300 case IPV6_MULTICAST_IF:
2301 case IPV6_MULTICAST_HOPS:
2302 case IPV6_MULTICAST_LOOP:
2303 case IPV6_MSFILTER:
2304 error = ip6_getmoptions(in6p, sopt);
2305 break;
2306
2307 #if IPSEC
2308 case IPV6_IPSEC_POLICY:
2309 {
2310 caddr_t req = NULL;
2311 size_t len = 0;
2312 struct mbuf *m = NULL;
2313 struct mbuf **mp = &m;
2314
2315 error = soopt_getm(sopt, &m); /* XXX */
2316 if (error != 0)
2317 break;
2318 error = soopt_mcopyin(sopt, m); /* XXX */
2319 if (error != 0)
2320 break;
2321 if (m) {
2322 req = mtod(m, caddr_t);
2323 len = m->m_len;
2324 }
2325 error = ipsec6_get_policy(in6p, req, len, mp);
2326 if (error == 0)
2327 error = soopt_mcopyout(sopt, m); /*XXX*/
2328 if (error == 0 && m)
2329 m_freem(m);
2330 break;
2331 }
2332 #endif /* KAME IPSEC */
2333
2334 #if IPFIREWALL
2335 case IPV6_FW_GET:
2336 {
2337 if (ip6_fw_ctl_ptr == NULL)
2338 load_ip6fw();
2339 if (ip6_fw_ctl_ptr != NULL)
2340 error = (*ip6_fw_ctl_ptr)(sopt);
2341 else
2342 return ENOPROTOOPT;
2343 }
2344 break;
2345 #endif /* IPFIREWALL */
2346
2347 case IPV6_BOUND_IF:
2348 if (in6p->inp_flags & INP_BOUND_IF)
2349 optval = in6p->inp_boundif;
2350 error = sooptcopyout(sopt, &optval,
2351 sizeof (optval));
2352 break;
2353
2354 case IPV6_NO_IFT_CELLULAR:
2355 optval = (in6p->inp_flags & INP_NO_IFT_CELLULAR)
2356 ? 1 : 0;
2357 error = sooptcopyout(sopt, &optval,
2358 sizeof (optval));
2359 break;
2360
2361 case IPV6_OUT_IF:
2362 optval = in6p->in6p_last_outif;
2363 error = sooptcopyout(sopt, &optval,
2364 sizeof (optval));
2365 break;
2366
2367 default:
2368 error = ENOPROTOOPT;
2369 break;
2370 }
2371 break;
2372 }
2373 } else {
2374 error = EINVAL;
2375 }
2376 return(error);
2377 }
2378
2379 int
2380 ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
2381 {
2382 int error = 0, optval, optlen;
2383 const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
2384 struct inpcb *in6p = sotoinpcb(so);
2385 int level, op, optname;
2386
2387 level = sopt->sopt_level;
2388 op = sopt->sopt_dir;
2389 optname = sopt->sopt_name;
2390 optlen = sopt->sopt_valsize;
2391
2392 if (level != IPPROTO_IPV6) {
2393 return (EINVAL);
2394 }
2395
2396 switch (optname) {
2397 case IPV6_CHECKSUM:
2398 /*
2399 * For ICMPv6 sockets, no modification allowed for checksum
2400 * offset, permit "no change" values to help existing apps.
2401 *
2402 * RFC3542 says: "An attempt to set IPV6_CHECKSUM
2403 * for an ICMPv6 socket will fail."
2404 * The current behavior does not meet RFC3542.
2405 */
2406 switch (op) {
2407 case SOPT_SET:
2408 if (optlen != sizeof(int)) {
2409 error = EINVAL;
2410 break;
2411 }
2412 error = sooptcopyin(sopt, &optval, sizeof(optval),
2413 sizeof(optval));
2414 if (error)
2415 break;
2416 if ((optval % 2) != 0) {
2417 /* the API assumes even offset values */
2418 error = EINVAL;
2419 } else if (so->so_proto->pr_protocol ==
2420 IPPROTO_ICMPV6) {
2421 if (optval != icmp6off)
2422 error = EINVAL;
2423 } else
2424 in6p->in6p_cksum = optval;
2425 break;
2426
2427 case SOPT_GET:
2428 if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
2429 optval = icmp6off;
2430 else
2431 optval = in6p->in6p_cksum;
2432
2433 error = sooptcopyout(sopt, &optval, sizeof(optval));
2434 break;
2435
2436 default:
2437 error = EINVAL;
2438 break;
2439 }
2440 break;
2441
2442 default:
2443 error = ENOPROTOOPT;
2444 break;
2445 }
2446
2447 return (error);
2448 }
2449
2450 /*
2451 * Set up IP6 options in pcb for insertion in output packets or
2452 * specifying behavior of outgoing packets.
2453 */
2454 static int
2455 ip6_pcbopts(
2456 struct ip6_pktopts **pktopt,
2457 struct mbuf *m,
2458 __unused struct socket *so,
2459 __unused struct sockopt *sopt)
2460 {
2461 struct ip6_pktopts *opt = *pktopt;
2462 int error = 0;
2463
2464 /* turn off any old options. */
2465 if (opt) {
2466 #if DIAGNOSTIC
2467 if (opt->ip6po_pktinfo || opt->ip6po_nexthop ||
2468 opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 ||
2469 opt->ip6po_rhinfo.ip6po_rhi_rthdr)
2470 printf("ip6_pcbopts: all specified options are cleared.\n");
2471 #endif
2472 ip6_clearpktopts(opt, -1);
2473 } else {
2474 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2475 if (opt == NULL)
2476 return ENOBUFS;
2477 }
2478 *pktopt = NULL;
2479
2480 if (!m || m->m_len == 0) {
2481 /*
2482 * Only turning off any previous options, regardless of
2483 * whether the opt is just created or given.
2484 */
2485 if (opt)
2486 FREE(opt, M_IP6OPT);
2487 return(0);
2488 }
2489
2490 /* set options specified by user. */
2491 if ((error = ip6_setpktopts(m, opt, NULL, so->so_proto->pr_protocol)) != 0) {
2492 ip6_clearpktopts(opt, -1); /* XXX: discard all options */
2493 FREE(opt, M_IP6OPT);
2494 return(error);
2495 }
2496 *pktopt = opt;
2497 return(0);
2498 }
2499
2500 /*
2501 * initialize ip6_pktopts. beware that there are non-zero default values in
2502 * the struct.
2503 */
2504 void
2505 ip6_initpktopts(struct ip6_pktopts *opt)
2506 {
2507
2508 bzero(opt, sizeof(*opt));
2509 opt->ip6po_hlim = -1; /* -1 means default hop limit */
2510 opt->ip6po_tclass = -1; /* -1 means default traffic class */
2511 opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
2512 opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
2513 }
2514
2515 static int
2516 ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, int uproto)
2517 {
2518 struct ip6_pktopts *opt;
2519
2520 opt = *pktopt;
2521 if (opt == NULL) {
2522 opt = _MALLOC(sizeof(*opt), M_IP6OPT, M_WAITOK);
2523 if (opt == NULL)
2524 return(ENOBUFS);
2525 ip6_initpktopts(opt);
2526 *pktopt = opt;
2527 }
2528
2529 return (ip6_setpktopt(optname, buf, len, opt, 1, 0, uproto));
2530 }
2531
2532 static int
2533 ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
2534 {
2535 void *optdata = NULL;
2536 int optdatalen = 0;
2537 struct ip6_ext *ip6e;
2538 int error = 0;
2539 struct in6_pktinfo null_pktinfo;
2540 int deftclass = 0, on;
2541 int defminmtu = IP6PO_MINMTU_MCASTONLY;
2542 int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
2543
2544 switch (optname) {
2545 case IPV6_PKTINFO:
2546 if (pktopt && pktopt->ip6po_pktinfo)
2547 optdata = (void *)pktopt->ip6po_pktinfo;
2548 else {
2549 /* XXX: we don't have to do this every time... */
2550 bzero(&null_pktinfo, sizeof(null_pktinfo));
2551 optdata = (void *)&null_pktinfo;
2552 }
2553 optdatalen = sizeof(struct in6_pktinfo);
2554 break;
2555 case IPV6_TCLASS:
2556 if (pktopt && pktopt->ip6po_tclass >= 0)
2557 optdata = (void *)&pktopt->ip6po_tclass;
2558 else
2559 optdata = (void *)&deftclass;
2560 optdatalen = sizeof(int);
2561 break;
2562 case IPV6_HOPOPTS:
2563 if (pktopt && pktopt->ip6po_hbh) {
2564 optdata = (void *)pktopt->ip6po_hbh;
2565 ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
2566 optdatalen = (ip6e->ip6e_len + 1) << 3;
2567 }
2568 break;
2569 case IPV6_RTHDR:
2570 if (pktopt && pktopt->ip6po_rthdr) {
2571 optdata = (void *)pktopt->ip6po_rthdr;
2572 ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
2573 optdatalen = (ip6e->ip6e_len + 1) << 3;
2574 }
2575 break;
2576 case IPV6_RTHDRDSTOPTS:
2577 if (pktopt && pktopt->ip6po_dest1) {
2578 optdata = (void *)pktopt->ip6po_dest1;
2579 ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
2580 optdatalen = (ip6e->ip6e_len + 1) << 3;
2581 }
2582 break;
2583 case IPV6_DSTOPTS:
2584 if (pktopt && pktopt->ip6po_dest2) {
2585 optdata = (void *)pktopt->ip6po_dest2;
2586 ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
2587 optdatalen = (ip6e->ip6e_len + 1) << 3;
2588 }
2589 break;
2590 case IPV6_NEXTHOP:
2591 if (pktopt && pktopt->ip6po_nexthop) {
2592 optdata = (void *)pktopt->ip6po_nexthop;
2593 optdatalen = pktopt->ip6po_nexthop->sa_len;
2594 }
2595 break;
2596 case IPV6_USE_MIN_MTU:
2597 if (pktopt)
2598 optdata = (void *)&pktopt->ip6po_minmtu;
2599 else
2600 optdata = (void *)&defminmtu;
2601 optdatalen = sizeof(int);
2602 break;
2603 case IPV6_DONTFRAG:
2604 if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
2605 on = 1;
2606 else
2607 on = 0;
2608 optdata = (void *)&on;
2609 optdatalen = sizeof(on);
2610 break;
2611 case IPV6_PREFER_TEMPADDR:
2612 if (pktopt)
2613 optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
2614 else
2615 optdata = (void *)&defpreftemp;
2616 optdatalen = sizeof(int);
2617 break;
2618 default: /* should not happen */
2619 #ifdef DIAGNOSTIC
2620 panic("ip6_getpcbopt: unexpected option\n");
2621 #endif
2622 return (ENOPROTOOPT);
2623 }
2624
2625 error = sooptcopyout(sopt, optdata, optdatalen);
2626
2627 return (error);
2628 }
2629
2630 void
2631 ip6_clearpktopts(pktopt, optname)
2632 struct ip6_pktopts *pktopt;
2633 int optname;
2634 {
2635 if (pktopt == NULL)
2636 return;
2637
2638 if (optname == -1 || optname == IPV6_PKTINFO) {
2639 if (pktopt->ip6po_pktinfo)
2640 FREE(pktopt->ip6po_pktinfo, M_IP6OPT);
2641 pktopt->ip6po_pktinfo = NULL;
2642 }
2643 if (optname == -1 || optname == IPV6_HOPLIMIT)
2644 pktopt->ip6po_hlim = -1;
2645 if (optname == -1 || optname == IPV6_TCLASS)
2646 pktopt->ip6po_tclass = -1;
2647 if (optname == -1 || optname == IPV6_NEXTHOP) {
2648 if (pktopt->ip6po_nextroute.ro_rt) {
2649 rtfree(pktopt->ip6po_nextroute.ro_rt);
2650 pktopt->ip6po_nextroute.ro_rt = NULL;
2651 }
2652 if (pktopt->ip6po_nexthop)
2653 FREE(pktopt->ip6po_nexthop, M_IP6OPT);
2654 pktopt->ip6po_nexthop = NULL;
2655 }
2656 if (optname == -1 || optname == IPV6_HOPOPTS) {
2657 if (pktopt->ip6po_hbh)
2658 FREE(pktopt->ip6po_hbh, M_IP6OPT);
2659 pktopt->ip6po_hbh = NULL;
2660 }
2661 if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) {
2662 if (pktopt->ip6po_dest1)
2663 FREE(pktopt->ip6po_dest1, M_IP6OPT);
2664 pktopt->ip6po_dest1 = NULL;
2665 }
2666 if (optname == -1 || optname == IPV6_RTHDR) {
2667 if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
2668 FREE(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
2669 pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
2670 if (pktopt->ip6po_route.ro_rt) {
2671 rtfree(pktopt->ip6po_route.ro_rt);
2672 pktopt->ip6po_route.ro_rt = NULL;
2673 }
2674 }
2675 if (optname == -1 || optname == IPV6_DSTOPTS) {
2676 if (pktopt->ip6po_dest2)
2677 FREE(pktopt->ip6po_dest2, M_IP6OPT);
2678 pktopt->ip6po_dest2 = NULL;
2679 }
2680 }
2681
2682 #define PKTOPT_EXTHDRCPY(type) \
2683 do {\
2684 if (src->type) {\
2685 int hlen =\
2686 (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
2687 dst->type = _MALLOC(hlen, M_IP6OPT, canwait);\
2688 if (dst->type == NULL && canwait == M_NOWAIT)\
2689 goto bad;\
2690 bcopy(src->type, dst->type, hlen);\
2691 }\
2692 } while (0)
2693
2694 static int
2695 copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
2696 {
2697 if (dst == NULL || src == NULL) {
2698 printf("ip6_clearpktopts: invalid argument\n");
2699 return (EINVAL);
2700 }
2701
2702 dst->ip6po_hlim = src->ip6po_hlim;
2703 dst->ip6po_tclass = src->ip6po_tclass;
2704 dst->ip6po_flags = src->ip6po_flags;
2705 if (src->ip6po_pktinfo) {
2706 dst->ip6po_pktinfo = _MALLOC(sizeof(*dst->ip6po_pktinfo),
2707 M_IP6OPT, canwait);
2708 if (dst->ip6po_pktinfo == NULL && canwait == M_NOWAIT)
2709 goto bad;
2710 *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
2711 }
2712 if (src->ip6po_nexthop) {
2713 dst->ip6po_nexthop = _MALLOC(src->ip6po_nexthop->sa_len,
2714 M_IP6OPT, canwait);
2715 if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT)
2716 goto bad;
2717 bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
2718 src->ip6po_nexthop->sa_len);
2719 }
2720 PKTOPT_EXTHDRCPY(ip6po_hbh);
2721 PKTOPT_EXTHDRCPY(ip6po_dest1);
2722 PKTOPT_EXTHDRCPY(ip6po_dest2);
2723 PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
2724 return (0);
2725
2726 bad:
2727 ip6_clearpktopts(dst, -1);
2728 return (ENOBUFS);
2729 }
2730 #undef PKTOPT_EXTHDRCPY
2731
2732 struct ip6_pktopts *
2733 ip6_copypktopts(struct ip6_pktopts *src, int canwait)
2734 {
2735 int error;
2736 struct ip6_pktopts *dst;
2737
2738 dst = _MALLOC(sizeof(*dst), M_IP6OPT, canwait);
2739 if (dst == NULL)
2740 return (NULL);
2741 ip6_initpktopts(dst);
2742
2743 if ((error = copypktopts(dst, src, canwait)) != 0) {
2744 FREE(dst, M_IP6OPT);
2745 return (NULL);
2746 }
2747
2748 return (dst);
2749 }
2750
2751 void
2752 ip6_freepcbopts(pktopt)
2753 struct ip6_pktopts *pktopt;
2754 {
2755 if (pktopt == NULL)
2756 return;
2757
2758 ip6_clearpktopts(pktopt, -1);
2759
2760 FREE(pktopt, M_IP6OPT);
2761 }
2762
2763 void
2764 ip6_moptions_init(void)
2765 {
2766 PE_parse_boot_argn("ifa_debug", &im6o_debug, sizeof (im6o_debug));
2767
2768 im6o_size = (im6o_debug == 0) ? sizeof (struct ip6_moptions) :
2769 sizeof (struct ip6_moptions_dbg);
2770
2771 im6o_zone = zinit(im6o_size, IM6O_ZONE_MAX * im6o_size, 0,
2772 IM6O_ZONE_NAME);
2773 if (im6o_zone == NULL) {
2774 panic("%s: failed allocating %s", __func__, IM6O_ZONE_NAME);
2775 /* NOTREACHED */
2776 }
2777 zone_change(im6o_zone, Z_EXPAND, TRUE);
2778 }
2779
2780 void
2781 im6o_addref(struct ip6_moptions *im6o, int locked)
2782 {
2783 if (!locked)
2784 IM6O_LOCK(im6o);
2785 else
2786 IM6O_LOCK_ASSERT_HELD(im6o);
2787
2788 if (++im6o->im6o_refcnt == 0) {
2789 panic("%s: im6o %p wraparound refcnt\n", __func__, im6o);
2790 /* NOTREACHED */
2791 } else if (im6o->im6o_trace != NULL) {
2792 (*im6o->im6o_trace)(im6o, TRUE);
2793 }
2794
2795 if (!locked)
2796 IM6O_UNLOCK(im6o);
2797 }
2798
2799 void
2800 im6o_remref(struct ip6_moptions *im6o)
2801 {
2802 int i;
2803
2804 IM6O_LOCK(im6o);
2805 if (im6o->im6o_refcnt == 0) {
2806 panic("%s: im6o %p negative refcnt", __func__, im6o);
2807 /* NOTREACHED */
2808 } else if (im6o->im6o_trace != NULL) {
2809 (*im6o->im6o_trace)(im6o, FALSE);
2810 }
2811
2812 --im6o->im6o_refcnt;
2813 if (im6o->im6o_refcnt > 0) {
2814 IM6O_UNLOCK(im6o);
2815 return;
2816 }
2817
2818 for (i = 0; i < im6o->im6o_num_memberships; ++i) {
2819 struct in6_mfilter *imf;
2820
2821 imf = im6o->im6o_mfilters ? &im6o->im6o_mfilters[i] : NULL;
2822 if (imf != NULL)
2823 im6f_leave(imf);
2824
2825 (void) in6_mc_leave(im6o->im6o_membership[i], imf);
2826
2827 if (imf != NULL)
2828 im6f_purge(imf);
2829
2830 IN6M_REMREF(im6o->im6o_membership[i]);
2831 im6o->im6o_membership[i] = NULL;
2832 }
2833 im6o->im6o_num_memberships = 0;
2834 if (im6o->im6o_mfilters != NULL) {
2835 FREE(im6o->im6o_mfilters, M_IN6MFILTER);
2836 im6o->im6o_mfilters = NULL;
2837 }
2838 if (im6o->im6o_membership != NULL) {
2839 FREE(im6o->im6o_membership, M_IP6MOPTS);
2840 im6o->im6o_membership = NULL;
2841 }
2842 IM6O_UNLOCK(im6o);
2843
2844 lck_mtx_destroy(&im6o->im6o_lock, ifa_mtx_grp);
2845
2846 if (!(im6o->im6o_debug & IFD_ALLOC)) {
2847 panic("%s: im6o %p cannot be freed", __func__, im6o);
2848 /* NOTREACHED */
2849 }
2850 zfree(im6o_zone, im6o);
2851 }
2852
2853 static void
2854 im6o_trace(struct ip6_moptions *im6o, int refhold)
2855 {
2856 struct ip6_moptions_dbg *im6o_dbg = (struct ip6_moptions_dbg *)im6o;
2857 ctrace_t *tr;
2858 u_int32_t idx;
2859 u_int16_t *cnt;
2860
2861 if (!(im6o->im6o_debug & IFD_DEBUG)) {
2862 panic("%s: im6o %p has no debug structure", __func__, im6o);
2863 /* NOTREACHED */
2864 }
2865 if (refhold) {
2866 cnt = &im6o_dbg->im6o_refhold_cnt;
2867 tr = im6o_dbg->im6o_refhold;
2868 } else {
2869 cnt = &im6o_dbg->im6o_refrele_cnt;
2870 tr = im6o_dbg->im6o_refrele;
2871 }
2872
2873 idx = atomic_add_16_ov(cnt, 1) % IM6O_TRACE_HIST_SIZE;
2874 ctrace_record(&tr[idx]);
2875 }
2876
2877 struct ip6_moptions *
2878 ip6_allocmoptions(int how)
2879 {
2880 struct ip6_moptions *im6o;
2881
2882 im6o = (how == M_WAITOK) ?
2883 zalloc(im6o_zone) : zalloc_noblock(im6o_zone);
2884 if (im6o != NULL) {
2885 bzero(im6o, im6o_size);
2886 lck_mtx_init(&im6o->im6o_lock, ifa_mtx_grp, ifa_mtx_attr);
2887 im6o->im6o_debug |= IFD_ALLOC;
2888 if (im6o_debug != 0) {
2889 im6o->im6o_debug |= IFD_DEBUG;
2890 im6o->im6o_trace = im6o_trace;
2891 }
2892 IM6O_ADDREF(im6o);
2893 }
2894
2895 return (im6o);
2896 }
2897
2898 /*
2899 * Set IPv6 outgoing packet options based on advanced API.
2900 */
2901 int
2902 ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
2903 struct ip6_pktopts *stickyopt, int uproto)
2904 {
2905 struct cmsghdr *cm = 0;
2906
2907 if (control == NULL || opt == NULL)
2908 return (EINVAL);
2909
2910 ip6_initpktopts(opt);
2911 if (stickyopt) {
2912 int error;
2913
2914 /*
2915 * If stickyopt is provided, make a local copy of the options
2916 * for this particular packet, then override them by ancillary
2917 * objects.
2918 * XXX: copypktopts() does not copy the cached route to a next
2919 * hop (if any). This is not very good in terms of efficiency,
2920 * but we can allow this since this option should be rarely
2921 * used.
2922 */
2923 if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
2924 return (error);
2925 }
2926
2927 /*
2928 * XXX: Currently, we assume all the optional information is stored
2929 * in a single mbuf.
2930 */
2931 if (control->m_next)
2932 return (EINVAL);
2933
2934 if (control->m_len < CMSG_LEN(0))
2935 return (EINVAL);
2936
2937 for (cm = M_FIRST_CMSGHDR(control); cm; cm = M_NXT_CMSGHDR(control, cm)) {
2938 int error;
2939
2940 if (cm->cmsg_len < sizeof(struct cmsghdr) || cm->cmsg_len > control->m_len)
2941 return (EINVAL);
2942 if (cm->cmsg_level != IPPROTO_IPV6)
2943 continue;
2944
2945 error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
2946 cm->cmsg_len - CMSG_LEN(0), opt, 0, 1, uproto);
2947 if (error)
2948 return (error);
2949 }
2950
2951 return (0);
2952 }
2953 /*
2954 * Set a particular packet option, as a sticky option or an ancillary data
2955 * item. "len" can be 0 only when it's a sticky option.
2956 * We have 4 cases of combination of "sticky" and "cmsg":
2957 * "sticky=0, cmsg=0": impossible
2958 * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
2959 * "sticky=1, cmsg=0": RFC3542 socket option
2960 * "sticky=1, cmsg=1": RFC2292 socket option
2961 */
2962 static int
2963 ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
2964 int sticky, int cmsg, int uproto)
2965 {
2966 int minmtupolicy, preftemp;
2967 int error;
2968
2969 if (!sticky && !cmsg) {
2970 #ifdef DIAGNOSTIC
2971 printf("ip6_setpktopt: impossible case\n");
2972 #endif
2973 return (EINVAL);
2974 }
2975
2976 /*
2977 * IPV6_2292xxx is for backward compatibility to RFC2292, and should
2978 * not be specified in the context of RFC3542. Conversely,
2979 * RFC3542 types should not be specified in the context of RFC2292.
2980 */
2981 if (!cmsg) {
2982 switch (optname) {
2983 case IPV6_2292PKTINFO:
2984 case IPV6_2292HOPLIMIT:
2985 case IPV6_2292NEXTHOP:
2986 case IPV6_2292HOPOPTS:
2987 case IPV6_2292DSTOPTS:
2988 case IPV6_2292RTHDR:
2989 case IPV6_2292PKTOPTIONS:
2990 return (ENOPROTOOPT);
2991 }
2992 }
2993 if (sticky && cmsg) {
2994 switch (optname) {
2995 case IPV6_PKTINFO:
2996 case IPV6_HOPLIMIT:
2997 case IPV6_NEXTHOP:
2998 case IPV6_HOPOPTS:
2999 case IPV6_DSTOPTS:
3000 case IPV6_RTHDRDSTOPTS:
3001 case IPV6_RTHDR:
3002 case IPV6_USE_MIN_MTU:
3003 case IPV6_DONTFRAG:
3004 case IPV6_TCLASS:
3005 case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
3006 return (ENOPROTOOPT);
3007 }
3008 }
3009
3010 switch (optname) {
3011 case IPV6_2292PKTINFO:
3012 case IPV6_PKTINFO:
3013 {
3014 struct ifnet *ifp = NULL;
3015 struct in6_pktinfo *pktinfo;
3016
3017 if (len != sizeof(struct in6_pktinfo))
3018 return (EINVAL);
3019
3020 pktinfo = (struct in6_pktinfo *)buf;
3021
3022 /*
3023 * An application can clear any sticky IPV6_PKTINFO option by
3024 * doing a "regular" setsockopt with ipi6_addr being
3025 * in6addr_any and ipi6_ifindex being zero.
3026 * [RFC 3542, Section 6]
3027 */
3028 if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
3029 pktinfo->ipi6_ifindex == 0 &&
3030 IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3031 ip6_clearpktopts(opt, optname);
3032 break;
3033 }
3034
3035 if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
3036 sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
3037 return (EINVAL);
3038 }
3039
3040 /* validate the interface index if specified. */
3041 ifnet_head_lock_shared();
3042
3043 if (pktinfo->ipi6_ifindex > if_index) {
3044 ifnet_head_done();
3045 return (ENXIO);
3046 }
3047
3048 if (pktinfo->ipi6_ifindex) {
3049 ifp = ifindex2ifnet[pktinfo->ipi6_ifindex];
3050 if (ifp == NULL) {
3051 ifnet_head_done();
3052 return (ENXIO);
3053 }
3054 }
3055
3056 ifnet_head_done();
3057
3058 /*
3059 * We store the address anyway, and let in6_selectsrc()
3060 * validate the specified address. This is because ipi6_addr
3061 * may not have enough information about its scope zone, and
3062 * we may need additional information (such as outgoing
3063 * interface or the scope zone of a destination address) to
3064 * disambiguate the scope.
3065 * XXX: the delay of the validation may confuse the
3066 * application when it is used as a sticky option.
3067 */
3068 if (opt->ip6po_pktinfo == NULL) {
3069 opt->ip6po_pktinfo = _MALLOC(sizeof(*pktinfo),
3070 M_IP6OPT, M_NOWAIT);
3071 if (opt->ip6po_pktinfo == NULL)
3072 return (ENOBUFS);
3073 }
3074 bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
3075 break;
3076 }
3077
3078 case IPV6_2292HOPLIMIT:
3079 case IPV6_HOPLIMIT:
3080 {
3081 int *hlimp;
3082
3083 /*
3084 * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
3085 * to simplify the ordering among hoplimit options.
3086 */
3087 if (optname == IPV6_HOPLIMIT && sticky)
3088 return (ENOPROTOOPT);
3089
3090 if (len != sizeof(int))
3091 return (EINVAL);
3092 hlimp = (int *)buf;
3093 if (*hlimp < -1 || *hlimp > 255)
3094 return (EINVAL);
3095
3096 opt->ip6po_hlim = *hlimp;
3097 break;
3098 }
3099
3100 case IPV6_TCLASS:
3101 {
3102 int tclass;
3103
3104 if (len != sizeof(int))
3105 return (EINVAL);
3106 tclass = *(int *)buf;
3107 if (tclass < -1 || tclass > 255)
3108 return (EINVAL);
3109
3110 opt->ip6po_tclass = tclass;
3111 break;
3112 }
3113
3114 case IPV6_2292NEXTHOP:
3115 case IPV6_NEXTHOP:
3116 error = suser(kauth_cred_get(), 0);
3117 if (error)
3118 return (EACCES);
3119
3120 if (len == 0) { /* just remove the option */
3121 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3122 break;
3123 }
3124
3125 /* check if cmsg_len is large enough for sa_len */
3126 if (len < sizeof(struct sockaddr) || len < *buf)
3127 return (EINVAL);
3128
3129 switch (((struct sockaddr *)buf)->sa_family) {
3130 case AF_INET6:
3131 {
3132 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
3133
3134 if (sa6->sin6_len != sizeof(struct sockaddr_in6))
3135 return (EINVAL);
3136
3137 if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
3138 IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
3139 return (EINVAL);
3140 }
3141 if ((error = sa6_embedscope(sa6, ip6_use_defzone))
3142 != 0) {
3143 return (error);
3144 }
3145 break;
3146 }
3147 case AF_LINK: /* should eventually be supported */
3148 default:
3149 return (EAFNOSUPPORT);
3150 }
3151
3152 /* turn off the previous option, then set the new option. */
3153 ip6_clearpktopts(opt, IPV6_NEXTHOP);
3154 opt->ip6po_nexthop = _MALLOC(*buf, M_IP6OPT, M_NOWAIT);
3155 if (opt->ip6po_nexthop == NULL)
3156 return (ENOBUFS);
3157 bcopy(buf, opt->ip6po_nexthop, *buf);
3158 break;
3159
3160 case IPV6_2292HOPOPTS:
3161 case IPV6_HOPOPTS:
3162 {
3163 struct ip6_hbh *hbh;
3164 int hbhlen;
3165
3166 /*
3167 * XXX: We don't allow a non-privileged user to set ANY HbH
3168 * options, since per-option restriction has too much
3169 * overhead.
3170 */
3171 error = suser(kauth_cred_get(), 0);
3172 if (error)
3173 return (EACCES);
3174
3175 if (len == 0) {
3176 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3177 break; /* just remove the option */
3178 }
3179
3180 /* message length validation */
3181 if (len < sizeof(struct ip6_hbh))
3182 return (EINVAL);
3183 hbh = (struct ip6_hbh *)buf;
3184 hbhlen = (hbh->ip6h_len + 1) << 3;
3185 if (len != hbhlen)
3186 return (EINVAL);
3187
3188 /* turn off the previous option, then set the new option. */
3189 ip6_clearpktopts(opt, IPV6_HOPOPTS);
3190 opt->ip6po_hbh = _MALLOC(hbhlen, M_IP6OPT, M_NOWAIT);
3191 if (opt->ip6po_hbh == NULL)
3192 return (ENOBUFS);
3193 bcopy(hbh, opt->ip6po_hbh, hbhlen);
3194
3195 break;
3196 }
3197
3198 case IPV6_2292DSTOPTS:
3199 case IPV6_DSTOPTS:
3200 case IPV6_RTHDRDSTOPTS:
3201 {
3202 struct ip6_dest *dest, **newdest = NULL;
3203 int destlen;
3204
3205 error = suser(kauth_cred_get(), 0);
3206 if (error)
3207 return (EACCES);
3208
3209 if (len == 0) {
3210 ip6_clearpktopts(opt, optname);
3211 break; /* just remove the option */
3212 }
3213
3214 /* message length validation */
3215 if (len < sizeof(struct ip6_dest))
3216 return (EINVAL);
3217 dest = (struct ip6_dest *)buf;
3218 destlen = (dest->ip6d_len + 1) << 3;
3219 if (len != destlen)
3220 return (EINVAL);
3221
3222 /*
3223 * Determine the position that the destination options header
3224 * should be inserted; before or after the routing header.
3225 */
3226 switch (optname) {
3227 case IPV6_2292DSTOPTS:
3228 /*
3229 * The old advacned API is ambiguous on this point.
3230 * Our approach is to determine the position based
3231 * according to the existence of a routing header.
3232 * Note, however, that this depends on the order of the
3233 * extension headers in the ancillary data; the 1st
3234 * part of the destination options header must appear
3235 * before the routing header in the ancillary data,
3236 * too.
3237 * RFC3542 solved the ambiguity by introducing
3238 * separate ancillary data or option types.
3239 */
3240 if (opt->ip6po_rthdr == NULL)
3241 newdest = &opt->ip6po_dest1;
3242 else
3243 newdest = &opt->ip6po_dest2;
3244 break;
3245 case IPV6_RTHDRDSTOPTS:
3246 newdest = &opt->ip6po_dest1;
3247 break;
3248 case IPV6_DSTOPTS:
3249 newdest = &opt->ip6po_dest2;
3250 break;
3251 }
3252
3253 /* turn off the previous option, then set the new option. */
3254 ip6_clearpktopts(opt, optname);
3255 *newdest = _MALLOC(destlen, M_IP6OPT, M_NOWAIT);
3256 if (*newdest == NULL)
3257 return (ENOBUFS);
3258 bcopy(dest, *newdest, destlen);
3259
3260 break;
3261 }
3262
3263 case IPV6_2292RTHDR:
3264 case IPV6_RTHDR:
3265 {
3266 struct ip6_rthdr *rth;
3267 int rthlen;
3268
3269 if (len == 0) {
3270 ip6_clearpktopts(opt, IPV6_RTHDR);
3271 break; /* just remove the option */
3272 }
3273
3274 /* message length validation */
3275 if (len < sizeof(struct ip6_rthdr))
3276 return (EINVAL);
3277 rth = (struct ip6_rthdr *)buf;
3278 rthlen = (rth->ip6r_len + 1) << 3;
3279 if (len != rthlen)
3280 return (EINVAL);
3281
3282 switch (rth->ip6r_type) {
3283 case IPV6_RTHDR_TYPE_0:
3284 if (rth->ip6r_len == 0) /* must contain one addr */
3285 return (EINVAL);
3286 if (rth->ip6r_len % 2) /* length must be even */
3287 return (EINVAL);
3288 if (rth->ip6r_len / 2 != rth->ip6r_segleft)
3289 return (EINVAL);
3290 break;
3291 default:
3292 return (EINVAL); /* not supported */
3293 }
3294
3295 /* turn off the previous option */
3296 ip6_clearpktopts(opt, IPV6_RTHDR);
3297 opt->ip6po_rthdr = _MALLOC(rthlen, M_IP6OPT, M_NOWAIT);
3298 if (opt->ip6po_rthdr == NULL)
3299 return (ENOBUFS);
3300 bcopy(rth, opt->ip6po_rthdr, rthlen);
3301
3302 break;
3303 }
3304
3305 case IPV6_USE_MIN_MTU:
3306 if (len != sizeof(int))
3307 return (EINVAL);
3308 minmtupolicy = *(int *)buf;
3309 if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
3310 minmtupolicy != IP6PO_MINMTU_DISABLE &&
3311 minmtupolicy != IP6PO_MINMTU_ALL) {
3312 return (EINVAL);
3313 }
3314 opt->ip6po_minmtu = minmtupolicy;
3315 break;
3316
3317 case IPV6_DONTFRAG:
3318 if (len != sizeof(int))
3319 return (EINVAL);
3320
3321 if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
3322 /*
3323 * we ignore this option for TCP sockets.
3324 * (RFC3542 leaves this case unspecified.)
3325 */
3326 opt->ip6po_flags &= ~IP6PO_DONTFRAG;
3327 } else
3328 opt->ip6po_flags |= IP6PO_DONTFRAG;
3329 break;
3330
3331 case IPV6_PREFER_TEMPADDR:
3332 if (len != sizeof(int))
3333 return (EINVAL);
3334 preftemp = *(int *)buf;
3335 if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
3336 preftemp != IP6PO_TEMPADDR_NOTPREFER &&
3337 preftemp != IP6PO_TEMPADDR_PREFER) {
3338 return (EINVAL);
3339 }
3340 opt->ip6po_prefer_tempaddr = preftemp;
3341 break;
3342
3343 default:
3344 return (ENOPROTOOPT);
3345 } /* end of switch */
3346
3347 return (0);
3348 }
3349
3350 /*
3351 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
3352 * packet to the input queue of a specified interface. Note that this
3353 * calls the output routine of the loopback "driver", but with an interface
3354 * pointer that might NOT be &loif -- easier than replicating that code here.
3355 */
3356 void
3357 ip6_mloopback(
3358 struct ifnet *ifp,
3359 struct mbuf *m,
3360 struct sockaddr_in6 *dst)
3361 {
3362 struct mbuf *copym;
3363 struct ip6_hdr *ip6;
3364
3365 copym = m_copy(m, 0, M_COPYALL);
3366 if (copym == NULL)
3367 return;
3368
3369 /*
3370 * Make sure to deep-copy IPv6 header portion in case the data
3371 * is in an mbuf cluster, so that we can safely override the IPv6
3372 * header portion later.
3373 */
3374 if ((copym->m_flags & M_EXT) != 0 ||
3375 copym->m_len < sizeof(struct ip6_hdr)) {
3376 copym = m_pullup(copym, sizeof(struct ip6_hdr));
3377 if (copym == NULL)
3378 return;
3379 }
3380
3381 #if DIAGNOSTIC
3382 if (copym->m_len < sizeof(*ip6)) {
3383 m_freem(copym);
3384 return;
3385 }
3386 #endif
3387
3388 ip6 = mtod(copym, struct ip6_hdr *);
3389 /*
3390 * clear embedded scope identifiers if necessary.
3391 * in6_clearscope will touch the addresses only when necessary.
3392 */
3393 in6_clearscope(&ip6->ip6_src);
3394 in6_clearscope(&ip6->ip6_dst);
3395
3396 #ifdef __APPLE__
3397
3398 /* Makes sure the HW checksum flags are cleaned before sending the packet */
3399
3400 if ((copym->m_pkthdr.csum_flags & CSUM_DELAY_IPV6_DATA) != 0) {
3401 in6_delayed_cksum(copym, sizeof(struct ip6_hdr));
3402 copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_IPV6_DATA;
3403 }
3404 copym->m_pkthdr.rcvif = 0;
3405 copym->m_pkthdr.csum_data = 0;
3406 copym->m_pkthdr.csum_flags = 0;
3407
3408 if (lo_ifp) {
3409 copym->m_pkthdr.rcvif = ifp;
3410 dlil_output(lo_ifp, PF_INET6, copym, 0, (struct sockaddr *)dst, 0);
3411 } else
3412 m_free(copym);
3413 #else
3414 (void)if_simloop(ifp, copym, dst->sin6_family, NULL);
3415 #endif
3416 }
3417
3418 /*
3419 * Chop IPv6 header off from the payload.
3420 */
3421 static int
3422 ip6_splithdr(m, exthdrs)
3423 struct mbuf *m;
3424 struct ip6_exthdrs *exthdrs;
3425 {
3426 struct mbuf *mh;
3427 struct ip6_hdr *ip6;
3428
3429 ip6 = mtod(m, struct ip6_hdr *);
3430 if (m->m_len > sizeof(*ip6)) {
3431 MGETHDR(mh, M_DONTWAIT, MT_HEADER); /* MAC-OK */
3432 if (mh == 0) {
3433 m_freem(m);
3434 return ENOBUFS;
3435 }
3436 M_COPY_PKTHDR(mh, m);
3437 MH_ALIGN(mh, sizeof(*ip6));
3438 m->m_flags &= ~M_PKTHDR;
3439 m->m_len -= sizeof(*ip6);
3440 m->m_data += sizeof(*ip6);
3441 mh->m_next = m;
3442 m = mh;
3443 m->m_len = sizeof(*ip6);
3444 bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
3445 }
3446 exthdrs->ip6e_ip6 = m;
3447 return 0;
3448 }
3449
3450 /*
3451 * Compute IPv6 extension header length.
3452 */
3453 int
3454 ip6_optlen(in6p)
3455 struct in6pcb *in6p;
3456 {
3457 int len;
3458
3459 if (!in6p->in6p_outputopts)
3460 return 0;
3461
3462 len = 0;
3463 #define elen(x) \
3464 (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
3465
3466 len += elen(in6p->in6p_outputopts->ip6po_hbh);
3467 if (in6p->in6p_outputopts->ip6po_rthdr)
3468 /* dest1 is valid with rthdr only */
3469 len += elen(in6p->in6p_outputopts->ip6po_dest1);
3470 len += elen(in6p->in6p_outputopts->ip6po_rthdr);
3471 len += elen(in6p->in6p_outputopts->ip6po_dest2);
3472 return len;
3473 #undef elen
3474 }