]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/in6_src.c
1eb5cd60fb98f2e23afdbd5cee60c0a4a56f8d6b
[apple/xnu.git] / bsd / netinet6 / in6_src.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */
30 /* $KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * Copyright (c) 1982, 1986, 1991, 1993
63 * The Regents of the University of California. All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 * notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 * notice, this list of conditions and the following disclaimer in the
72 * documentation and/or other materials provided with the distribution.
73 * 3. All advertising materials mentioning features or use of this software
74 * must display the following acknowledgement:
75 * This product includes software developed by the University of
76 * California, Berkeley and its contributors.
77 * 4. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
94 */
95
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/malloc.h>
100 #include <sys/mbuf.h>
101 #include <sys/protosw.h>
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/errno.h>
105 #include <sys/time.h>
106 #include <sys/proc.h>
107 #include <sys/sysctl.h>
108 #include <sys/kauth.h>
109 #include <sys/priv.h>
110 #include <kern/lock.h>
111
112 #include <net/if.h>
113 #include <net/if_types.h>
114 #include <net/route.h>
115
116 #include <netinet/in.h>
117 #include <netinet/in_var.h>
118 #include <netinet/in_systm.h>
119 #include <netinet/ip.h>
120 #include <netinet/in_pcb.h>
121 #include <netinet6/in6_var.h>
122 #include <netinet/ip6.h>
123 #include <netinet6/in6_pcb.h>
124 #include <netinet6/ip6_var.h>
125 #include <netinet6/scope6_var.h>
126 #include <netinet6/nd6.h>
127
128 #include <net/net_osdep.h>
129
130 #include "loop.h"
131
132 SYSCTL_DECL(_net_inet6_ip6);
133
134 static int ip6_select_srcif_debug = 0;
135 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
136 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
137 "log source interface selection debug info");
138
139 #define ADDR_LABEL_NOTAPP (-1)
140 struct in6_addrpolicy defaultaddrpolicy;
141
142 int ip6_prefer_tempaddr = 1;
143 #ifdef ENABLE_ADDRSEL
144 extern lck_mtx_t *addrsel_mutex;
145 #define ADDRSEL_LOCK() lck_mtx_lock(addrsel_mutex)
146 #define ADDRSEL_UNLOCK() lck_mtx_unlock(addrsel_mutex)
147 #else
148 #define ADDRSEL_LOCK()
149 #define ADDRSEL_UNLOCK()
150 #endif
151
152 static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
153 struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
154 struct ifnet **, struct rtentry **, int, int, unsigned int,
155 unsigned int);
156 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
157 struct ip6_moptions *, struct route_in6 *ro, unsigned int,
158 unsigned int, struct ifnet **);
159 static void init_policy_queue(void);
160 static int add_addrsel_policyent(const struct in6_addrpolicy *);
161 #ifdef ENABLE_ADDRSEL
162 static int delete_addrsel_policyent(const struct in6_addrpolicy *);
163 #endif
164 static int walk_addrsel_policy(int (*)(const struct in6_addrpolicy *, void *),
165 void *);
166 static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
167 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
168 void addrsel_policy_init(void);
169
170 /*
171 * Return an IPv6 address, which is the most appropriate for a given
172 * destination and user specified options.
173 * If necessary, this function looks up the routing table and returns
174 * an entry to the caller for later use.
175 */
/*
 * Flow-control helpers for the candidate loop in in6_selectsrc().
 *
 * Each helper bumps the per-rule statistics counter for rule `r' (silently
 * skipping the bump when `r' is outside ip6s_sources_rule[]) and then jumps
 * to a label that the enclosing function must provide:
 *
 *	REPLACE(r)	-> "replace"
 *	NEXTSRC(r)	-> "next"	(plain 'continue' cannot be used)
 *	BREAK(r)	-> "out"	(plain 'break' cannot be used)
 */
#define	SRCSEL_COUNT_RULE(r) do {					\
	if ((r) < sizeof (ip6stat.ip6s_sources_rule) /			\
	    sizeof (ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		ip6stat.ip6s_sources_rule[(r)]++;			\
} while (0)

#define	REPLACE(r) do {							\
	SRCSEL_COUNT_RULE(r);						\
	goto replace;							\
} while (0)

#define	NEXTSRC(r) do {							\
	SRCSEL_COUNT_RULE(r);						\
	goto next;							\
} while (0)

#define	BREAK(r) do {							\
	SRCSEL_COUNT_RULE(r);						\
	goto out;							\
} while (0)
194
195 struct in6_addr *
196 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
197 struct inpcb *inp, struct route_in6 *ro,
198 struct ifnet **ifpp, struct in6_addr *src_storage, unsigned int ifscope,
199 int *errorp)
200 {
201 struct in6_addr dst;
202 struct ifnet *ifp = NULL;
203 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
204 struct in6_pktinfo *pi = NULL;
205 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
206 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
207 u_int32_t odstzone;
208 int prefer_tempaddr;
209 struct ip6_moptions *mopts;
210 struct timeval timenow;
211 unsigned int nocell;
212 boolean_t islocal = FALSE;
213
214 getmicrotime(&timenow);
215
216 dst = dstsock->sin6_addr; /* make a copy for local operation */
217 *errorp = 0;
218 if (ifpp != NULL)
219 *ifpp = NULL;
220
221 if (inp != NULL) {
222 mopts = inp->in6p_moptions;
223 nocell = (inp->inp_flags & INP_NO_IFT_CELLULAR) ? 1 : 0;
224 } else {
225 mopts = NULL;
226 nocell = 0;
227 }
228
229 /*
230 * If the source address is explicitly specified by the caller,
231 * check if the requested source address is indeed a unicast address
232 * assigned to the node, and can be used as the packet's source
233 * address. If everything is okay, use the address as source.
234 */
235 if (opts && (pi = opts->ip6po_pktinfo) &&
236 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
237 struct sockaddr_in6 srcsock;
238 struct in6_ifaddr *ia6;
239
240 /* get the outgoing interface */
241 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, ifscope,
242 nocell, &ifp)) != 0) {
243 return (NULL);
244 }
245
246 /*
247 * determine the appropriate zone id of the source based on
248 * the zone of the destination and the outgoing interface.
249 * If the specified address is ambiguous wrt the scope zone,
250 * the interface must be specified; otherwise, ifa_ifwithaddr()
251 * will fail matching the address.
252 */
253 bzero(&srcsock, sizeof(srcsock));
254 srcsock.sin6_family = AF_INET6;
255 srcsock.sin6_len = sizeof(srcsock);
256 srcsock.sin6_addr = pi->ipi6_addr;
257 if (ifp) {
258 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
259 if (*errorp != 0) {
260 ifnet_release(ifp);
261 return (NULL);
262 }
263 }
264 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
265 if (ia6 == NULL) {
266 *errorp = EADDRNOTAVAIL;
267 if (ifp != NULL)
268 ifnet_release(ifp);
269 return (NULL);
270 }
271 IFA_LOCK_SPIN(&ia6->ia_ifa);
272 if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
273 (nocell && (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) {
274 IFA_UNLOCK(&ia6->ia_ifa);
275 IFA_REMREF(&ia6->ia_ifa);
276 *errorp = EADDRNOTAVAIL;
277 if (ifp != NULL)
278 ifnet_release(ifp);
279 return (NULL);
280 }
281
282 *src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
283 IFA_UNLOCK(&ia6->ia_ifa);
284 IFA_REMREF(&ia6->ia_ifa);
285 if (ifpp != NULL) {
286 /* if ifp is non-NULL, refcnt held in in6_selectif() */
287 *ifpp = ifp;
288 } else if (ifp != NULL) {
289 ifnet_release(ifp);
290 }
291 return (src_storage);
292 }
293
294 /*
295 * Otherwise, if the socket has already bound the source, just use it.
296 */
297 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
298 return (&inp->in6p_laddr);
299
300 /*
301 * If the address is not specified, choose the best one based on
302 * the outgoing interface and the destination address.
303 */
304
305 /* get the outgoing interface */
306 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, ifscope, nocell,
307 &ifp)) != 0)
308 return (NULL);
309
310 #ifdef DIAGNOSTIC
311 if (ifp == NULL) /* this should not happen */
312 panic("in6_selectsrc: NULL ifp");
313 #endif
314 *errorp = in6_setscope(&dst, ifp, &odstzone);
315 if (*errorp != 0) {
316 if (ifp != NULL)
317 ifnet_release(ifp);
318 return (NULL);
319 }
320 lck_rw_lock_shared(&in6_ifaddr_rwlock);
321
322 for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
323 int new_scope = -1, new_matchlen = -1;
324 struct in6_addrpolicy *new_policy = NULL;
325 u_int32_t srczone, osrczone, dstzone;
326 struct in6_addr src;
327 struct ifnet *ifp1 = ia->ia_ifp;
328
329 IFA_LOCK(&ia->ia_ifa);
330 /*
331 * We'll never take an address that breaks the scope zone
332 * of the destination. We also skip an address if its zone
333 * does not contain the outgoing interface.
334 * XXX: we should probably use sin6_scope_id here.
335 */
336 if (in6_setscope(&dst, ifp1, &dstzone) ||
337 odstzone != dstzone)
338 goto next;
339
340 src = ia->ia_addr.sin6_addr;
341 if (in6_setscope(&src, ifp, &osrczone) ||
342 in6_setscope(&src, ifp1, &srczone) ||
343 osrczone != srczone)
344 goto next;
345
346 /* avoid unusable addresses */
347 if ((ia->ia6_flags &
348 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED)))
349 goto next;
350
351 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
352 goto next;
353
354 /* Rule 1: Prefer same address */
355 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
356 BREAK(1); /* there should be no better candidate */
357
358 if (ia_best == NULL)
359 REPLACE(0);
360
361 /* Rule 2: Prefer appropriate scope */
362 if (dst_scope < 0)
363 dst_scope = in6_addrscope(&dst);
364 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
365 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
366 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
367 REPLACE(2);
368 NEXTSRC(2);
369 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
370 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
371 NEXTSRC(2);
372 REPLACE(2);
373 }
374
375 /*
376 * Rule 3: Avoid deprecated addresses. Note that the case of
377 * !ip6_use_deprecated is already rejected above.
378 */
379 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
380 NEXTSRC(3);
381 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
382 REPLACE(3);
383
384 /* Rule 4: Prefer home addresses */
385 /*
386 * XXX: This is a TODO. We should probably merge the MIP6
387 * case above.
388 */
389
390 /* Rule 5: Prefer outgoing interface */
391 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
392 NEXTSRC(5);
393 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
394 REPLACE(5);
395
396 /*
397 * Rule 6: Prefer matching label
398 * Note that best_policy should be non-NULL here.
399 */
400 if (dst_policy == NULL)
401 dst_policy = in6_addrsel_lookup_policy(dstsock);
402 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
403 new_policy = in6_addrsel_lookup_policy(&ia->ia_addr);
404 if (dst_policy->label == best_policy->label &&
405 dst_policy->label != new_policy->label)
406 NEXTSRC(6);
407 if (dst_policy->label != best_policy->label &&
408 dst_policy->label == new_policy->label)
409 REPLACE(6);
410 }
411
412 /*
413 * Rule 7: Prefer public addresses.
414 * We allow users to reverse the logic by configuring
415 * a sysctl variable, so that privacy conscious users can
416 * always prefer temporary addresses.
417 * Don't use temporary addresses for local destinations or
418 * for multicast addresses unless we were passed in an option.
419 */
420 if (IN6_IS_ADDR_MULTICAST(&dst) ||
421 in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst) >=
422 in6_mask2len(&ia_best->ia_prefixmask.sin6_addr, NULL))
423 islocal = TRUE;
424 if (opts == NULL ||
425 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
426 prefer_tempaddr = islocal ? 0 : ip6_prefer_tempaddr;
427 } else if (opts->ip6po_prefer_tempaddr ==
428 IP6PO_TEMPADDR_NOTPREFER) {
429 prefer_tempaddr = 0;
430 } else
431 prefer_tempaddr = 1;
432 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
433 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
434 if (prefer_tempaddr)
435 REPLACE(7);
436 else
437 NEXTSRC(7);
438 }
439 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
440 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
441 if (prefer_tempaddr)
442 NEXTSRC(7);
443 else
444 REPLACE(7);
445 }
446
447 /*
448 * Rule 8: prefer addresses on alive interfaces.
449 * This is a KAME specific rule.
450 */
451 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
452 !(ia->ia_ifp->if_flags & IFF_UP))
453 NEXTSRC(8);
454 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
455 (ia->ia_ifp->if_flags & IFF_UP))
456 REPLACE(8);
457
458 /*
459 * Rule 14: Use longest matching prefix.
460 * Note: in the address selection draft, this rule is
461 * documented as "Rule 8". However, since it is also
462 * documented that this rule can be overridden, we assign
463 * a large number so that it is easy to assign smaller numbers
464 * to more preferred rules.
465 */
466 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
467 if (best_matchlen < new_matchlen)
468 REPLACE(14);
469 if (new_matchlen < best_matchlen)
470 NEXTSRC(14);
471
472 /* Rule 15 is reserved. */
473
474 /*
475 * Last resort: just keep the current candidate.
476 * Or, do we need more rules?
477 */
478 IFA_UNLOCK(&ia->ia_ifa);
479 continue;
480
481 replace:
482 best_scope = (new_scope >= 0 ? new_scope :
483 in6_addrscope(&ia->ia_addr.sin6_addr));
484 best_policy = (new_policy ? new_policy :
485 in6_addrsel_lookup_policy(&ia->ia_addr));
486 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
487 in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
488 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
489 IFA_UNLOCK(&ia->ia_ifa);
490 if (ia_best != NULL)
491 IFA_REMREF(&ia_best->ia_ifa);
492 ia_best = ia;
493 continue;
494
495 next:
496 IFA_UNLOCK(&ia->ia_ifa);
497 continue;
498
499 out:
500 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
501 IFA_UNLOCK(&ia->ia_ifa);
502 if (ia_best != NULL)
503 IFA_REMREF(&ia_best->ia_ifa);
504 ia_best = ia;
505 break;
506 }
507
508 lck_rw_done(&in6_ifaddr_rwlock);
509
510 if (nocell && ia_best != NULL &&
511 (ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR)) {
512 IFA_REMREF(&ia_best->ia_ifa);
513 ia_best = NULL;
514 }
515
516 if ( (ia = ia_best) == NULL) {
517 *errorp = EADDRNOTAVAIL;
518 if (ifp != NULL)
519 ifnet_release(ifp);
520 return (NULL);
521 }
522
523 IFA_LOCK_SPIN(&ia->ia_ifa);
524 *src_storage = satosin6(&ia->ia_addr)->sin6_addr;
525 IFA_UNLOCK(&ia->ia_ifa);
526 IFA_REMREF(&ia->ia_ifa);
527 if (ifpp != NULL) {
528 /* if ifp is non-NULL, refcnt held in in6_selectif() */
529 *ifpp = ifp;
530 } else if (ifp != NULL) {
531 ifnet_release(ifp);
532 }
533 return (src_storage);
534 }
535
536 /*
537 * Given a source IPv6 address (and route, if available), determine the best
538 * interface to send the packet from. Checking for (and updating) the
539 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
540 * without any locks, based on the assumption that in the event this is
541 * called from ip6_output(), the output operation is single-threaded per-pcb,
542 * i.e. for any given pcb there can only be one thread performing output at
543 * the IPv6 layer.
544 *
545 * This routine is analogous to in_selectsrcif() for IPv4.
546 *
547 * clone - meaningful only for bsdi and freebsd
548 */
549 static int
550 selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
551 struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
552 struct ifnet **retifp, struct rtentry **retrt, int clone,
553 int norouteok, unsigned int ifscope, unsigned int nocell)
554 {
555 int error = 0;
556 struct ifnet *ifp = NULL;
557 struct route_in6 *route = NULL;
558 struct sockaddr_in6 *sin6_next;
559 struct in6_pktinfo *pi = NULL;
560 struct in6_addr *dst = &dstsock->sin6_addr;
561 struct ifaddr *ifa = NULL;
562 char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
563 boolean_t select_srcif;
564
565 #if 0
566 char ip6buf[INET6_ADDRSTRLEN];
567
568 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
569 dstsock->sin6_addr.s6_addr32[1] == 0 &&
570 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
571 printf("in6_selectroute: strange destination %s\n",
572 ip6_sprintf(ip6buf, &dstsock->sin6_addr));
573 } else {
574 printf("in6_selectroute: destination = %s%%%d\n",
575 ip6_sprintf(ip6buf, &dstsock->sin6_addr),
576 dstsock->sin6_scope_id); /* for debug */
577 }
578 #endif
579
580 if (retifp != NULL)
581 *retifp = NULL;
582
583 if (retrt != NULL)
584 *retrt = NULL;
585
586 if (ip6_select_srcif_debug) {
587 struct in6_addr src;
588 src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any;
589 (void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src));
590 (void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst));
591 }
592
593 /*
594 * If the destination address is UNSPECIFIED addr, bail out.
595 */
596 if (IN6_IS_ADDR_UNSPECIFIED(dst)) {
597 error = EHOSTUNREACH;
598 goto done;
599 }
600
601 /*
602 * Perform source interface selection only if Scoped Routing
603 * is enabled and a source address that isn't unspecified.
604 */
605 select_srcif = (ip6_doscopedroute && srcsock != NULL &&
606 !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));
607
608 /*
609 * If Scoped Routing is disabled, ignore the given ifscope.
610 * Otherwise even if source selection won't be performed,
611 * we still obey IPV6_BOUND_IF.
612 */
613 if (!ip6_doscopedroute && ifscope != IFSCOPE_NONE)
614 ifscope = IFSCOPE_NONE;
615
616 /* If the caller specified the outgoing interface explicitly, use it */
617 if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL &&
618 pi->ipi6_ifindex != 0) {
619 /*
620 * If IPV6_PKTINFO takes precedence over IPV6_BOUND_IF.
621 */
622 ifscope = pi->ipi6_ifindex;
623 ifnet_head_lock_shared();
624 /* ifp may be NULL if detached or out of range */
625 ifp = (ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL;
626 ifnet_head_done();
627 if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
628 /*
629 * We do not have to check or get the route for
630 * multicast. If the caller didn't ask/care for
631 * the route and we have no interface to use,
632 * it's an error.
633 */
634 if (ifp == NULL)
635 error = EHOSTUNREACH;
636 goto done;
637 } else {
638 goto getsrcif;
639 }
640 }
641
642 /*
643 * If the destination address is a multicast address and the outgoing
644 * interface for the address is specified by the caller, use it.
645 */
646 if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
647 IM6O_LOCK(mopts);
648 if ((ifp = mopts->im6o_multicast_ifp) != NULL) {
649 IM6O_UNLOCK(mopts);
650 goto done; /* we do not need a route for multicast. */
651 }
652 IM6O_UNLOCK(mopts);
653 }
654
655 getsrcif:
656 /*
657 * If the outgoing interface was not set via IPV6_BOUND_IF or
658 * IPV6_PKTINFO, use the scope ID in the destination address.
659 */
660 if (ip6_doscopedroute && ifscope == IFSCOPE_NONE)
661 ifscope = dstsock->sin6_scope_id;
662
663 /*
664 * Perform source interface selection; the source IPv6 address
665 * must belong to one of the addresses of the interface used
666 * by the route. For performance reasons, do this only if
667 * there is no route, or if the routing table has changed,
668 * or if we haven't done source interface selection on this
669 * route (for this PCB instance) before.
670 */
671 if (!select_srcif || (ro != NULL && ro->ro_rt != NULL &&
672 (ro->ro_rt->rt_flags & RTF_UP) &&
673 ro->ro_rt->generation_id == route_generation &&
674 (ro->ro_flags & ROF_SRCIF_SELECTED))) {
675 if (ro != NULL && ro->ro_rt != NULL) {
676 ifa = ro->ro_rt->rt_ifa;
677 IFA_ADDREF(ifa);
678 }
679 goto getroute;
680 }
681
682 /*
683 * Given the source IPv6 address, find a suitable source interface
684 * to use for transmission; if a scope ID has been specified,
685 * optimize the search by looking at the addresses only for that
686 * interface. This is still suboptimal, however, as we need to
687 * traverse the per-interface list.
688 */
689 if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) {
690 unsigned int scope = ifscope;
691 struct ifnet *rt_ifp;
692
693 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
694
695 /*
696 * If no scope is specified and the route is stale (pointing
697 * to a defunct interface) use the current primary interface;
698 * this happens when switching between interfaces configured
699 * with the same IPv6 address. Otherwise pick up the scope
700 * information from the route; the ULP may have looked up a
701 * correct route and we just need to verify it here and mark
702 * it with the ROF_SRCIF_SELECTED flag below.
703 */
704 if (scope == IFSCOPE_NONE) {
705 scope = rt_ifp->if_index;
706 if (scope != get_primary_ifscope(AF_INET6) &&
707 ro->ro_rt->generation_id != route_generation)
708 scope = get_primary_ifscope(AF_INET6);
709 }
710
711 ifa = (struct ifaddr *)
712 ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);
713
714 if (ip6_select_srcif_debug && ifa != NULL) {
715 if (ro->ro_rt != NULL) {
716 printf("%s->%s ifscope %d->%d ifa_if %s "
717 "ro_if %s\n", s_src, s_dst, ifscope,
718 scope, if_name(ifa->ifa_ifp),
719 if_name(rt_ifp));
720 } else {
721 printf("%s->%s ifscope %d->%d ifa_if %s\n",
722 s_src, s_dst, ifscope, scope,
723 if_name(ifa->ifa_ifp));
724 }
725 }
726 }
727
728 /*
729 * Slow path; search for an interface having the corresponding source
730 * IPv6 address if the scope was not specified by the caller, and:
731 *
732 * 1) There currently isn't any route, or,
733 * 2) The interface used by the route does not own that source
734 * IPv6 address; in this case, the route will get blown away
735 * and we'll do a more specific scoped search using the newly
736 * found interface.
737 */
738 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
739 ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);
740
741 if (ip6_select_srcif_debug && ifa != NULL) {
742 printf("%s->%s ifscope %d ifa_if %s\n",
743 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
744 }
745
746 }
747
748 getroute:
749 if (ifa != NULL)
750 ifscope = ifa->ifa_ifp->if_index;
751
752 /*
753 * If the next hop address for the packet is specified by the caller,
754 * use it as the gateway.
755 */
756 if (opts != NULL && opts->ip6po_nexthop != NULL) {
757 struct route_in6 *ron;
758
759 sin6_next = satosin6(opts->ip6po_nexthop);
760
761 /* at this moment, we only support AF_INET6 next hops */
762 if (sin6_next->sin6_family != AF_INET6) {
763 error = EAFNOSUPPORT; /* or should we proceed? */
764 goto done;
765 }
766
767 /*
768 * If the next hop is an IPv6 address, then the node identified
769 * by that address must be a neighbor of the sending host.
770 */
771 ron = &opts->ip6po_nextroute;
772 if (ron->ro_rt != NULL)
773 RT_LOCK(ron->ro_rt);
774 if ((ron->ro_rt != NULL &&
775 ((ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
776 (RTF_UP | RTF_LLINFO) ||
777 ron->ro_rt->generation_id != route_generation ||
778 (select_srcif && (ifa == NULL ||
779 ifa->ifa_ifp != ron->ro_rt->rt_ifp)))) ||
780 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
781 &sin6_next->sin6_addr)) {
782 if (ron->ro_rt != NULL) {
783 RT_UNLOCK(ron->ro_rt);
784 rtfree(ron->ro_rt);
785 ron->ro_rt = NULL;
786 }
787 *satosin6(&ron->ro_dst) = *sin6_next;
788 }
789 if (ron->ro_rt == NULL) {
790 rtalloc_scoped((struct route *)ron, ifscope);
791 if (ron->ro_rt != NULL)
792 RT_LOCK(ron->ro_rt);
793 if (ron->ro_rt == NULL ||
794 !(ron->ro_rt->rt_flags & RTF_LLINFO) ||
795 !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))->
796 sin6_addr, &sin6_next->sin6_addr)) {
797 if (ron->ro_rt != NULL) {
798 RT_UNLOCK(ron->ro_rt);
799 rtfree(ron->ro_rt);
800 ron->ro_rt = NULL;
801 }
802 error = EHOSTUNREACH;
803 goto done;
804 }
805 }
806 route = ron;
807 ifp = ron->ro_rt->rt_ifp;
808
809 /*
810 * When cloning is required, try to allocate a route to the
811 * destination so that the caller can store path MTU
812 * information.
813 */
814 if (!clone) {
815 if (select_srcif) {
816 /* Keep the route locked */
817 goto validateroute;
818 }
819 RT_UNLOCK(ron->ro_rt);
820 goto done;
821 }
822 RT_UNLOCK(ron->ro_rt);
823 }
824
825 /*
826 * Use a cached route if it exists and is valid, else try to allocate
827 * a new one. Note that we should check the address family of the
828 * cached destination, in case of sharing the cache with IPv4.
829 */
830 if (ro == NULL)
831 goto done;
832 if (ro->ro_rt != NULL)
833 RT_LOCK(ro->ro_rt);
834 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
835 satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
836 ro->ro_rt->generation_id != route_generation ||
837 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
838 (select_srcif && (ifa == NULL ||
839 ifa->ifa_ifp != ro->ro_rt->rt_ifp)))) {
840 RT_UNLOCK(ro->ro_rt);
841 rtfree(ro->ro_rt);
842 ro->ro_rt = NULL;
843 }
844 if (ro->ro_rt == NULL) {
845 struct sockaddr_in6 *sa6;
846
847 if (ro->ro_rt != NULL)
848 RT_UNLOCK(ro->ro_rt);
849 /* No route yet, so try to acquire one */
850 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
851 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
852 sa6->sin6_family = AF_INET6;
853 sa6->sin6_len = sizeof(struct sockaddr_in6);
854 sa6->sin6_addr = *dst;
855 if (IN6_IS_ADDR_MULTICAST(dst)) {
856 ro->ro_rt = rtalloc1_scoped(
857 &((struct route *)ro)->ro_dst, 0, 0, ifscope);
858 } else {
859 rtalloc_scoped((struct route *)ro, ifscope);
860 }
861 if (ro->ro_rt != NULL)
862 RT_LOCK(ro->ro_rt);
863 }
864
865 /*
866 * Do not care about the result if we have the nexthop
867 * explicitly specified (in case we're asked to clone.)
868 */
869 if (opts != NULL && opts->ip6po_nexthop != NULL) {
870 if (ro->ro_rt != NULL)
871 RT_UNLOCK(ro->ro_rt);
872 goto done;
873 }
874
875 if (ro->ro_rt != NULL) {
876 RT_LOCK_ASSERT_HELD(ro->ro_rt);
877 ifp = ro->ro_rt->rt_ifp;
878 } else {
879 error = EHOSTUNREACH;
880 }
881 route = ro;
882
883 validateroute:
884 if (select_srcif) {
885 boolean_t has_route = (route != NULL && route->ro_rt != NULL);
886
887 if (has_route)
888 RT_LOCK_ASSERT_HELD(route->ro_rt);
889 /*
890 * If there is a non-loopback route with the wrong interface,
891 * or if there is no interface configured with such an address,
892 * blow it away. Except for local/loopback, we look for one
893 * with a matching interface scope/index.
894 */
895 if (has_route && (ifa == NULL ||
896 (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
897 !(route->ro_rt->rt_flags & RTF_UP))) {
898 if (ip6_select_srcif_debug) {
899 if (ifa != NULL) {
900 printf("%s->%s ifscope %d ro_if %s "
901 "!= ifa_if %s (cached route "
902 "cleared)\n", s_src, s_dst,
903 ifscope, if_name(ifp),
904 if_name(ifa->ifa_ifp));
905 } else {
906 printf("%s->%s ifscope %d ro_if %s "
907 "(no ifa_if found)\n", s_src,
908 s_dst, ifscope, if_name(ifp));
909 }
910 }
911 RT_UNLOCK(route->ro_rt);
912 rtfree(route->ro_rt);
913 route->ro_rt = NULL;
914 route->ro_flags &= ~ROF_SRCIF_SELECTED;
915 error = EHOSTUNREACH;
916 /* Undo the settings done above */
917 route = NULL;
918 ifp = NULL;
919 } else if (has_route) {
920 route->ro_flags |= ROF_SRCIF_SELECTED;
921 route->ro_rt->generation_id = route_generation;
922 RT_UNLOCK(route->ro_rt);
923 }
924 } else {
925 if (ro->ro_rt != NULL)
926 RT_UNLOCK(ro->ro_rt);
927 if (ifp != NULL && opts != NULL &&
928 opts->ip6po_pktinfo != NULL &&
929 opts->ip6po_pktinfo->ipi6_ifindex != 0) {
930 /*
931 * Check if the outgoing interface conflicts with the
932 * interface specified by ipi6_ifindex (if specified).
933 * Note that loopback interface is always okay.
934 * (this may happen when we are sending a packet to
935 * one of our own addresses.)
936 */
937 if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index !=
938 opts->ip6po_pktinfo->ipi6_ifindex) {
939 error = EHOSTUNREACH;
940 goto done;
941 }
942 }
943 }
944
945 done:
946 if (nocell && error == 0) {
947 if ((ifp != NULL && ifp->if_type == IFT_CELLULAR) ||
948 (route != NULL && route->ro_rt != NULL &&
949 route->ro_rt->rt_ifp->if_type == IFT_CELLULAR)) {
950 if (route != NULL && route->ro_rt != NULL) {
951 rtfree(route->ro_rt);
952 route->ro_rt = NULL;
953 route->ro_flags &= ~ROF_SRCIF_SELECTED;
954 route = NULL;
955 }
956 ifp = NULL;
957 error = EHOSTUNREACH;
958 }
959 }
960
961 if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) {
962 /*
963 * This can happen if the caller did not pass a cached route
964 * nor any other hints. We treat this case an error.
965 */
966 error = EHOSTUNREACH;
967 }
968 if (error == EHOSTUNREACH)
969 ip6stat.ip6s_noroute++;
970
971 if (error == 0) {
972 if (retifp != NULL) {
973 if (ifp != NULL)
974 ifnet_reference(ifp); /* for caller */
975 *retifp = ifp;
976 }
977 if (retrt != NULL && route != NULL)
978 *retrt = route->ro_rt; /* ro_rt may be NULL */
979 } else if (select_srcif && ip6_select_srcif_debug) {
980 printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
981 s_src, s_dst, ifscope,
982 (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
983 (ifp != NULL) ? if_name(ifp) : "NONE", error);
984 }
985
986 if (ifa != NULL)
987 IFA_REMREF(ifa);
988
989 return (error);
990 }
991
992 static int
993 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
994 struct ip6_moptions *mopts, struct route_in6 *ro, unsigned int ifscope,
995 unsigned int nocell, struct ifnet **retifp)
996 {
997 int error;
998 struct route_in6 sro;
999 struct rtentry *rt = NULL;
1000
1001 if (ro == NULL) {
1002 bzero(&sro, sizeof(sro));
1003 ro = &sro;
1004 }
1005
1006 if ((error = selectroute(NULL, dstsock, opts, mopts, ro, retifp,
1007 &rt, 0, 1, ifscope, nocell)) != 0) {
1008 if (ro == &sro && rt && rt == sro.ro_rt)
1009 rtfree(rt);
1010 return (error);
1011 }
1012
1013 /*
1014 * do not use a rejected or black hole route.
1015 * XXX: this check should be done in the L2 output routine.
1016 * However, if we skipped this check here, we'd see the following
1017 * scenario:
1018 * - install a rejected route for a scoped address prefix
1019 * (like fe80::/10)
1020 * - send a packet to a destination that matches the scoped prefix,
1021 * with ambiguity about the scope zone.
1022 * - pick the outgoing interface from the route, and disambiguate the
1023 * scope zone with the interface.
1024 * - ip6_output() would try to get another route with the "new"
1025 * destination, which may be valid.
1026 * - we'd see no error on output.
1027 * Although this may not be very harmful, it should still be confusing.
1028 * We thus reject the case here.
1029 */
1030 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1031 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
1032
1033 if (ro == &sro && rt && rt == sro.ro_rt)
1034 rtfree(rt);
1035 return (flags);
1036 }
1037
1038 /*
1039 * Adjust the "outgoing" interface. If we're going to loop the packet
1040 * back to ourselves, the ifp would be the loopback interface.
1041 * However, we'd rather know the interface associated to the
1042 * destination address (which should probably be one of our own
1043 * addresses.)
1044 */
1045 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) {
1046 if (*retifp != NULL)
1047 ifnet_release(*retifp);
1048 *retifp = rt->rt_ifa->ifa_ifp;
1049 ifnet_reference(*retifp);
1050 }
1051
1052 if (ro == &sro && rt && rt == sro.ro_rt)
1053 rtfree(rt);
1054 return (0);
1055 }
1056
/*
 * Externally visible front end to selectroute().  All arguments are passed
 * through unchanged, with 0 supplied for the selectroute() parameter that
 * sits between clone and ifscope; the result of selectroute() is returned
 * as is.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
    struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone,
    unsigned int ifscope, unsigned int nocell)
{
	int error;

	error = selectroute(srcsock, dstsock, opts, mopts, ro, retifp,
	    retrt, clone, 0, ifscope, nocell);

	return (error);
}
1070
1071 /*
1072 * Default hop limit selection. The precedence is as follows:
1073 * 1. Hoplimit value specified via ioctl.
1074 * 2. (If the outgoing interface is detected) the current
1075 * hop limit of the interface specified by router advertisement.
1076 * 3. The system default hoplimit.
1077 */
1078 int
1079 in6_selecthlim(
1080 struct in6pcb *in6p,
1081 struct ifnet *ifp)
1082 {
1083 if (in6p && in6p->in6p_hops >= 0) {
1084 return(in6p->in6p_hops);
1085 } else {
1086 lck_rw_lock_shared(nd_if_rwlock);
1087 if (ifp && ifp->if_index < nd_ifinfo_indexlim) {
1088 u_int8_t chlim = nd_ifinfo[ifp->if_index].chlim;
1089 lck_rw_done(nd_if_rwlock);
1090 return (chlim);
1091 } else {
1092 lck_rw_done(nd_if_rwlock);
1093 return(ip6_defhlim);
1094 }
1095 }
1096 }
1097
1098 /*
1099 * XXX: this is borrowed from in6_pcbbind(). If possible, we should
1100 * share this function by all *bsd*...
1101 */
1102 int
1103 in6_pcbsetport(
1104 __unused struct in6_addr *laddr,
1105 struct inpcb *inp,
1106 struct proc *p,
1107 int locked)
1108 {
1109 struct socket *so = inp->inp_socket;
1110 u_int16_t lport = 0, first, last, *lastport;
1111 int count, error = 0, wild = 0;
1112 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1113 kauth_cred_t cred;
1114 if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
1115 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1116 socket_unlock(inp->inp_socket, 0);
1117 lck_rw_lock_exclusive(pcbinfo->mtx);
1118 socket_lock(inp->inp_socket, 0);
1119 }
1120 }
1121
1122 /* XXX: this is redundant when called from in6_pcbbind */
1123 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
1124 wild = INPLOOKUP_WILDCARD;
1125
1126 inp->inp_flags |= INP_ANONPORT;
1127
1128 if (inp->inp_flags & INP_HIGHPORT) {
1129 first = ipport_hifirstauto; /* sysctl */
1130 last = ipport_hilastauto;
1131 lastport = &pcbinfo->lasthi;
1132 } else if (inp->inp_flags & INP_LOWPORT) {
1133 cred = kauth_cred_proc_ref(p);
1134 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
1135 kauth_cred_unref(&cred);
1136 if (error != 0) {
1137 if (!locked)
1138 lck_rw_done(pcbinfo->mtx);
1139 return error;
1140 }
1141 first = ipport_lowfirstauto; /* 1023 */
1142 last = ipport_lowlastauto; /* 600 */
1143 lastport = &pcbinfo->lastlow;
1144 } else {
1145 first = ipport_firstauto; /* sysctl */
1146 last = ipport_lastauto;
1147 lastport = &pcbinfo->lastport;
1148 }
1149 /*
1150 * Simple check to ensure all ports are not used up causing
1151 * a deadlock here.
1152 *
1153 * We split the two cases (up and down) so that the direction
1154 * is not being tested on each round of the loop.
1155 */
1156 if (first > last) {
1157 /*
1158 * counting down
1159 */
1160 count = first - last;
1161
1162 do {
1163 if (count-- < 0) { /* completely used? */
1164 /*
1165 * Undo any address bind that may have
1166 * occurred above.
1167 */
1168 inp->in6p_laddr = in6addr_any;
1169 inp->in6p_last_outif = 0;
1170 if (!locked)
1171 lck_rw_done(pcbinfo->mtx);
1172 return (EAGAIN);
1173 }
1174 --*lastport;
1175 if (*lastport > first || *lastport < last)
1176 *lastport = first;
1177 lport = htons(*lastport);
1178 } while (in6_pcblookup_local(pcbinfo,
1179 &inp->in6p_laddr, lport, wild));
1180 } else {
1181 /*
1182 * counting up
1183 */
1184 count = last - first;
1185
1186 do {
1187 if (count-- < 0) { /* completely used? */
1188 /*
1189 * Undo any address bind that may have
1190 * occurred above.
1191 */
1192 inp->in6p_laddr = in6addr_any;
1193 inp->in6p_last_outif = 0;
1194 if (!locked)
1195 lck_rw_done(pcbinfo->mtx);
1196 return (EAGAIN);
1197 }
1198 ++*lastport;
1199 if (*lastport < first || *lastport > last)
1200 *lastport = first;
1201 lport = htons(*lastport);
1202 } while (in6_pcblookup_local(pcbinfo,
1203 &inp->in6p_laddr, lport, wild));
1204 }
1205
1206 inp->inp_lport = lport;
1207 if (in_pcbinshash(inp, 1) != 0) {
1208 inp->in6p_laddr = in6addr_any;
1209 inp->inp_lport = 0;
1210 inp->in6p_last_outif = 0;
1211 if (!locked)
1212 lck_rw_done(pcbinfo->mtx);
1213 return (EAGAIN);
1214 }
1215
1216 if (!locked)
1217 lck_rw_done(pcbinfo->mtx);
1218 return(0);
1219 }
1220
1221 /*
1222 * What follows is an implementation of the address selection policy
1223 * table as a simple tail queue.
1224 * XXX such details should be hidden.
1225 * XXX an implementation using a binary tree should be more efficient.
1226 */
1227 struct addrsel_policyent {
1228 TAILQ_ENTRY(addrsel_policyent) ape_entry; /* tail-queue linkage */
1229 struct in6_addrpolicy ape_policy; /* policy carried by this entry */
1230 };
1231
1232 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
1233
1234 struct addrsel_policyhead addrsel_policytab; /* head of the policy table */
1235
1236 static void
1237 init_policy_queue(void)
1238 {
1239
1240 TAILQ_INIT(&addrsel_policytab);
1241 }
1242
1243 void
1244 addrsel_policy_init(void)
1245 {
1246 /*
1247 * Default address selection policy based on RFC 3484 and
1248 * draft-arifumi-6man-rfc3484-revise-03.
1249 */
1250 static const struct in6_addrpolicy defaddrsel[] = {
1251 /* localhost */
1252 { .addr = { .sin6_family = AF_INET6,
1253 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
1254 .sin6_len = sizeof(struct sockaddr_in6) },
1255 .addrmask = { .sin6_family = AF_INET6,
1256 .sin6_addr = IN6MASK128,
1257 .sin6_len = sizeof(struct sockaddr_in6) },
1258 .preced = 60,
1259 .label = 0 },
1260 /* ULA */
1261 { .addr = { .sin6_family = AF_INET6,
1262 .sin6_addr = {{{ 0xfc }}},
1263 .sin6_len = sizeof(struct sockaddr_in6) },
1264 .addrmask = { .sin6_family = AF_INET6,
1265 .sin6_addr = IN6MASK7,
1266 .sin6_len = sizeof(struct sockaddr_in6) },
1267 .preced = 50,
1268 .label = 1 },
1269 /* any IPv6 src */
1270 { .addr = { .sin6_family = AF_INET6,
1271 .sin6_addr = IN6ADDR_ANY_INIT,
1272 .sin6_len = sizeof(struct sockaddr_in6) },
1273 .addrmask = { .sin6_family = AF_INET6,
1274 .sin6_addr = IN6MASK0,
1275 .sin6_len = sizeof(struct sockaddr_in6) },
1276 .preced = 40,
1277 .label = 2 },
1278 /* any IPv4 src */
1279 { .addr = { .sin6_family = AF_INET6,
1280 .sin6_addr = IN6ADDR_V4MAPPED_INIT,
1281 .sin6_len = sizeof(struct sockaddr_in6) },
1282 .addrmask = { .sin6_family = AF_INET6,
1283 .sin6_addr = IN6MASK96,
1284 .sin6_len = sizeof(struct sockaddr_in6) },
1285 .preced = 30,
1286 .label = 3 },
1287 /* 6to4 */
1288 { .addr = { .sin6_family = AF_INET6,
1289 .sin6_addr = {{{ 0x20, 0x02 }}},
1290 .sin6_len = sizeof(struct sockaddr_in6) },
1291 .addrmask = { .sin6_family = AF_INET6,
1292 .sin6_addr = IN6MASK16,
1293 .sin6_len = sizeof(struct sockaddr_in6) },
1294 .preced = 20,
1295 .label = 4 },
1296 /* Teredo */
1297 { .addr = { .sin6_family = AF_INET6,
1298 .sin6_addr = {{{ 0x20, 0x01 }}},
1299 .sin6_len = sizeof(struct sockaddr_in6) },
1300 .addrmask = { .sin6_family = AF_INET6,
1301 .sin6_addr = IN6MASK32,
1302 .sin6_len = sizeof(struct sockaddr_in6) },
1303 .preced = 10,
1304 .label = 5 },
1305 /* v4 compat addresses */
1306 { .addr = { .sin6_family = AF_INET6,
1307 .sin6_addr = IN6ADDR_ANY_INIT,
1308 .sin6_len = sizeof(struct sockaddr_in6) },
1309 .addrmask = { .sin6_family = AF_INET6,
1310 .sin6_addr = IN6MASK96,
1311 .sin6_len = sizeof(struct sockaddr_in6) },
1312 .preced = 1,
1313 .label = 10 },
1314 /* site-local (deprecated) */
1315 { .addr = { .sin6_family = AF_INET6,
1316 .sin6_addr = {{{ 0xfe, 0xc0 }}},
1317 .sin6_len = sizeof(struct sockaddr_in6) },
1318 .addrmask = { .sin6_family = AF_INET6,
1319 .sin6_addr = IN6MASK16,
1320 .sin6_len = sizeof(struct sockaddr_in6) },
1321 .preced = 1,
1322 .label = 11 },
1323 /* 6bone (deprecated) */
1324 { .addr = { .sin6_family = AF_INET6,
1325 .sin6_addr = {{{ 0x3f, 0xfe }}},
1326 .sin6_len = sizeof(struct sockaddr_in6) },
1327 .addrmask = { .sin6_family = AF_INET6,
1328 .sin6_addr = IN6MASK16,
1329 .sin6_len = sizeof(struct sockaddr_in6) },
1330 .preced = 1,
1331 .label = 12 },
1332 };
1333 int i;
1334
1335 init_policy_queue();
1336
1337 /* initialize the "last resort" policy */
1338 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy));
1339 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
1340
1341 for (i = 0; i < sizeof(defaddrsel) / sizeof(defaddrsel[0]); i++)
1342 add_addrsel_policyent(&defaddrsel[i]);
1343
1344 }
1345
1346 struct in6_addrpolicy *
1347 in6_addrsel_lookup_policy(struct sockaddr_in6 *key)
1348 {
1349 struct in6_addrpolicy *match = NULL;
1350
1351 ADDRSEL_LOCK();
1352 match = match_addrsel_policy(key);
1353
1354 if (match == NULL)
1355 match = &defaultaddrpolicy;
1356 else
1357 match->use++;
1358 ADDRSEL_UNLOCK();
1359
1360 return (match);
1361 }
1362
1363 static struct in6_addrpolicy *
1364 match_addrsel_policy(struct sockaddr_in6 *key)
1365 {
1366 struct addrsel_policyent *pent;
1367 struct in6_addrpolicy *bestpol = NULL, *pol;
1368 int matchlen, bestmatchlen = -1;
1369 u_char *mp, *ep, *k, *p, m;
1370
1371 TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) {
1372 matchlen = 0;
1373
1374 pol = &pent->ape_policy;
1375 mp = (u_char *)&pol->addrmask.sin6_addr;
1376 ep = mp + 16; /* XXX: scope field? */
1377 k = (u_char *)&key->sin6_addr;
1378 p = (u_char *)&pol->addr.sin6_addr;
1379 for (; mp < ep && *mp; mp++, k++, p++) {
1380 m = *mp;
1381 if ((*k & m) != *p)
1382 goto next; /* not match */
1383 if (m == 0xff) /* short cut for a typical case */
1384 matchlen += 8;
1385 else {
1386 while (m >= 0x80) {
1387 matchlen++;
1388 m <<= 1;
1389 }
1390 }
1391 }
1392
1393 /* matched. check if this is better than the current best. */
1394 if (bestpol == NULL ||
1395 matchlen > bestmatchlen) {
1396 bestpol = pol;
1397 bestmatchlen = matchlen;
1398 }
1399
1400 next:
1401 continue;
1402 }
1403
1404 return (bestpol);
1405 }
1406
1407 static int
1408 add_addrsel_policyent(const struct in6_addrpolicy *newpolicy)
1409 {
1410 struct addrsel_policyent *new, *pol;
1411
1412 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR,
1413 M_WAITOK);
1414
1415 ADDRSEL_LOCK();
1416
1417 /* duplication check */
1418 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1419 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1420 &pol->ape_policy.addr.sin6_addr) &&
1421 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1422 &pol->ape_policy.addrmask.sin6_addr)) {
1423 ADDRSEL_UNLOCK();
1424 FREE(new, M_IFADDR);
1425 return (EEXIST); /* or override it? */
1426 }
1427 }
1428
1429 bzero(new, sizeof(*new));
1430
1431 /* XXX: should validate entry */
1432 new->ape_policy = *newpolicy;
1433
1434 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
1435 ADDRSEL_UNLOCK();
1436
1437 return (0);
1438 }
#ifdef ENABLE_ADDRSEL
/*
 * Remove from the policy table the entry whose address and mask both
 * equal those of *key, and free it.  Runs under ADDRSEL_LOCK().
 *
 * Returns 0 when an entry was removed, ESRCH when none matched.
 */
static int
delete_addrsel_policyent(const struct in6_addrpolicy *key)
{
	struct addrsel_policyent *ent;
	int error = ESRCH;

	ADDRSEL_LOCK();

	/* search for the entry in the table */
	TAILQ_FOREACH(ent, &addrsel_policytab, ape_entry) {
		if (!IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
		    &ent->ape_policy.addr.sin6_addr))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
		    &ent->ape_policy.addrmask.sin6_addr))
			continue;

		/* found: unlink, free, and stop before the iterator advances */
		TAILQ_REMOVE(&addrsel_policytab, ent, ape_entry);
		FREE(ent, M_IFADDR);
		error = 0;
		break;
	}

	ADDRSEL_UNLOCK();

	return (error);
}
#endif /* ENABLE_ADDRSEL */
1470
1471 int
1472 walk_addrsel_policy(int (*callback)(const struct in6_addrpolicy *, void *),
1473 void *w)
1474 {
1475 struct addrsel_policyent *pol;
1476 int error = 0;
1477
1478 ADDRSEL_LOCK();
1479 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1480 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1481 ADDRSEL_UNLOCK();
1482 return (error);
1483 }
1484 }
1485 ADDRSEL_UNLOCK();
1486 return (error);
1487 }
1488 /*
1489 * Subroutines to manage the address selection policy table via sysctl.
1490 */
1491 struct walkarg { /* argument handed to the walk_addrsel_policy() callback */
1492 struct sysctl_req *w_req; /* sysctl request the policies are copied out to */
1493 };
1494
1495
1496 static int
1497 dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg)
1498 {
1499 int error = 0;
1500 struct walkarg *w = arg;
1501
1502 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol));
1503
1504 return (error);
1505 }
1506
1507 static int
1508 in6_src_sysctl SYSCTL_HANDLER_ARGS
1509 {
1510 #pragma unused(oidp, arg1, arg2)
1511 struct walkarg w;
1512
1513 if (req->newptr)
1514 return EPERM;
1515 bzero(&w, sizeof(w));
1516 w.w_req = req;
1517
1518 return (walk_addrsel_policy(dump_addrsel_policyent, &w));
1519 }
1520
1521
/*
 * Declares the sysctl node "addrctlpolicy" (IPV6CTL_ADDRCTLPOLICY) under
 * _net_inet6_ip6, flagged CTLFLAG_RD | CTLFLAG_LOCKED, with
 * in6_src_sysctl() as its handler.
 */
1522 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
1523 CTLFLAG_RD | CTLFLAG_LOCKED, in6_src_sysctl, "");
1524 int
1525 in6_src_ioctl(u_long cmd, caddr_t data)
1526 {
1527 int i;
1528 struct in6_addrpolicy ent0;
1529
1530 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
1531 return (EOPNOTSUPP); /* check for safety */
1532
1533 ent0 = *(struct in6_addrpolicy *)data;
1534
1535 if (ent0.label == ADDR_LABEL_NOTAPP)
1536 return (EINVAL);
1537 /* check if the prefix mask is consecutive. */
1538 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
1539 return (EINVAL);
1540 /* clear trailing garbages (if any) of the prefix address. */
1541 for (i = 0; i < 4; i++) {
1542 ent0.addr.sin6_addr.s6_addr32[i] &=
1543 ent0.addrmask.sin6_addr.s6_addr32[i];
1544 }
1545 ent0.use = 0;
1546
1547 switch (cmd) {
1548 case SIOCAADDRCTL_POLICY:
1549 #ifdef ENABLE_ADDRSEL
1550 return (add_addrsel_policyent(&ent0));
1551 #else
1552 return (ENOTSUP);
1553 #endif
1554 case SIOCDADDRCTL_POLICY:
1555 #ifdef ENABLE_ADDRSEL
1556 return (delete_addrsel_policyent(&ent0));
1557 #else
1558 return (ENOTSUP);
1559 #endif
1560 }
1561
1562 return (0); /* XXX: compromise compilers */
1563 }
1564
1565 /*
1566 * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
1567 * If the address scope of is link-local, embed the interface index in the
1568 * address. The routine determines our precedence
1569 * between advanced API scope/interface specification and basic API
1570 * specification.
1571 *
1572 * this function should be nuked in the future, when we get rid of
1573 * embedded scopeid thing.
1574 *
1575 * XXX actually, it is over-specification to return ifp against sin6_scope_id.
1576 * there can be multiple interfaces that belong to a particular scope zone
1577 * (in specification, we have 1:N mapping between a scope zone and interfaces).
1578 * we may want to change the function to return something other than ifp.
1579 */
1580 int
1581 in6_embedscope(
1582 struct in6_addr *in6,
1583 const struct sockaddr_in6 *sin6,
1584 struct in6pcb *in6p,
1585 struct ifnet **ifpp,
1586 struct ip6_pktopts *opt)
1587 {
1588 struct ifnet *ifp = NULL;
1589 u_int32_t scopeid;
1590 struct ip6_pktopts *optp = NULL;
1591
1592 *in6 = sin6->sin6_addr;
1593 scopeid = sin6->sin6_scope_id;
1594 if (ifpp != NULL)
1595 *ifpp = NULL;
1596
1597 /*
1598 * don't try to read sin6->sin6_addr beyond here, since the caller may
1599 * ask us to overwrite existing sockaddr_in6
1600 */
1601
1602 #ifdef ENABLE_DEFAULT_SCOPE
1603 if (scopeid == 0)
1604 scopeid = scope6_addr2default(in6);
1605 #endif
1606
1607 if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
1608 struct in6_pktinfo *pi;
1609 struct ifnet *im6o_multicast_ifp = NULL;
1610
1611 if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
1612 in6p->in6p_moptions != NULL) {
1613 IM6O_LOCK(in6p->in6p_moptions);
1614 im6o_multicast_ifp =
1615 in6p->in6p_moptions->im6o_multicast_ifp;
1616 IM6O_UNLOCK(in6p->in6p_moptions);
1617 }
1618
1619 if (opt)
1620 optp = opt;
1621 else if (in6p)
1622 optp = in6p->in6p_outputopts;
1623 /*
1624 * KAME assumption: link id == interface id
1625 */
1626 ifnet_head_lock_shared();
1627 if (in6p && optp && (pi = optp->ip6po_pktinfo) &&
1628 pi->ipi6_ifindex) {
1629 ifp = ifindex2ifnet[pi->ipi6_ifindex];
1630 in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
1631 } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) &&
1632 in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) {
1633 ifp = im6o_multicast_ifp;
1634 in6->s6_addr16[1] = htons(ifp->if_index);
1635 } else if (scopeid) {
1636 /*
1637 * Since scopeid is unsigned, we only have to check it
1638 * against if_index
1639 */
1640 if (if_index < scopeid) {
1641 ifnet_head_done();
1642 return ENXIO; /* XXX EINVAL? */
1643
1644 }
1645 ifp = ifindex2ifnet[scopeid];
1646 /*XXX assignment to 16bit from 32bit variable */
1647 in6->s6_addr16[1] = htons(scopeid & 0xffff);
1648 }
1649 ifnet_head_done();
1650
1651 if (ifpp != NULL) {
1652 if (ifp != NULL)
1653 ifnet_reference(ifp); /* for caller */
1654 *ifpp = ifp;
1655 }
1656 }
1657
1658 return 0;
1659 }
1660
1661 /*
1662 * generate standard sockaddr_in6 from embedded form.
1663 * touches sin6_addr and sin6_scope_id only.
1664 *
1665 * this function should be nuked in the future, when we get rid of
1666 * embedded scopeid thing.
1667 */
1668 int
1669 in6_recoverscope(
1670 struct sockaddr_in6 *sin6,
1671 const struct in6_addr *in6,
1672 struct ifnet *ifp)
1673 {
1674 u_int32_t scopeid;
1675
1676 sin6->sin6_addr = *in6;
1677
1678 /*
1679 * don't try to read *in6 beyond here, since the caller may
1680 * ask us to overwrite existing sockaddr_in6
1681 */
1682
1683 sin6->sin6_scope_id = 0;
1684 if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
1685 /*
1686 * KAME assumption: link id == interface id
1687 */
1688 scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]);
1689 if (scopeid) {
1690 /*
1691 * sanity check
1692 *
1693 * Since scopeid is unsigned, we only have to check it
1694 * against if_index
1695 */
1696 if (if_index < scopeid)
1697 return ENXIO;
1698 if (ifp && ifp->if_index != scopeid)
1699 return ENXIO;
1700 sin6->sin6_addr.s6_addr16[1] = 0;
1701 sin6->sin6_scope_id = scopeid;
1702 }
1703 }
1704
1705 return 0;
1706 }