]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/in6_src.c
40bad0948c0377cf270c86367302ba3a2d398949
[apple/xnu.git] / bsd / netinet6 / in6_src.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/in6_src.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */
30 /* $KAME: in6_src.c,v 1.37 2001/03/29 05:34:31 itojun Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * Copyright (c) 1982, 1986, 1991, 1993
63 * The Regents of the University of California. All rights reserved.
64 *
65 * Redistribution and use in source and binary forms, with or without
66 * modification, are permitted provided that the following conditions
67 * are met:
68 * 1. Redistributions of source code must retain the above copyright
69 * notice, this list of conditions and the following disclaimer.
70 * 2. Redistributions in binary form must reproduce the above copyright
71 * notice, this list of conditions and the following disclaimer in the
72 * documentation and/or other materials provided with the distribution.
73 * 3. All advertising materials mentioning features or use of this software
74 * must display the following acknowledgement:
75 * This product includes software developed by the University of
76 * California, Berkeley and its contributors.
77 * 4. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 *
93 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
94 */
95
96
97 #include <sys/param.h>
98 #include <sys/systm.h>
99 #include <sys/malloc.h>
100 #include <sys/mbuf.h>
101 #include <sys/protosw.h>
102 #include <sys/socket.h>
103 #include <sys/socketvar.h>
104 #include <sys/errno.h>
105 #include <sys/time.h>
106 #include <sys/proc.h>
107 #include <sys/sysctl.h>
108 #include <sys/kauth.h>
109 #include <sys/priv.h>
110 #include <kern/lock.h>
111
112 #include <net/if.h>
113 #include <net/if_types.h>
114 #include <net/route.h>
115
116 #include <netinet/in.h>
117 #include <netinet/in_var.h>
118 #include <netinet/in_systm.h>
119 #include <netinet/ip.h>
120 #include <netinet/in_pcb.h>
121 #include <netinet6/in6_var.h>
122 #include <netinet/ip6.h>
123 #include <netinet6/in6_pcb.h>
124 #include <netinet6/ip6_var.h>
125 #include <netinet6/scope6_var.h>
126 #include <netinet6/nd6.h>
127
128 #include <net/net_osdep.h>
129
130 #include "loop.h"
131
132 SYSCTL_DECL(_net_inet6_ip6);
133
/*
 * Debug knob exported as a read/write sysctl int named select_srcif_debug
 * under _net_inet6_ip6. selectroute() tests it before formatting and
 * printing its source-interface selection traces.
 */
134 static int ip6_select_srcif_debug = 0;
135 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
136 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
137 "log source interface selection debug info");
138
/*
 * Policy label value compared against the destination policy's label in
 * Rule 6 of in6_selectsrc(); when the destination's label equals this
 * value, the label comparison is skipped.
 */
139 #define ADDR_LABEL_NOTAPP (-1)
/* NOTE(review): presumably the fallback policy entry -- confirm at its users. */
140 struct in6_addrpolicy defaultaddrpolicy;
141
/*
 * System-wide temporary-address preference. in6_selectsrc() Rule 7 uses it
 * when the caller supplied no options or asked for IP6PO_TEMPADDR_SYSTEM
 * and the destination was not judged local.
 */
142 int ip6_prefer_tempaddr = 1;
/*
 * ADDRSEL_LOCK()/ADDRSEL_UNLOCK() take and drop addrsel_mutex only when
 * ENABLE_ADDRSEL is defined; otherwise they expand to nothing.
 */
143 #ifdef ENABLE_ADDRSEL
144 extern lck_mtx_t *addrsel_mutex;
145 #define ADDRSEL_LOCK() lck_mtx_lock(addrsel_mutex)
146 #define ADDRSEL_UNLOCK() lck_mtx_unlock(addrsel_mutex)
147 #else
148 #define ADDRSEL_LOCK()
149 #define ADDRSEL_UNLOCK()
150 #endif
151
/*
 * Forward declarations. Everything here is file-local (static) except
 * addrsel_policy_init(), which has external linkage.
 */
152 static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
153 struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *,
154 struct ifnet **, struct rtentry **, int, int,
155 const struct ip6_out_args *ip6oa);
/* Called by in6_selectsrc() to obtain the outgoing interface. */
156 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
157 struct ip6_moptions *, struct route_in6 *ro,
158 const struct ip6_out_args *, struct ifnet **);
159 static void init_policy_queue(void);
160 static int add_addrsel_policyent(const struct in6_addrpolicy *);
/* Policy deletion is only declared when ENABLE_ADDRSEL is defined. */
161 #ifdef ENABLE_ADDRSEL
162 static int delete_addrsel_policyent(const struct in6_addrpolicy *);
163 #endif
/* Takes a callback plus an opaque argument pointer. */
164 static int walk_addrsel_policy(int (*)(const struct in6_addrpolicy *, void *),
165 void *);
/* Signature matches the callback type accepted by walk_addrsel_policy(). */
166 static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
167 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
168 void addrsel_policy_init(void);
169
170 /*
171 * Return an IPv6 address, which is the most appropriate for a given
172 * destination and user specified options.
173 * If necessary, this function looks up the routing table and returns
174 * an entry to the caller for later use.
175 */
/*
 * Helper macros for the candidate loop in in6_selectsrc(). Each one bumps
 * the ip6stat.ip6s_sources_rule[] counter for rule number r (only when r
 * is within the bounds of that array) and then jumps to a label that must
 * exist in the enclosing function:
 *
 *   REPLACE(r) -> "replace"
 *   NEXTSRC(r) -> "next"
 *   BREAK(r)   -> "out"
 *
 * They are gotos rather than continue/break because the statement is
 * wrapped in do { } while (0), where continue/break would only affect
 * that wrapper.
 */
176 #define REPLACE(r) do {\
177 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
178 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
179 ip6stat.ip6s_sources_rule[(r)]++; \
180 goto replace; \
181 } while(0)
182 #define NEXTSRC(r) do {\
183 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
184 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
185 ip6stat.ip6s_sources_rule[(r)]++; \
186 goto next; /* XXX: we can't use 'continue' here */ \
187 } while(0)
188 #define BREAK(r) do { \
189 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \
190 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
191 ip6stat.ip6s_sources_rule[(r)]++; \
192 goto out; /* XXX: we can't use 'break' here */ \
193 } while(0)
194
195 /*
196 * Regardless of error, it will return an ifp with a reference held if the
197 * caller provides a non-NULL ifpp. The caller is responsible for checking
198 * if the returned ifp is valid and release its reference at all times.
 *
 * Pick the IPv6 source address to use towards dstsock. The cases are
 * tried in this order:
 *
 *  1. opts carries a pktinfo with a specified ipi6_addr: that address is
 *     looked up with ifa_ifwithaddr(), rejected if it is anycast or not
 *     ready (or on a cellular interface when cellular is disallowed), and
 *     otherwise copied into *src_storage.
 *  2. inp has a specified in6p_laddr: a pointer to that pcb field is
 *     returned; src_storage is not written and no interface is selected,
 *     so *ifpp stays NULL.
 *  3. Otherwise every entry of in6_ifaddrs is compared against the current
 *     best candidate rule by rule, and the winner is copied into
 *     *src_storage.
 *
 * dstsock      destination; its sin6_addr is copied into a local.
 * opts         optional per-packet options (may be NULL).
 * inp          optional pcb (may be NULL); supplies multicast options,
 *              the INP_NO_IFT_CELLULAR flag and the bound local address.
 * ro           route placeholder handed through to in6_selectif().
 * ifpp         if non-NULL, receives the selected interface (or NULL).
 * src_storage  caller buffer that receives the address in cases 1 and 3.
 * ifscope      seeds the ip6_out_args handed to in6_selectif().
 * errorp       always written: 0 on entry, an errno value on failure.
 *
 * Returns the address pointer on success, or NULL with *errorp set.
199 */
200 struct in6_addr *
201 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
202 struct inpcb *inp, struct route_in6 *ro,
203 struct ifnet **ifpp, struct in6_addr *src_storage, unsigned int ifscope,
204 int *errorp)
205 {
206 struct in6_addr dst;
207 struct ifnet *ifp = NULL;
208 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
209 struct in6_pktinfo *pi = NULL;
/* Scope, policy and prefix-match length are cached lazily; -1/NULL = not yet computed. */
210 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
211 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
212 u_int32_t odstzone;
213 int prefer_tempaddr;
214 struct ip6_moptions *mopts;
215 struct timeval timenow;
216 struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF };
217 boolean_t islocal = FALSE;
218
/*
 * NOTE(review): timenow is not referenced by name anywhere below; it may
 * be consumed implicitly by a macro such as IFA6_IS_DEPRECATED() --
 * confirm before treating it as dead.
 */
219 getmicrotime(&timenow);
220
221 dst = dstsock->sin6_addr; /* make a copy for local operation */
222 *errorp = 0;
223 if (ifpp != NULL)
224 *ifpp = NULL;
225
/* Pull multicast options and the no-cellular restriction from the pcb. */
226 if (inp != NULL) {
227 mopts = inp->in6p_moptions;
228 if (inp->inp_flags & INP_NO_IFT_CELLULAR)
229 ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
230 } else {
231 mopts = NULL;
232 }
233
234 if (ip6oa.ip6oa_boundif != IFSCOPE_NONE)
235 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
236
237 /*
238 * If the source address is explicitly specified by the caller,
239 * check if the requested source address is indeed a unicast address
240 * assigned to the node, and can be used as the packet's source
241 * address. If everything is okay, use the address as source.
242 */
243 if (opts && (pi = opts->ip6po_pktinfo) &&
244 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
245 struct sockaddr_in6 srcsock;
246 struct in6_ifaddr *ia6;
247
248 /* get the outgoing interface */
249 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
250 &ifp)) != 0) {
251 src_storage = NULL;
252 goto done;
253 }
254
255 /*
256 * determine the appropriate zone id of the source based on
257 * the zone of the destination and the outgoing interface.
258 * If the specified address is ambiguous wrt the scope zone,
259 * the interface must be specified; otherwise, ifa_ifwithaddr()
260 * will fail matching the address.
261 */
262 bzero(&srcsock, sizeof(srcsock));
263 srcsock.sin6_family = AF_INET6;
264 srcsock.sin6_len = sizeof(srcsock);
265 srcsock.sin6_addr = pi->ipi6_addr;
266 if (ifp != NULL) {
267 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
268 if (*errorp != 0) {
269 src_storage = NULL;
270 goto done;
271 }
272 }
/* The reference taken on ia6 here is dropped with IFA_REMREF() on every path below. */
273 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)
274 (&srcsock));
275 if (ia6 == NULL) {
276 *errorp = EADDRNOTAVAIL;
277 src_storage = NULL;
278 goto done;
279 }
280 IFA_LOCK_SPIN(&ia6->ia_ifa);
/* Refuse anycast / not-ready addresses, and cellular ones when disallowed. */
281 if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
282 ((ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) &&
283 (ia6->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR))) {
284 IFA_UNLOCK(&ia6->ia_ifa);
285 IFA_REMREF(&ia6->ia_ifa);
286 *errorp = EADDRNOTAVAIL;
287 src_storage = NULL;
288 goto done;
289 }
290
291 *src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
292 IFA_UNLOCK(&ia6->ia_ifa);
293 IFA_REMREF(&ia6->ia_ifa);
294 goto done;
295 }
296
297 /*
298 * Otherwise, if the socket has already bound the source, just use it.
 * The returned pointer refers to the pcb's own in6p_laddr field, not
 * to the caller's src_storage, and ifp is still NULL on this path.
299 */
300 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
301 src_storage = &inp->in6p_laddr;
302 goto done;
303 }
304
305 /*
306 * If the address is not specified, choose the best one based on
307 * the outgoing interface and the destination address.
308 */
309
310 /* get the outgoing interface */
311 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
312 &ifp)) != 0) {
313 src_storage = NULL;
314 goto done;
315 }
316
/* odstzone: the destination's zone as seen from the outgoing interface. */
317 *errorp = in6_setscope(&dst, ifp, &odstzone);
318 if (*errorp != 0) {
319 src_storage = NULL;
320 goto done;
321 }
/* The address list is walked under the shared rwlock; each entry is IFA-locked in turn. */
322 lck_rw_lock_shared(&in6_ifaddr_rwlock);
323
324 for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
325 int new_scope = -1, new_matchlen = -1;
326 struct in6_addrpolicy *new_policy = NULL;
327 u_int32_t srczone, osrczone, dstzone;
328 struct in6_addr src;
329 struct ifnet *ifp1 = ia->ia_ifp;
330
/* Every exit from this iteration (fall-through, replace, next, out) unlocks ia. */
331 IFA_LOCK(&ia->ia_ifa);
332 /*
333 * We'll never take an address that breaks the scope zone
334 * of the destination. We also skip an address if its zone
335 * does not contain the outgoing interface.
336 * XXX: we should probably use sin6_scope_id here.
337 */
338 if (in6_setscope(&dst, ifp1, &dstzone) ||
339 odstzone != dstzone)
340 goto next;
341
342 src = ia->ia_addr.sin6_addr;
343 if (in6_setscope(&src, ifp, &osrczone) ||
344 in6_setscope(&src, ifp1, &srczone) ||
345 osrczone != srczone)
346 goto next;
347
348 /* avoid unusable addresses */
349 if ((ia->ia6_flags &
350 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED)))
351 goto next;
352
353 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
354 goto next;
355
356 if (!nd6_optimistic_dad &&
357 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
358 goto next;
359
360 /* Rule 1: Prefer same address */
361 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
362 BREAK(1); /* there should be no better candidate */
363
/* First usable candidate: adopt it unconditionally (counted as rule 0). */
364 if (ia_best == NULL)
365 REPLACE(0);
366
367 /* Rule 2: Prefer appropriate scope */
368 if (dst_scope < 0)
369 dst_scope = in6_addrscope(&dst);
370 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
371 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
372 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
373 REPLACE(2);
374 NEXTSRC(2);
375 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
376 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
377 NEXTSRC(2);
378 REPLACE(2);
379 }
380
381 /*
382 * Rule 3: Avoid deprecated addresses. Note that the case of
383 * !ip6_use_deprecated is already rejected above.
384 */
385 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
386 NEXTSRC(3);
387 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
388 REPLACE(3);
389
390 /*
391 * RFC 4429 says that optimistic addresses are equivalent to
392 * deprecated addresses, so avoid them here.
393 */
394 if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) == 0 &&
395 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
396 NEXTSRC(3);
397 if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) != 0 &&
398 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0)
399 REPLACE(3);
400
401 /* Rule 4: Prefer home addresses */
402 /*
403 * XXX: This is a TODO. We should probably merge the MIP6
404 * case above.
405 */
406
407 /* Rule 5: Prefer outgoing interface */
408 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
409 NEXTSRC(5);
410 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
411 REPLACE(5);
412
413 /*
414 * Rule 6: Prefer matching label
415 * Note that best_policy should be non-NULL here.
 * The destination policy is looked up once and reused for the
 * remaining candidates.
416 */
417 if (dst_policy == NULL)
418 dst_policy = in6_addrsel_lookup_policy(dstsock);
419 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
420 new_policy = in6_addrsel_lookup_policy(&ia->ia_addr);
421 if (dst_policy->label == best_policy->label &&
422 dst_policy->label != new_policy->label)
423 NEXTSRC(6);
424 if (dst_policy->label != best_policy->label &&
425 dst_policy->label == new_policy->label)
426 REPLACE(6);
427 }
428
429 /*
430 * Rule 7: Prefer public addresses.
431 * We allow users to reverse the logic by configuring
432 * a sysctl variable, so that privacy conscious users can
433 * always prefer temporary addresses.
434 * Don't use temporary addresses for local destinations or
435 * for multicast addresses unless we were passed in an option.
 *
 * NOTE(review): islocal is declared outside the loop and is
 * never reset to FALSE, so once one ia_best matches the
 * destination's prefix it stays TRUE for all later
 * candidates -- confirm this is intended.
436 */
437 if (IN6_IS_ADDR_MULTICAST(&dst) ||
438 in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst) >=
439 in6_mask2len(&ia_best->ia_prefixmask.sin6_addr, NULL))
440 islocal = TRUE;
441 if (opts == NULL ||
442 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
443 prefer_tempaddr = islocal ? 0 : ip6_prefer_tempaddr;
444 } else if (opts->ip6po_prefer_tempaddr ==
445 IP6PO_TEMPADDR_NOTPREFER) {
446 prefer_tempaddr = 0;
447 } else
448 prefer_tempaddr = 1;
449 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
450 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
451 if (prefer_tempaddr)
452 REPLACE(7);
453 else
454 NEXTSRC(7);
455 }
456 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
457 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
458 if (prefer_tempaddr)
459 NEXTSRC(7);
460 else
461 REPLACE(7);
462 }
463
464 /*
465 * Rule 8: prefer addresses on alive interfaces.
466 * This is a KAME specific rule.
467 */
468 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
469 !(ia->ia_ifp->if_flags & IFF_UP))
470 NEXTSRC(8);
471 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
472 (ia->ia_ifp->if_flags & IFF_UP))
473 REPLACE(8);
474
475 /*
476 * Rule 14: Use longest matching prefix.
477 * Note: in the address selection draft, this rule is
478 * documented as "Rule 8". However, since it is also
479 * documented that this rule can be overridden, we assign
480 * a large number so that it is easy to assign smaller numbers
481 * to more preferred rules.
482 */
483 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
484 if (best_matchlen < new_matchlen)
485 REPLACE(14);
486 if (new_matchlen < best_matchlen)
487 NEXTSRC(14);
488
489 /* Rule 15 is reserved. */
490
491 /*
492 * Last resort: just keep the current candidate.
493 * Or, do we need more rules?
494 */
495 IFA_UNLOCK(&ia->ia_ifa);
496 continue;
497
/*
 * replace: ia becomes the new best candidate. Values a rule already
 * computed for ia are reused; the rest are computed now. A reference
 * is taken on ia before the previous best's reference is dropped.
 */
498 replace:
499 best_scope = (new_scope >= 0 ? new_scope :
500 in6_addrscope(&ia->ia_addr.sin6_addr));
501 best_policy = (new_policy ? new_policy :
502 in6_addrsel_lookup_policy(&ia->ia_addr));
503 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
504 in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
505 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
506 IFA_UNLOCK(&ia->ia_ifa);
507 if (ia_best != NULL)
508 IFA_REMREF(&ia_best->ia_ifa);
509 ia_best = ia;
510 continue;
511
/* next: ia is rejected; the current best candidate is kept. */
512 next:
513 IFA_UNLOCK(&ia->ia_ifa);
514 continue;
515
/* out: ia is taken as the final choice and the scan stops (reached via BREAK). */
516 out:
517 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
518 IFA_UNLOCK(&ia->ia_ifa);
519 if (ia_best != NULL)
520 IFA_REMREF(&ia_best->ia_ifa);
521 ia_best = ia;
522 break;
523 }
524
525 lck_rw_done(&in6_ifaddr_rwlock);
526
/*
 * The cellular restriction is applied only to the final winner: if it
 * sits on a cellular interface it is dropped and the call fails below;
 * no runner-up is considered.
 */
527 if (ia_best != NULL &&
528 (ip6oa.ip6oa_flags & IP6OAF_NO_CELLULAR) &&
529 ia_best->ia_ifa.ifa_ifp->if_type == IFT_CELLULAR) {
530 IFA_REMREF(&ia_best->ia_ifa);
531 ia_best = NULL;
532 }
533
534 if ((ia = ia_best) == NULL) {
535 *errorp = EADDRNOTAVAIL;
536 src_storage = NULL;
537 goto done;
538 }
539
/* Copy the winner out under its lock, then drop the reference held for ia_best. */
540 IFA_LOCK_SPIN(&ia->ia_ifa);
541 *src_storage = satosin6(&ia->ia_addr)->sin6_addr;
542 IFA_UNLOCK(&ia->ia_ifa);
543 IFA_REMREF(&ia->ia_ifa);
/* done: hand the interface reference to the caller, or release it if unwanted. */
544 done:
545 if (ifpp != NULL) {
546 /* if ifp is non-NULL, refcnt held in in6_selectif() */
547 *ifpp = ifp;
548 } else if (ifp != NULL) {
549 ifnet_release(ifp);
550 }
551 return (src_storage);
552 }
553
554 /*
555 * Given a source IPv6 address (and route, if available), determine the best
556 * interface to send the packet from. Checking for (and updating) the
557 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
558 * without any locks, based on the assumption that in the event this is
559 * called from ip6_output(), the output operation is single-threaded per-pcb,
560 * i.e. for any given pcb there can only be one thread performing output at
561 * the IPv6 layer.
562 *
563 * This routine is analogous to in_selectsrcif() for IPv4. Regardless of
564 * error, it will return an ifp with a reference held if the caller provides
565 * a non-NULL retifp. The caller is responsible for checking if the
566 * returned ifp is valid and release its reference at all times.
567 *
568 * clone - meaningful only for bsdi and freebsd
569 */
570 static int
571 selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
572 struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
573 struct ifnet **retifp, struct rtentry **retrt, int clone,
574 int norouteok, const struct ip6_out_args *ip6oa)
575 {
576 int error = 0;
577 struct ifnet *ifp = NULL, *ifp0 = NULL;
578 struct route_in6 *route = NULL;
579 struct sockaddr_in6 *sin6_next;
580 struct in6_pktinfo *pi = NULL;
581 struct in6_addr *dst = &dstsock->sin6_addr;
582 struct ifaddr *ifa = NULL;
583 char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
584 boolean_t select_srcif, proxied_ifa = FALSE;
585 unsigned int ifscope = ip6oa->ip6oa_boundif;
586
587 #if 0
588 char ip6buf[INET6_ADDRSTRLEN];
589
590 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
591 dstsock->sin6_addr.s6_addr32[1] == 0 &&
592 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
593 printf("in6_selectroute: strange destination %s\n",
594 ip6_sprintf(ip6buf, &dstsock->sin6_addr));
595 } else {
596 printf("in6_selectroute: destination = %s%%%d\n",
597 ip6_sprintf(ip6buf, &dstsock->sin6_addr),
598 dstsock->sin6_scope_id); /* for debug */
599 }
600 #endif
601
602 if (retifp != NULL)
603 *retifp = NULL;
604
605 if (retrt != NULL)
606 *retrt = NULL;
607
608 if (ip6_select_srcif_debug) {
609 struct in6_addr src;
610 src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any;
611 (void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src));
612 (void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst));
613 }
614
615 /*
616 * If the destination address is UNSPECIFIED addr, bail out.
617 */
618 if (IN6_IS_ADDR_UNSPECIFIED(dst)) {
619 error = EHOSTUNREACH;
620 goto done;
621 }
622
623 /*
624 * Perform source interface selection only if Scoped Routing
625 * is enabled and a source address that isn't unspecified.
626 */
627 select_srcif = (ip6_doscopedroute && srcsock != NULL &&
628 !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));
629
630 /*
631 * If Scoped Routing is disabled, ignore the given ifscope.
632 * Otherwise even if source selection won't be performed,
633 * we still obey IPV6_BOUND_IF.
634 */
635 if (!ip6_doscopedroute && ifscope != IFSCOPE_NONE)
636 ifscope = IFSCOPE_NONE;
637
638 /* If the caller specified the outgoing interface explicitly, use it */
639 if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL &&
640 pi->ipi6_ifindex != 0) {
641 /*
642 * IPV6_PKTINFO takes precedence over IPV6_BOUND_IF.
643 */
644 ifscope = pi->ipi6_ifindex;
645 ifnet_head_lock_shared();
646 /* ifp may be NULL if detached or out of range */
647 ifp = ifp0 =
648 ((ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL);
649 ifnet_head_done();
650 if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
651 /*
652 * We do not have to check or get the route for
653 * multicast. If the caller didn't ask/care for
654 * the route and we have no interface to use,
655 * it's an error.
656 */
657 if (ifp == NULL)
658 error = EHOSTUNREACH;
659 goto done;
660 } else {
661 goto getsrcif;
662 }
663 }
664
665 /*
666 * If the destination address is a multicast address and the outgoing
667 * interface for the address is specified by the caller, use it.
668 */
669 if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
670 IM6O_LOCK(mopts);
671 if ((ifp = ifp0 = mopts->im6o_multicast_ifp) != NULL) {
672 IM6O_UNLOCK(mopts);
673 goto done; /* we do not need a route for multicast. */
674 }
675 IM6O_UNLOCK(mopts);
676 }
677
678 getsrcif:
679 /*
680 * If the outgoing interface was not set via IPV6_BOUND_IF or
681 * IPV6_PKTINFO, use the scope ID in the destination address.
682 */
683 if (ip6_doscopedroute && ifscope == IFSCOPE_NONE)
684 ifscope = dstsock->sin6_scope_id;
685
686 /*
687 * Perform source interface selection; the source IPv6 address
688 * must belong to one of the addresses of the interface used
689 * by the route. For performance reasons, do this only if
690 * there is no route, or if the routing table has changed,
691 * or if we haven't done source interface selection on this
692 * route (for this PCB instance) before.
693 */
694 if (!select_srcif || (ro != NULL && ro->ro_rt != NULL &&
695 (ro->ro_rt->rt_flags & RTF_UP) &&
696 ro->ro_rt->generation_id == route_generation &&
697 (ro->ro_flags & ROF_SRCIF_SELECTED))) {
698 if (ro != NULL && ro->ro_rt != NULL) {
699 ifa = ro->ro_rt->rt_ifa;
700 IFA_ADDREF(ifa);
701 }
702 goto getroute;
703 }
704
705 /*
706 * Given the source IPv6 address, find a suitable source interface
707 * to use for transmission; if a scope ID has been specified,
708 * optimize the search by looking at the addresses only for that
709 * interface. This is still suboptimal, however, as we need to
710 * traverse the per-interface list.
711 */
712 if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) {
713 unsigned int scope = ifscope;
714 struct ifnet *rt_ifp;
715
716 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
717
718 /*
719 * If no scope is specified and the route is stale (pointing
720 * to a defunct interface) use the current primary interface;
721 * this happens when switching between interfaces configured
722 * with the same IPv6 address. Otherwise pick up the scope
723 * information from the route; the ULP may have looked up a
724 * correct route and we just need to verify it here and mark
725 * it with the ROF_SRCIF_SELECTED flag below.
726 */
727 if (scope == IFSCOPE_NONE) {
728 scope = rt_ifp->if_index;
729 if (scope != get_primary_ifscope(AF_INET6) &&
730 ro->ro_rt->generation_id != route_generation)
731 scope = get_primary_ifscope(AF_INET6);
732 }
733
734 ifa = (struct ifaddr *)
735 ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);
736
737 /*
738 * If we are forwarding and proxying prefix(es), see if the
739 * source address is one of ours and is a proxied address;
740 * if so, use it.
741 */
742 if (ifa == NULL && ip6_forwarding && nd6_prproxy) {
743 ifa = (struct ifaddr *)
744 ifa_foraddr6(&srcsock->sin6_addr);
745 if (ifa != NULL && !(proxied_ifa =
746 nd6_prproxy_ifaddr((struct in6_ifaddr *)ifa))) {
747 IFA_REMREF(ifa);
748 ifa = NULL;
749 }
750 }
751
752 if (ip6_select_srcif_debug && ifa != NULL) {
753 if (ro->ro_rt != NULL) {
754 printf("%s->%s ifscope %d->%d ifa_if %s "
755 "ro_if %s\n", s_src, s_dst, ifscope,
756 scope, if_name(ifa->ifa_ifp),
757 if_name(rt_ifp));
758 } else {
759 printf("%s->%s ifscope %d->%d ifa_if %s\n",
760 s_src, s_dst, ifscope, scope,
761 if_name(ifa->ifa_ifp));
762 }
763 }
764 }
765
766 /*
767 * Slow path; search for an interface having the corresponding source
768 * IPv6 address if the scope was not specified by the caller, and:
769 *
770 * 1) There currently isn't any route, or,
771 * 2) The interface used by the route does not own that source
772 * IPv6 address; in this case, the route will get blown away
773 * and we'll do a more specific scoped search using the newly
774 * found interface.
775 */
776 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
777 ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);
778
779 if (ip6_select_srcif_debug && ifa != NULL) {
780 printf("%s->%s ifscope %d ifa_if %s\n",
781 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
782 }
783
784 }
785
786 getroute:
787 if (ifa != NULL && !proxied_ifa)
788 ifscope = ifa->ifa_ifp->if_index;
789
790 /*
791 * If the next hop address for the packet is specified by the caller,
792 * use it as the gateway.
793 */
794 if (opts != NULL && opts->ip6po_nexthop != NULL) {
795 struct route_in6 *ron;
796
797 sin6_next = satosin6(opts->ip6po_nexthop);
798
799 /* at this moment, we only support AF_INET6 next hops */
800 if (sin6_next->sin6_family != AF_INET6) {
801 error = EAFNOSUPPORT; /* or should we proceed? */
802 goto done;
803 }
804
805 /*
806 * If the next hop is an IPv6 address, then the node identified
807 * by that address must be a neighbor of the sending host.
808 */
809 ron = &opts->ip6po_nextroute;
810 if (ron->ro_rt != NULL)
811 RT_LOCK(ron->ro_rt);
812 if ((ron->ro_rt != NULL &&
813 ((ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
814 (RTF_UP | RTF_LLINFO) ||
815 ron->ro_rt->generation_id != route_generation ||
816 (select_srcif && (ifa == NULL ||
817 (ifa->ifa_ifp != ron->ro_rt->rt_ifp && !proxied_ifa))))) ||
818 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
819 &sin6_next->sin6_addr)) {
820 if (ron->ro_rt != NULL) {
821 RT_UNLOCK(ron->ro_rt);
822 rtfree(ron->ro_rt);
823 ron->ro_rt = NULL;
824 }
825 *satosin6(&ron->ro_dst) = *sin6_next;
826 }
827 if (ron->ro_rt == NULL) {
828 rtalloc_scoped((struct route *)ron, ifscope);
829 if (ron->ro_rt != NULL)
830 RT_LOCK(ron->ro_rt);
831 if (ron->ro_rt == NULL ||
832 !(ron->ro_rt->rt_flags & RTF_LLINFO) ||
833 !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))->
834 sin6_addr, &sin6_next->sin6_addr)) {
835 if (ron->ro_rt != NULL) {
836 RT_UNLOCK(ron->ro_rt);
837 rtfree(ron->ro_rt);
838 ron->ro_rt = NULL;
839 }
840 error = EHOSTUNREACH;
841 goto done;
842 }
843 }
844 route = ron;
845 ifp = ifp0 = ron->ro_rt->rt_ifp;
846
847 /*
848 * When cloning is required, try to allocate a route to the
849 * destination so that the caller can store path MTU
850 * information.
851 */
852 if (!clone) {
853 if (select_srcif) {
854 /* Keep the route locked */
855 goto validateroute;
856 }
857 RT_UNLOCK(ron->ro_rt);
858 goto done;
859 }
860 RT_UNLOCK(ron->ro_rt);
861 }
862
863 /*
864 * Use a cached route if it exists and is valid, else try to allocate
865 * a new one. Note that we should check the address family of the
866 * cached destination, in case of sharing the cache with IPv4.
867 */
868 if (ro == NULL)
869 goto done;
870 if (ro->ro_rt != NULL)
871 RT_LOCK(ro->ro_rt);
872 if (ro->ro_rt != NULL && (!(ro->ro_rt->rt_flags & RTF_UP) ||
873 satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
874 ro->ro_rt->generation_id != route_generation ||
875 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
876 (select_srcif && (ifa == NULL ||
877 (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa))))) {
878 RT_UNLOCK(ro->ro_rt);
879 rtfree(ro->ro_rt);
880 ro->ro_rt = NULL;
881 }
882 if (ro->ro_rt == NULL) {
883 struct sockaddr_in6 *sa6;
884
885 if (ro->ro_rt != NULL)
886 RT_UNLOCK(ro->ro_rt);
887 /* No route yet, so try to acquire one */
888 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
889 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
890 sa6->sin6_family = AF_INET6;
891 sa6->sin6_len = sizeof(struct sockaddr_in6);
892 sa6->sin6_addr = *dst;
893 if (IN6_IS_ADDR_MULTICAST(dst)) {
894 ro->ro_rt = rtalloc1_scoped(
895 &((struct route *)ro)->ro_dst, 0, 0, ifscope);
896 } else {
897 rtalloc_scoped((struct route *)ro, ifscope);
898 }
899 if (ro->ro_rt != NULL)
900 RT_LOCK(ro->ro_rt);
901 }
902
903 /*
904 * Do not care about the result if we have the nexthop
905 * explicitly specified (in case we're asked to clone.)
906 */
907 if (opts != NULL && opts->ip6po_nexthop != NULL) {
908 if (ro->ro_rt != NULL)
909 RT_UNLOCK(ro->ro_rt);
910 goto done;
911 }
912
913 if (ro->ro_rt != NULL) {
914 RT_LOCK_ASSERT_HELD(ro->ro_rt);
915 ifp = ifp0 = ro->ro_rt->rt_ifp;
916 } else {
917 error = EHOSTUNREACH;
918 }
919 route = ro;
920
921 validateroute:
922 if (select_srcif) {
923 boolean_t has_route = (route != NULL && route->ro_rt != NULL);
924 boolean_t srcif_selected = FALSE;
925
926 if (has_route)
927 RT_LOCK_ASSERT_HELD(route->ro_rt);
928 /*
929 * If there is a non-loopback route with the wrong interface,
930 * or if there is no interface configured with such an address,
931 * blow it away. Except for local/loopback, we look for one
932 * with a matching interface scope/index.
933 */
934 if (has_route && (ifa == NULL ||
935 (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
936 !(route->ro_rt->rt_flags & RTF_UP))) {
937 /*
938 * If the destination address belongs to a proxied
939 * prefix, relax the requirement and allow the packet
940 * to come out of the proxy interface with the source
941 * address of the real interface.
942 */
943 if (ifa != NULL && proxied_ifa &&
944 (route->ro_rt->rt_flags & (RTF_UP|RTF_PROXY)) ==
945 (RTF_UP|RTF_PROXY)) {
946 srcif_selected = TRUE;
947 } else {
948 if (ip6_select_srcif_debug) {
949 if (ifa != NULL) {
950 printf("%s->%s ifscope %d "
951 "ro_if %s != ifa_if %s "
952 "(cached route cleared)\n",
953 s_src, s_dst,
954 ifscope, if_name(ifp),
955 if_name(ifa->ifa_ifp));
956 } else {
957 printf("%s->%s ifscope %d "
958 "ro_if %s (no ifa_if "
959 "found)\n", s_src, s_dst,
960 ifscope, if_name(ifp));
961 }
962 }
963 RT_UNLOCK(route->ro_rt);
964 rtfree(route->ro_rt);
965 route->ro_rt = NULL;
966 route->ro_flags &= ~ROF_SRCIF_SELECTED;
967 error = EHOSTUNREACH;
968 /* Undo the settings done above */
969 route = NULL;
970 ifp = NULL; /* ditch ifp; keep ifp0 */
971 has_route = FALSE;
972 }
973 } else if (has_route) {
974 srcif_selected = TRUE;
975 }
976
977 if (srcif_selected) {
978 VERIFY(has_route);
979 route->ro_flags |= ROF_SRCIF_SELECTED;
980 route->ro_rt->generation_id = route_generation;
981 RT_UNLOCK(route->ro_rt);
982 }
983 } else {
984 if (ro->ro_rt != NULL)
985 RT_UNLOCK(ro->ro_rt);
986 if (ifp != NULL && opts != NULL &&
987 opts->ip6po_pktinfo != NULL &&
988 opts->ip6po_pktinfo->ipi6_ifindex != 0) {
989 /*
990 * Check if the outgoing interface conflicts with the
991 * interface specified by ipi6_ifindex (if specified).
992 * Note that loopback interface is always okay.
993 * (this may happen when we are sending a packet to
994 * one of our own addresses.)
995 */
996 if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index !=
997 opts->ip6po_pktinfo->ipi6_ifindex) {
998 error = EHOSTUNREACH;
999 goto done;
1000 }
1001 }
1002 }
1003
1004 done:
1005 if (error == 0) {
1006 if ((ip6oa->ip6oa_flags & IP6OAF_NO_CELLULAR) &&
1007 ((ifp != NULL && ifp->if_type == IFT_CELLULAR) ||
1008 (route != NULL && route->ro_rt != NULL &&
1009 route->ro_rt->rt_ifp->if_type == IFT_CELLULAR))) {
1010 if (route != NULL && route->ro_rt != NULL) {
1011 rtfree(route->ro_rt);
1012 route->ro_rt = NULL;
1013 route->ro_flags &= ~ROF_SRCIF_SELECTED;
1014 route = NULL;
1015 }
1016 ifp = NULL; /* ditch ifp; keep ifp0 */
1017 error = EHOSTUNREACH;
1018 }
1019 }
1020
1021 if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) {
1022 /*
1023 * This can happen if the caller did not pass a cached route
1024 * nor any other hints. We treat this case an error.
1025 */
1026 error = EHOSTUNREACH;
1027 }
1028 if (error == EHOSTUNREACH)
1029 ip6stat.ip6s_noroute++;
1030
1031 /*
1032 * We'll return ifp regardless of error, so pick it up from ifp0
1033 * in case it was nullified above. Caller is responsible for
1034 * releasing the ifp if it is non-NULL.
1035 */
1036 ifp = ifp0;
1037 if (retifp != NULL) {
1038 if (ifp != NULL)
1039 ifnet_reference(ifp); /* for caller */
1040 *retifp = ifp;
1041 }
1042
1043 if (error == 0) {
1044 if (retrt != NULL && route != NULL)
1045 *retrt = route->ro_rt; /* ro_rt may be NULL */
1046 } else if (select_srcif && ip6_select_srcif_debug) {
1047 printf("%s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
1048 s_src, s_dst, ifscope,
1049 (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
1050 (ifp != NULL) ? if_name(ifp) : "NONE", error);
1051 }
1052
1053 if (ifa != NULL)
1054 IFA_REMREF(ifa);
1055
1056 return (error);
1057 }
1058
1059 /*
1060 * Regardless of error, it will return an ifp with a reference held if the
1061 * caller provides a non-NULL retifp. The caller is responsible for checking
1062 * if the returned ifp is valid and release its reference at all times.
1063 */
1064 static int
1065 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
1066 struct ip6_moptions *mopts, struct route_in6 *ro,
1067 const struct ip6_out_args *ip6oa, struct ifnet **retifp)
1068 {
1069 int err = 0;
1070 struct route_in6 sro;
1071 struct rtentry *rt = NULL;
1072
1073 if (ro == NULL) {
1074 bzero(&sro, sizeof(sro));
1075 ro = &sro;
1076 }
1077
1078 if ((err = selectroute(NULL, dstsock, opts, mopts, ro, retifp,
1079 &rt, 0, 1, ip6oa)) != 0)
1080 goto done;
1081
1082 /*
1083 * do not use a rejected or black hole route.
1084 * XXX: this check should be done in the L2 output routine.
1085 * However, if we skipped this check here, we'd see the following
1086 * scenario:
1087 * - install a rejected route for a scoped address prefix
1088 * (like fe80::/10)
1089 * - send a packet to a destination that matches the scoped prefix,
1090 * with ambiguity about the scope zone.
1091 * - pick the outgoing interface from the route, and disambiguate the
1092 * scope zone with the interface.
1093 * - ip6_output() would try to get another route with the "new"
1094 * destination, which may be valid.
1095 * - we'd see no error on output.
1096 * Although this may not be very harmful, it should still be confusing.
1097 * We thus reject the case here.
1098 */
1099 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1100 err = ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH);
1101 goto done;
1102 }
1103
1104 /*
1105 * Adjust the "outgoing" interface. If we're going to loop the packet
1106 * back to ourselves, the ifp would be the loopback interface.
1107 * However, we'd rather know the interface associated to the
1108 * destination address (which should probably be one of our own
1109 * addresses.)
1110 */
1111 if (rt != NULL && rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp != NULL &&
1112 retifp != NULL) {
1113 ifnet_reference(rt->rt_ifa->ifa_ifp);
1114 if (*retifp != NULL)
1115 ifnet_release(*retifp);
1116 *retifp = rt->rt_ifa->ifa_ifp;
1117 }
1118
1119 done:
1120 if (ro == &sro && rt && rt == sro.ro_rt)
1121 rtfree(rt);
1122
1123 /*
1124 * retifp might point to a valid ifp with a reference held;
1125 * caller is responsible for releasing it if non-NULL.
1126 */
1127 return (err);
1128 }
1129
/*
 * Exported route selection entry point.  Every argument is forwarded to
 * selectroute() as-is, with 0 supplied for that routine's second-to-last
 * argument.
 *
 * Whether or not an error is returned, a non-NULL retifp receives an ifp
 * carrying a reference.  The caller must validate the returned ifp and
 * always drop that reference.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
    struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone,
    const struct ip6_out_args *ip6oa)
{
	int error;

	error = selectroute(srcsock, dstsock, opts, mopts, ro, retifp,
	    retrt, clone, 0, ip6oa);

	return (error);
}
1147
1148 /*
1149 * Default hop limit selection. The precedence is as follows:
1150 * 1. Hoplimit value specified via ioctl.
1151 * 2. (If the outgoing interface is detected) the current
1152 * hop limit of the interface specified by router advertisement.
1153 * 3. The system default hoplimit.
1154 */
1155 int
1156 in6_selecthlim(
1157 struct in6pcb *in6p,
1158 struct ifnet *ifp)
1159 {
1160 if (in6p && in6p->in6p_hops >= 0) {
1161 return(in6p->in6p_hops);
1162 } else {
1163 lck_rw_lock_shared(nd_if_rwlock);
1164 if (ifp && ifp->if_index < nd_ifinfo_indexlim) {
1165 u_int8_t chlim;
1166 struct nd_ifinfo *ndi = &nd_ifinfo[ifp->if_index];
1167
1168 if (ndi->initialized) {
1169 lck_mtx_lock(&ndi->lock);
1170 chlim = ndi->chlim;
1171 lck_mtx_unlock(&ndi->lock);
1172 } else {
1173 chlim = ip6_defhlim;
1174 }
1175 lck_rw_done(nd_if_rwlock);
1176 return (chlim);
1177 } else {
1178 lck_rw_done(nd_if_rwlock);
1179 return(ip6_defhlim);
1180 }
1181 }
1182 }
1183
1184 /*
1185 * XXX: this is borrowed from in6_pcbbind(). If possible, we should
1186 * share this function by all *bsd*...
1187 */
1188 int
1189 in6_pcbsetport(
1190 __unused struct in6_addr *laddr,
1191 struct inpcb *inp,
1192 struct proc *p,
1193 int locked)
1194 {
1195 struct socket *so = inp->inp_socket;
1196 u_int16_t lport = 0, first, last, *lastport;
1197 int count, error = 0, wild = 0;
1198 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1199 kauth_cred_t cred;
1200 if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
1201 if (!lck_rw_try_lock_exclusive(pcbinfo->mtx)) {
1202 socket_unlock(inp->inp_socket, 0);
1203 lck_rw_lock_exclusive(pcbinfo->mtx);
1204 socket_lock(inp->inp_socket, 0);
1205 }
1206 }
1207
1208 /* XXX: this is redundant when called from in6_pcbbind */
1209 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
1210 wild = INPLOOKUP_WILDCARD;
1211
1212 inp->inp_flags |= INP_ANONPORT;
1213
1214 if (inp->inp_flags & INP_HIGHPORT) {
1215 first = ipport_hifirstauto; /* sysctl */
1216 last = ipport_hilastauto;
1217 lastport = &pcbinfo->lasthi;
1218 } else if (inp->inp_flags & INP_LOWPORT) {
1219 cred = kauth_cred_proc_ref(p);
1220 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
1221 kauth_cred_unref(&cred);
1222 if (error != 0) {
1223 if (!locked)
1224 lck_rw_done(pcbinfo->mtx);
1225 return error;
1226 }
1227 first = ipport_lowfirstauto; /* 1023 */
1228 last = ipport_lowlastauto; /* 600 */
1229 lastport = &pcbinfo->lastlow;
1230 } else {
1231 first = ipport_firstauto; /* sysctl */
1232 last = ipport_lastauto;
1233 lastport = &pcbinfo->lastport;
1234 }
1235 /*
1236 * Simple check to ensure all ports are not used up causing
1237 * a deadlock here.
1238 *
1239 * We split the two cases (up and down) so that the direction
1240 * is not being tested on each round of the loop.
1241 */
1242 if (first > last) {
1243 /*
1244 * counting down
1245 */
1246 count = first - last;
1247
1248 do {
1249 if (count-- < 0) { /* completely used? */
1250 /*
1251 * Undo any address bind that may have
1252 * occurred above.
1253 */
1254 inp->in6p_laddr = in6addr_any;
1255 inp->in6p_last_outifp = NULL;
1256 if (!locked)
1257 lck_rw_done(pcbinfo->mtx);
1258 return (EAGAIN);
1259 }
1260 --*lastport;
1261 if (*lastport > first || *lastport < last)
1262 *lastport = first;
1263 lport = htons(*lastport);
1264 } while (in6_pcblookup_local(pcbinfo,
1265 &inp->in6p_laddr, lport, wild));
1266 } else {
1267 /*
1268 * counting up
1269 */
1270 count = last - first;
1271
1272 do {
1273 if (count-- < 0) { /* completely used? */
1274 /*
1275 * Undo any address bind that may have
1276 * occurred above.
1277 */
1278 inp->in6p_laddr = in6addr_any;
1279 inp->in6p_last_outifp = NULL;
1280 if (!locked)
1281 lck_rw_done(pcbinfo->mtx);
1282 return (EAGAIN);
1283 }
1284 ++*lastport;
1285 if (*lastport < first || *lastport > last)
1286 *lastport = first;
1287 lport = htons(*lastport);
1288 } while (in6_pcblookup_local(pcbinfo,
1289 &inp->in6p_laddr, lport, wild));
1290 }
1291
1292 inp->inp_lport = lport;
1293 if (in_pcbinshash(inp, 1) != 0) {
1294 inp->in6p_laddr = in6addr_any;
1295 inp->inp_lport = 0;
1296 inp->in6p_last_outifp = NULL;
1297 if (!locked)
1298 lck_rw_done(pcbinfo->mtx);
1299 return (EAGAIN);
1300 }
1301
1302 if (!locked)
1303 lck_rw_done(pcbinfo->mtx);
1304 return(0);
1305 }
1306
1307 /*
1308 * * The followings are implementation of the policy table using a
1309 * * simple tail queue.
1310 * * XXX such details should be hidden.
1311 * * XXX implementation using binary tree should be more efficient.
1312 * */
1313 struct addrsel_policyent {
1314 TAILQ_ENTRY(addrsel_policyent) ape_entry;
1315 struct in6_addrpolicy ape_policy;
1316 };
1317
1318 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
1319
1320 struct addrsel_policyhead addrsel_policytab;
1321
1322 static void
1323 init_policy_queue(void)
1324 {
1325
1326 TAILQ_INIT(&addrsel_policytab);
1327 }
1328
1329 void
1330 addrsel_policy_init(void)
1331 {
1332 /*
1333 * Default address selection policy based on RFC 3484 and
1334 * draft-arifumi-6man-rfc3484-revise-03.
1335 */
1336 static const struct in6_addrpolicy defaddrsel[] = {
1337 /* localhost */
1338 { .addr = { .sin6_family = AF_INET6,
1339 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
1340 .sin6_len = sizeof(struct sockaddr_in6) },
1341 .addrmask = { .sin6_family = AF_INET6,
1342 .sin6_addr = IN6MASK128,
1343 .sin6_len = sizeof(struct sockaddr_in6) },
1344 .preced = 60,
1345 .label = 0 },
1346 /* ULA */
1347 { .addr = { .sin6_family = AF_INET6,
1348 .sin6_addr = {{{ 0xfc }}},
1349 .sin6_len = sizeof(struct sockaddr_in6) },
1350 .addrmask = { .sin6_family = AF_INET6,
1351 .sin6_addr = IN6MASK7,
1352 .sin6_len = sizeof(struct sockaddr_in6) },
1353 .preced = 50,
1354 .label = 1 },
1355 /* any IPv6 src */
1356 { .addr = { .sin6_family = AF_INET6,
1357 .sin6_addr = IN6ADDR_ANY_INIT,
1358 .sin6_len = sizeof(struct sockaddr_in6) },
1359 .addrmask = { .sin6_family = AF_INET6,
1360 .sin6_addr = IN6MASK0,
1361 .sin6_len = sizeof(struct sockaddr_in6) },
1362 .preced = 40,
1363 .label = 2 },
1364 /* any IPv4 src */
1365 { .addr = { .sin6_family = AF_INET6,
1366 .sin6_addr = IN6ADDR_V4MAPPED_INIT,
1367 .sin6_len = sizeof(struct sockaddr_in6) },
1368 .addrmask = { .sin6_family = AF_INET6,
1369 .sin6_addr = IN6MASK96,
1370 .sin6_len = sizeof(struct sockaddr_in6) },
1371 .preced = 30,
1372 .label = 3 },
1373 /* 6to4 */
1374 { .addr = { .sin6_family = AF_INET6,
1375 .sin6_addr = {{{ 0x20, 0x02 }}},
1376 .sin6_len = sizeof(struct sockaddr_in6) },
1377 .addrmask = { .sin6_family = AF_INET6,
1378 .sin6_addr = IN6MASK16,
1379 .sin6_len = sizeof(struct sockaddr_in6) },
1380 .preced = 20,
1381 .label = 4 },
1382 /* Teredo */
1383 { .addr = { .sin6_family = AF_INET6,
1384 .sin6_addr = {{{ 0x20, 0x01 }}},
1385 .sin6_len = sizeof(struct sockaddr_in6) },
1386 .addrmask = { .sin6_family = AF_INET6,
1387 .sin6_addr = IN6MASK32,
1388 .sin6_len = sizeof(struct sockaddr_in6) },
1389 .preced = 10,
1390 .label = 5 },
1391 /* v4 compat addresses */
1392 { .addr = { .sin6_family = AF_INET6,
1393 .sin6_addr = IN6ADDR_ANY_INIT,
1394 .sin6_len = sizeof(struct sockaddr_in6) },
1395 .addrmask = { .sin6_family = AF_INET6,
1396 .sin6_addr = IN6MASK96,
1397 .sin6_len = sizeof(struct sockaddr_in6) },
1398 .preced = 1,
1399 .label = 10 },
1400 /* site-local (deprecated) */
1401 { .addr = { .sin6_family = AF_INET6,
1402 .sin6_addr = {{{ 0xfe, 0xc0 }}},
1403 .sin6_len = sizeof(struct sockaddr_in6) },
1404 .addrmask = { .sin6_family = AF_INET6,
1405 .sin6_addr = IN6MASK16,
1406 .sin6_len = sizeof(struct sockaddr_in6) },
1407 .preced = 1,
1408 .label = 11 },
1409 /* 6bone (deprecated) */
1410 { .addr = { .sin6_family = AF_INET6,
1411 .sin6_addr = {{{ 0x3f, 0xfe }}},
1412 .sin6_len = sizeof(struct sockaddr_in6) },
1413 .addrmask = { .sin6_family = AF_INET6,
1414 .sin6_addr = IN6MASK16,
1415 .sin6_len = sizeof(struct sockaddr_in6) },
1416 .preced = 1,
1417 .label = 12 },
1418 };
1419 int i;
1420
1421 init_policy_queue();
1422
1423 /* initialize the "last resort" policy */
1424 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy));
1425 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
1426
1427 for (i = 0; i < sizeof(defaddrsel) / sizeof(defaddrsel[0]); i++)
1428 add_addrsel_policyent(&defaddrsel[i]);
1429
1430 }
1431
1432 struct in6_addrpolicy *
1433 in6_addrsel_lookup_policy(struct sockaddr_in6 *key)
1434 {
1435 struct in6_addrpolicy *match = NULL;
1436
1437 ADDRSEL_LOCK();
1438 match = match_addrsel_policy(key);
1439
1440 if (match == NULL)
1441 match = &defaultaddrpolicy;
1442 else
1443 match->use++;
1444 ADDRSEL_UNLOCK();
1445
1446 return (match);
1447 }
1448
1449 static struct in6_addrpolicy *
1450 match_addrsel_policy(struct sockaddr_in6 *key)
1451 {
1452 struct addrsel_policyent *pent;
1453 struct in6_addrpolicy *bestpol = NULL, *pol;
1454 int matchlen, bestmatchlen = -1;
1455 u_char *mp, *ep, *k, *p, m;
1456
1457 TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) {
1458 matchlen = 0;
1459
1460 pol = &pent->ape_policy;
1461 mp = (u_char *)&pol->addrmask.sin6_addr;
1462 ep = mp + 16; /* XXX: scope field? */
1463 k = (u_char *)&key->sin6_addr;
1464 p = (u_char *)&pol->addr.sin6_addr;
1465 for (; mp < ep && *mp; mp++, k++, p++) {
1466 m = *mp;
1467 if ((*k & m) != *p)
1468 goto next; /* not match */
1469 if (m == 0xff) /* short cut for a typical case */
1470 matchlen += 8;
1471 else {
1472 while (m >= 0x80) {
1473 matchlen++;
1474 m <<= 1;
1475 }
1476 }
1477 }
1478
1479 /* matched. check if this is better than the current best. */
1480 if (bestpol == NULL ||
1481 matchlen > bestmatchlen) {
1482 bestpol = pol;
1483 bestmatchlen = matchlen;
1484 }
1485
1486 next:
1487 continue;
1488 }
1489
1490 return (bestpol);
1491 }
1492
1493 static int
1494 add_addrsel_policyent(const struct in6_addrpolicy *newpolicy)
1495 {
1496 struct addrsel_policyent *new, *pol;
1497
1498 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR,
1499 M_WAITOK);
1500
1501 ADDRSEL_LOCK();
1502
1503 /* duplication check */
1504 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1505 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1506 &pol->ape_policy.addr.sin6_addr) &&
1507 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1508 &pol->ape_policy.addrmask.sin6_addr)) {
1509 ADDRSEL_UNLOCK();
1510 FREE(new, M_IFADDR);
1511 return (EEXIST); /* or override it? */
1512 }
1513 }
1514
1515 bzero(new, sizeof(*new));
1516
1517 /* XXX: should validate entry */
1518 new->ape_policy = *newpolicy;
1519
1520 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
1521 ADDRSEL_UNLOCK();
1522
1523 return (0);
1524 }
#ifdef ENABLE_ADDRSEL
/*
 * Remove and free the table entry whose address and mask both equal those
 * of "key".  Returns ESRCH when there is no such entry, 0 otherwise.
 */
static int
delete_addrsel_policyent(const struct in6_addrpolicy *key)
{
	struct addrsel_policyent *ent;
	int error = ESRCH;

	ADDRSEL_LOCK();

	/* search for the entry in the table */
	TAILQ_FOREACH(ent, &addrsel_policytab, ape_entry) {
		if (!IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
		    &ent->ape_policy.addr.sin6_addr))
			continue;
		if (!IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
		    &ent->ape_policy.addrmask.sin6_addr))
			continue;

		/* Found it; unlink, free, and stop iterating right away. */
		TAILQ_REMOVE(&addrsel_policytab, ent, ape_entry);
		FREE(ent, M_IFADDR);
		error = 0;
		break;
	}

	ADDRSEL_UNLOCK();

	return (error);
}
#endif /* ENABLE_ADDRSEL */
1556
1557 int
1558 walk_addrsel_policy(int (*callback)(const struct in6_addrpolicy *, void *),
1559 void *w)
1560 {
1561 struct addrsel_policyent *pol;
1562 int error = 0;
1563
1564 ADDRSEL_LOCK();
1565 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1566 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1567 ADDRSEL_UNLOCK();
1568 return (error);
1569 }
1570 }
1571 ADDRSEL_UNLOCK();
1572 return (error);
1573 }
/*
 * Subroutines to manage the address selection policy table via sysctl.
 */

/* Argument bundle handed to the table-walk callback. */
struct walkarg {
	struct sysctl_req *w_req;	/* the sysctl request being served */
};
1580
1581
1582 static int
1583 dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg)
1584 {
1585 int error = 0;
1586 struct walkarg *w = arg;
1587
1588 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol));
1589
1590 return (error);
1591 }
1592
1593 static int
1594 in6_src_sysctl SYSCTL_HANDLER_ARGS
1595 {
1596 #pragma unused(oidp, arg1, arg2)
1597 struct walkarg w;
1598
1599 if (req->newptr)
1600 return EPERM;
1601 bzero(&w, sizeof(w));
1602 w.w_req = req;
1603
1604 return (walk_addrsel_policy(dump_addrsel_policyent, &w));
1605 }
1606
1607
1608 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
1609 CTLFLAG_RD | CTLFLAG_LOCKED, in6_src_sysctl, "");
1610 int
1611 in6_src_ioctl(u_long cmd, caddr_t data)
1612 {
1613 int i;
1614 struct in6_addrpolicy ent0;
1615
1616 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
1617 return (EOPNOTSUPP); /* check for safety */
1618
1619 bcopy(data, &ent0, sizeof (ent0));
1620
1621 if (ent0.label == ADDR_LABEL_NOTAPP)
1622 return (EINVAL);
1623 /* check if the prefix mask is consecutive. */
1624 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
1625 return (EINVAL);
1626 /* clear trailing garbages (if any) of the prefix address. */
1627 for (i = 0; i < 4; i++) {
1628 ent0.addr.sin6_addr.s6_addr32[i] &=
1629 ent0.addrmask.sin6_addr.s6_addr32[i];
1630 }
1631 ent0.use = 0;
1632
1633 switch (cmd) {
1634 case SIOCAADDRCTL_POLICY:
1635 #ifdef ENABLE_ADDRSEL
1636 return (add_addrsel_policyent(&ent0));
1637 #else
1638 return (ENOTSUP);
1639 #endif
1640 case SIOCDADDRCTL_POLICY:
1641 #ifdef ENABLE_ADDRSEL
1642 return (delete_addrsel_policyent(&ent0));
1643 #else
1644 return (ENOTSUP);
1645 #endif
1646 }
1647
1648 return (0); /* XXX: compromise compilers */
1649 }
1650
1651 /*
1652 * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
1653 * If the address scope of is link-local, embed the interface index in the
1654 * address. The routine determines our precedence
1655 * between advanced API scope/interface specification and basic API
1656 * specification.
1657 *
1658 * this function should be nuked in the future, when we get rid of
1659 * embedded scopeid thing.
1660 *
1661 * XXX actually, it is over-specification to return ifp against sin6_scope_id.
1662 * there can be multiple interfaces that belong to a particular scope zone
1663 * (in specification, we have 1:N mapping between a scope zone and interfaces).
1664 * we may want to change the function to return something other than ifp.
1665 */
1666 int
1667 in6_embedscope(
1668 struct in6_addr *in6,
1669 const struct sockaddr_in6 *sin6,
1670 struct in6pcb *in6p,
1671 struct ifnet **ifpp,
1672 struct ip6_pktopts *opt)
1673 {
1674 struct ifnet *ifp = NULL;
1675 u_int32_t scopeid;
1676 struct ip6_pktopts *optp = NULL;
1677
1678 *in6 = sin6->sin6_addr;
1679 scopeid = sin6->sin6_scope_id;
1680 if (ifpp != NULL)
1681 *ifpp = NULL;
1682
1683 /*
1684 * don't try to read sin6->sin6_addr beyond here, since the caller may
1685 * ask us to overwrite existing sockaddr_in6
1686 */
1687
1688 #ifdef ENABLE_DEFAULT_SCOPE
1689 if (scopeid == 0)
1690 scopeid = scope6_addr2default(in6);
1691 #endif
1692
1693 if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
1694 struct in6_pktinfo *pi;
1695 struct ifnet *im6o_multicast_ifp = NULL;
1696
1697 if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
1698 in6p->in6p_moptions != NULL) {
1699 IM6O_LOCK(in6p->in6p_moptions);
1700 im6o_multicast_ifp =
1701 in6p->in6p_moptions->im6o_multicast_ifp;
1702 IM6O_UNLOCK(in6p->in6p_moptions);
1703 }
1704
1705 if (opt)
1706 optp = opt;
1707 else if (in6p)
1708 optp = in6p->in6p_outputopts;
1709 /*
1710 * KAME assumption: link id == interface id
1711 */
1712 ifnet_head_lock_shared();
1713 if (in6p && optp && (pi = optp->ip6po_pktinfo) &&
1714 pi->ipi6_ifindex) {
1715 ifp = ifindex2ifnet[pi->ipi6_ifindex];
1716 in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
1717 } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) &&
1718 in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) {
1719 ifp = im6o_multicast_ifp;
1720 in6->s6_addr16[1] = htons(ifp->if_index);
1721 } else if (scopeid) {
1722 /*
1723 * Since scopeid is unsigned, we only have to check it
1724 * against if_index
1725 */
1726 if (if_index < scopeid) {
1727 ifnet_head_done();
1728 return ENXIO; /* XXX EINVAL? */
1729
1730 }
1731 ifp = ifindex2ifnet[scopeid];
1732 /*XXX assignment to 16bit from 32bit variable */
1733 in6->s6_addr16[1] = htons(scopeid & 0xffff);
1734 }
1735 ifnet_head_done();
1736
1737 if (ifpp != NULL) {
1738 if (ifp != NULL)
1739 ifnet_reference(ifp); /* for caller */
1740 *ifpp = ifp;
1741 }
1742 }
1743
1744 return 0;
1745 }
1746
1747 /*
1748 * generate standard sockaddr_in6 from embedded form.
1749 * touches sin6_addr and sin6_scope_id only.
1750 *
1751 * this function should be nuked in the future, when we get rid of
1752 * embedded scopeid thing.
1753 */
1754 int
1755 in6_recoverscope(
1756 struct sockaddr_in6 *sin6,
1757 const struct in6_addr *in6,
1758 struct ifnet *ifp)
1759 {
1760 u_int32_t scopeid;
1761
1762 sin6->sin6_addr = *in6;
1763
1764 /*
1765 * don't try to read *in6 beyond here, since the caller may
1766 * ask us to overwrite existing sockaddr_in6
1767 */
1768
1769 sin6->sin6_scope_id = 0;
1770 if (IN6_IS_SCOPE_LINKLOCAL(in6)) {
1771 /*
1772 * KAME assumption: link id == interface id
1773 */
1774 scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]);
1775 if (scopeid) {
1776 /*
1777 * sanity check
1778 *
1779 * Since scopeid is unsigned, we only have to check it
1780 * against if_index
1781 */
1782 if (if_index < scopeid)
1783 return ENXIO;
1784 if (ifp && ifp->if_index != scopeid)
1785 return ENXIO;
1786 sin6->sin6_addr.s6_addr16[1] = 0;
1787 sin6->sin6_scope_id = scopeid;
1788 }
1789 }
1790
1791 return 0;
1792 }