]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet6/in6_src.c
xnu-4570.1.46.tar.gz
[apple/xnu.git] / bsd / netinet6 / in6_src.c
CommitLineData
b0d623f7 1/*
39037602 2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
b0d623f7
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
b0d623f7
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
b0d623f7
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
b0d623f7
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
b0d623f7
A
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
1c79356b
A
29/*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58/*
59 * Copyright (c) 1982, 1986, 1991, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
91 */
92
1c79356b
A
93
94#include <sys/param.h>
95#include <sys/systm.h>
96#include <sys/malloc.h>
97#include <sys/mbuf.h>
98#include <sys/protosw.h>
99#include <sys/socket.h>
100#include <sys/socketvar.h>
1c79356b
A
101#include <sys/errno.h>
102#include <sys/time.h>
103#include <sys/proc.h>
6d2010ae
A
104#include <sys/sysctl.h>
105#include <sys/kauth.h>
106#include <sys/priv.h>
fe8ab488 107#include <kern/locks.h>
1c79356b
A
108
109#include <net/if.h>
6d2010ae 110#include <net/if_types.h>
1c79356b
A
111#include <net/route.h>
112
113#include <netinet/in.h>
114#include <netinet/in_var.h>
115#include <netinet/in_systm.h>
116#include <netinet/ip.h>
117#include <netinet/in_pcb.h>
118#include <netinet6/in6_var.h>
119#include <netinet/ip6.h>
1c79356b 120#include <netinet6/in6_pcb.h>
1c79356b 121#include <netinet6/ip6_var.h>
6d2010ae 122#include <netinet6/scope6_var.h>
1c79356b
A
123#include <netinet6/nd6.h>
124
125#include <net/net_osdep.h>
126
1c79356b 127#include "loop.h"
1c79356b 128
6d2010ae
A
129SYSCTL_DECL(_net_inet6_ip6);
130
131static int ip6_select_srcif_debug = 0;
132SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
39236c6e
A
133 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
134 "log source interface selection debug info");
6d2010ae 135
3e170ce0
A
136static int ip6_select_srcaddr_debug = 0;
137SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcaddr_debug,
138 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcaddr_debug, 0,
139 "log source address selection debug info");
140
141static int ip6_select_src_expensive_secondary_if = 0;
142SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_src_expensive_secondary_if,
143 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_src_expensive_secondary_if, 0,
144 "allow source interface selection to use expensive secondaries");
145
5ba3f43e
A
146static int ip6_select_src_strong_end = 1;
147SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_src_strong_end,
148 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_src_strong_end, 0,
149 "limit source address selection to outgoing interface");
150
39236c6e 151#define ADDR_LABEL_NOTAPP (-1)
6d2010ae
A
152struct in6_addrpolicy defaultaddrpolicy;
153
154int ip6_prefer_tempaddr = 1;
155#ifdef ENABLE_ADDRSEL
156extern lck_mtx_t *addrsel_mutex;
157#define ADDRSEL_LOCK() lck_mtx_lock(addrsel_mutex)
158#define ADDRSEL_UNLOCK() lck_mtx_unlock(addrsel_mutex)
159#else
160#define ADDRSEL_LOCK()
161#define ADDRSEL_UNLOCK()
162#endif
163
164static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
39236c6e
A
165 struct ip6_pktopts *, struct ip6_moptions *, struct in6_ifaddr **,
166 struct route_in6 *, struct ifnet **, struct rtentry **, int, int,
167 struct ip6_out_args *ip6oa);
6d2010ae 168static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
316670eb 169 struct ip6_moptions *, struct route_in6 *ro,
39236c6e 170 struct ip6_out_args *, struct ifnet **);
6d2010ae
A
171static void init_policy_queue(void);
172static int add_addrsel_policyent(const struct in6_addrpolicy *);
173#ifdef ENABLE_ADDRSEL
174static int delete_addrsel_policyent(const struct in6_addrpolicy *);
175#endif
176static int walk_addrsel_policy(int (*)(const struct in6_addrpolicy *, void *),
177 void *);
178static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
179static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
180void addrsel_policy_init(void);
181
3e170ce0
A
/*
 * SASEL_DO_DBG(inp) evaluates to non-zero when source address selection
 * logging should be enabled for the given pcb: the
 * ip6_select_srcaddr_debug knob is set, the pcb and its socket are
 * non-NULL, and the socket has SO_DEBUG among its options.
 */
#define	SASEL_DO_DBG(inp) \
	(ip6_select_srcaddr_debug && (inp) != NULL && \
	(inp)->inp_socket != NULL && \
	((inp)->inp_socket->so_options & SO_DEBUG))

/*
 * SASEL_LOG(fmt, ...) prints "<function>:<line> <message>\n" when the
 * variable `srcsel_debug' visible at the point of use is non-zero; the
 * caller must therefore have such a variable in scope.
 *
 * The expansion deliberately has no trailing semicolon (and no trailing
 * line continuation) so that the macro behaves as a single statement and
 * can be used safely in an unbraced if/else.
 */
#define	SASEL_LOG(fmt, ...) \
do { \
	if (srcsel_debug) \
		printf("%s:%d " fmt "\n", \
		    __FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
193
/*
 * Return an IPv6 address, which is the most appropriate for a given
 * destination and user specified options.
 * If necessary, this function lookups the routing table and returns
 * an entry to the caller for later use.
 *
 * The three macros below are flow-control helpers for the candidate loop
 * of in6_selectsrc_core().  Each logs the rule number together with the
 * `s_src' and `ifp1' locals of that loop and then jumps to one of the
 * loop's labels, so they can only be used where those names exist.
 */

/* Candidate wins under rule `rule': remember the rule, go to `replace'. */
#define	REPLACE(rule) do {						\
	SASEL_LOG("REPLACE r %d ia %s ifp1 %s\n",			\
	    (rule), s_src, ifp1->if_xname);				\
	srcrule = (rule);						\
	goto replace;							\
} while (0)

/* Candidate loses under rule `rule': go to `next' to try the next one. */
#define	NEXTSRC(rule) do {						\
	SASEL_LOG("NEXTSRC r %d ia %s ifp1 %s\n",			\
	    (rule), s_src, ifp1->if_xname);				\
	goto next;	/* XXX: we can't use 'continue' here */	\
} while (0)

/* Candidate ends the search: remember the rule and go to `out'. */
#define	BREAK(rule) do {						\
	SASEL_LOG("BREAK r %d ia %s ifp1 %s\n",				\
	    (rule), s_src, ifp1->if_xname);				\
	srcrule = (rule);						\
	goto out;	/* XXX: we can't use 'break' here */		\
} while (0)
6d2010ae 219
1c79356b 220struct in6_addr *
5ba3f43e
A
221in6_selectsrc_core(struct sockaddr_in6 *dstsock, uint32_t hint_mask,
222 struct ifnet *ifp, int srcsel_debug, struct in6_addr *src_storage,
223 struct ifnet **sifp, int *errorp, struct ifaddr **ifapp)
1c79356b 224{
5ba3f43e
A
225 u_int32_t odstzone;
226 int bestrule = IP6S_SRCRULE_0;
227 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
6d2010ae 228 struct in6_addr dst;
6d2010ae 229 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
5ba3f43e
A
230 char s_src[MAX_IPv6_STR_LEN] = {0};
231 char s_dst[MAX_IPv6_STR_LEN] = {0};
232 const struct in6_addr *tmp = NULL;
6d2010ae 233 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
39236c6e 234 uint64_t secs = net_uptime();
5ba3f43e
A
235 VERIFY(dstsock != NULL);
236 VERIFY(src_storage != NULL);
237 VERIFY(ifp != NULL);
6d2010ae 238
5ba3f43e
A
239 if (sifp != NULL)
240 *sifp = NULL;
6d2010ae 241
5ba3f43e
A
242 if (ifapp != NULL)
243 *ifapp = NULL;
6d2010ae 244
5ba3f43e 245 dst = dstsock->sin6_addr; /* make a copy for local operation */
6d2010ae 246
5ba3f43e 247 if (srcsel_debug) {
3e170ce0
A
248 (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src));
249
250 tmp = &in6addr_any;
251 (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src));
5ba3f43e
A
252 printf("%s out src %s dst %s ifp %s\n",
253 __func__, s_src, s_dst, ifp->if_xname);
3e170ce0
A
254 }
255
6d2010ae
A
256 *errorp = in6_setscope(&dst, ifp, &odstzone);
257 if (*errorp != 0) {
316670eb
A
258 src_storage = NULL;
259 goto done;
6d2010ae 260 }
6d2010ae 261
5ba3f43e 262 lck_rw_lock_shared(&in6_ifaddr_rwlock);
6d2010ae
A
263 for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
264 int new_scope = -1, new_matchlen = -1;
265 struct in6_addrpolicy *new_policy = NULL;
5ba3f43e 266 u_int32_t srczone = 0, osrczone, dstzone;
6d2010ae
A
267 struct in6_addr src;
268 struct ifnet *ifp1 = ia->ia_ifp;
3e170ce0
A
269 int srcrule;
270
5ba3f43e 271 if (srcsel_debug)
3e170ce0 272 (void) inet_ntop(AF_INET6, &ia->ia_addr.sin6_addr,
5ba3f43e 273 s_src, sizeof (s_src));
6d2010ae
A
274
275 IFA_LOCK(&ia->ia_ifa);
5ba3f43e
A
276
277 /*
278 * XXX By default we are strong end system and will
279 * limit candidate set of source address to the ones
280 * configured on the outgoing interface.
281 */
282 if (ip6_select_src_strong_end &&
283 ifp1 != ifp) {
284 SASEL_LOG("NEXT ia %s ifp1 %s address is not on outgoing "
285 "interface \n", s_src, ifp1->if_xname);
286 goto next;
287 }
288
6d2010ae
A
289 /*
290 * We'll never take an address that breaks the scope zone
5ba3f43e 291 * of the destination. We also skip an address if its zone
6d2010ae
A
292 * does not contain the outgoing interface.
293 * XXX: we should probably use sin6_scope_id here.
294 */
295 if (in6_setscope(&dst, ifp1, &dstzone) ||
3e170ce0
A
296 odstzone != dstzone) {
297 SASEL_LOG("NEXT ia %s ifp1 %s odstzone %d != dstzone %d\n",
298 s_src, ifp1->if_xname, odstzone, dstzone);
6d2010ae 299 goto next;
3e170ce0 300 }
6d2010ae
A
301 src = ia->ia_addr.sin6_addr;
302 if (in6_setscope(&src, ifp, &osrczone) ||
303 in6_setscope(&src, ifp1, &srczone) ||
3e170ce0
A
304 osrczone != srczone) {
305 SASEL_LOG("NEXT ia %s ifp1 %s osrczone %d != srczone %d\n",
306 s_src, ifp1->if_xname, osrczone, srczone);
6d2010ae 307 goto next;
3e170ce0 308 }
6d2010ae
A
309 /* avoid unusable addresses */
310 if ((ia->ia6_flags &
3e170ce0
A
311 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
312 SASEL_LOG("NEXT ia %s ifp1 %s ia6_flags 0x%x\n",
313 s_src, ifp1->if_xname, ia->ia6_flags);
6d2010ae 314 goto next;
3e170ce0
A
315 }
316 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs)) {
317 SASEL_LOG("NEXT ia %s ifp1 %s IFA6_IS_DEPRECATED\n",
318 s_src, ifp1->if_xname);
6d2010ae 319 goto next;
3e170ce0 320 }
316670eb 321 if (!nd6_optimistic_dad &&
3e170ce0
A
322 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) {
323 SASEL_LOG("NEXT ia %s ifp1 %s IN6_IFF_OPTIMISTIC\n",
324 s_src, ifp1->if_xname);
316670eb 325 goto next;
3e170ce0 326 }
6d2010ae
A
327 /* Rule 1: Prefer same address */
328 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
fe8ab488 329 BREAK(IP6S_SRCRULE_1); /* there should be no better candidate */
6d2010ae
A
330
331 if (ia_best == NULL)
fe8ab488 332 REPLACE(IP6S_SRCRULE_0);
6d2010ae
A
333
334 /* Rule 2: Prefer appropriate scope */
335 if (dst_scope < 0)
336 dst_scope = in6_addrscope(&dst);
337 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
338 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
339 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
fe8ab488
A
340 REPLACE(IP6S_SRCRULE_2);
341 NEXTSRC(IP6S_SRCRULE_2);
6d2010ae
A
342 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
343 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
fe8ab488
A
344 NEXTSRC(IP6S_SRCRULE_2);
345 REPLACE(IP6S_SRCRULE_2);
b0d623f7 346 }
6d2010ae
A
347
348 /*
349 * Rule 3: Avoid deprecated addresses. Note that the case of
350 * !ip6_use_deprecated is already rejected above.
351 */
39236c6e
A
352 if (!IFA6_IS_DEPRECATED(ia_best, secs) &&
353 IFA6_IS_DEPRECATED(ia, secs))
fe8ab488 354 NEXTSRC(IP6S_SRCRULE_3);
39236c6e
A
355 if (IFA6_IS_DEPRECATED(ia_best, secs) &&
356 !IFA6_IS_DEPRECATED(ia, secs))
fe8ab488 357 REPLACE(IP6S_SRCRULE_3);
6d2010ae 358
316670eb
A
359 /*
360 * RFC 4429 says that optimistic addresses are equivalent to
361 * deprecated addresses, so avoid them here.
362 */
363 if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) == 0 &&
364 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
fe8ab488 365 NEXTSRC(IP6S_SRCRULE_3);
316670eb
A
366 if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) != 0 &&
367 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0)
fe8ab488 368 REPLACE(IP6S_SRCRULE_3);
316670eb 369
6d2010ae
A
370 /* Rule 4: Prefer home addresses */
371 /*
372 * XXX: This is a TODO. We should probably merge the MIP6
373 * case above.
374 */
375
376 /* Rule 5: Prefer outgoing interface */
5ba3f43e
A
377 /*
378 * XXX By default we are strong end with source address
379 * selection. That means all address selection candidate
380 * addresses will be the ones hosted on the outgoing interface
381 * making the following check redundant.
382 */
383 if (ip6_select_src_strong_end == 0) {
384 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
385 NEXTSRC(IP6S_SRCRULE_5);
386 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
387 REPLACE(IP6S_SRCRULE_5);
fe8ab488 388 }
6d2010ae
A
389
390 /*
391 * Rule 6: Prefer matching label
392 * Note that best_policy should be non-NULL here.
393 */
394 if (dst_policy == NULL)
395 dst_policy = in6_addrsel_lookup_policy(dstsock);
396 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
397 new_policy = in6_addrsel_lookup_policy(&ia->ia_addr);
398 if (dst_policy->label == best_policy->label &&
399 dst_policy->label != new_policy->label)
fe8ab488 400 NEXTSRC(IP6S_SRCRULE_6);
6d2010ae
A
401 if (dst_policy->label != best_policy->label &&
402 dst_policy->label == new_policy->label)
fe8ab488 403 REPLACE(IP6S_SRCRULE_6);
1c79356b 404 }
6d2010ae
A
405
406 /*
fe8ab488 407 * Rule 7: Prefer temporary addresses.
6d2010ae 408 * We allow users to reverse the logic by configuring
fe8ab488
A
409 * a sysctl variable, so that transparency conscious users can
410 * always prefer stable addresses.
6d2010ae 411 */
6d2010ae
A
412 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
413 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
5ba3f43e 414 if (hint_mask & IPV6_SRCSEL_HINT_PREFER_TMPADDR)
fe8ab488 415 REPLACE(IP6S_SRCRULE_7);
6d2010ae 416 else
fe8ab488 417 NEXTSRC(IP6S_SRCRULE_7);
6d2010ae
A
418 }
419 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
420 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
5ba3f43e 421 if (hint_mask & IPV6_SRCSEL_HINT_PREFER_TMPADDR)
fe8ab488 422 NEXTSRC(IP6S_SRCRULE_7);
6d2010ae 423 else
fe8ab488 424 REPLACE(IP6S_SRCRULE_7);
6d2010ae
A
425 }
426
427 /*
fe8ab488 428 * Rule 7x: prefer addresses on alive interfaces.
6d2010ae
A
429 * This is a KAME specific rule.
430 */
431 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
432 !(ia->ia_ifp->if_flags & IFF_UP))
fe8ab488 433 NEXTSRC(IP6S_SRCRULE_7x);
6d2010ae
A
434 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
435 (ia->ia_ifp->if_flags & IFF_UP))
fe8ab488 436 REPLACE(IP6S_SRCRULE_7x);
6d2010ae
A
437
438 /*
fe8ab488 439 * Rule 8: Use longest matching prefix.
6d2010ae
A
440 */
441 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
442 if (best_matchlen < new_matchlen)
fe8ab488 443 REPLACE(IP6S_SRCRULE_8);
6d2010ae 444 if (new_matchlen < best_matchlen)
fe8ab488 445 NEXTSRC(IP6S_SRCRULE_8);
6d2010ae
A
446
447 /*
448 * Last resort: just keep the current candidate.
449 * Or, do we need more rules?
450 */
3e170ce0
A
451 if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) &&
452 ip6_select_src_expensive_secondary_if == 0) {
453 SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n",
454 s_src, ifp1->if_xname);
5ba3f43e 455 ip6stat.ip6s_sources_skip_expensive_secondary_if++;
3e170ce0
A
456 goto next;
457 }
458 SASEL_LOG("NEXT ia %s ifp1 %s last resort\n",
459 s_src, ifp1->if_xname);
6d2010ae
A
460 IFA_UNLOCK(&ia->ia_ifa);
461 continue;
462
463replace:
3e170ce0
A
464 /*
465 * Ignore addresses on secondary interfaces that are marked
466 * expensive
467 */
468 if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) &&
469 ip6_select_src_expensive_secondary_if == 0) {
470 SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n",
471 s_src, ifp1->if_xname);
5ba3f43e 472 ip6stat.ip6s_sources_skip_expensive_secondary_if++;
3e170ce0
A
473 goto next;
474 }
475 bestrule = srcrule;
6d2010ae 476 best_scope = (new_scope >= 0 ? new_scope :
39236c6e 477 in6_addrscope(&ia->ia_addr.sin6_addr));
6d2010ae 478 best_policy = (new_policy ? new_policy :
39236c6e 479 in6_addrsel_lookup_policy(&ia->ia_addr));
6d2010ae 480 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
39236c6e 481 in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
3e170ce0
A
482 SASEL_LOG("NEXT ia %s ifp1 %s best_scope %d new_scope %d dst_scope %d\n",
483 s_src, ifp1->if_xname, best_scope, new_scope, dst_scope);
5ba3f43e 484 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
6d2010ae
A
485 IFA_UNLOCK(&ia->ia_ifa);
486 if (ia_best != NULL)
487 IFA_REMREF(&ia_best->ia_ifa);
488 ia_best = ia;
489 continue;
490
491next:
492 IFA_UNLOCK(&ia->ia_ifa);
493 continue;
494
495out:
5ba3f43e 496 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
6d2010ae
A
497 IFA_UNLOCK(&ia->ia_ifa);
498 if (ia_best != NULL)
499 IFA_REMREF(&ia_best->ia_ifa);
500 ia_best = ia;
501 break;
502 }
503
504 lck_rw_done(&in6_ifaddr_rwlock);
505
316670eb 506 if ((ia = ia_best) == NULL) {
39236c6e
A
507 if (*errorp == 0)
508 *errorp = EADDRNOTAVAIL;
316670eb
A
509 src_storage = NULL;
510 goto done;
1c79356b
A
511 }
512
5ba3f43e
A
513 if (sifp != NULL) {
514 *sifp = ia->ia_ifa.ifa_ifp;
515 ifnet_reference(*sifp);
516 }
517
6d2010ae 518 IFA_LOCK_SPIN(&ia->ia_ifa);
3e170ce0
A
519 if (bestrule < IP6S_SRCRULE_COUNT)
520 ip6stat.ip6s_sources_rule[bestrule]++;
6d2010ae
A
521 *src_storage = satosin6(&ia->ia_addr)->sin6_addr;
522 IFA_UNLOCK(&ia->ia_ifa);
5ba3f43e
A
523
524 if (ifapp != NULL)
525 *ifapp = &ia->ia_ifa;
526 else
527 IFA_REMREF(&ia->ia_ifa);
528
316670eb 529done:
5ba3f43e 530 if (srcsel_debug) {
3e170ce0
A
531 (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src));
532
533 tmp = (src_storage != NULL) ? src_storage : &in6addr_any;
534 (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src));
39037602 535
5ba3f43e
A
536 printf("%s out src %s dst %s dst_scope %d best_scope %d\n",
537 __func__, s_src, s_dst, dst_scope, best_scope);
3e170ce0 538 }
5ba3f43e
A
539
540 return (src_storage);
541}
542
543/*
544 * Regardless of error, it will return an ifp with a reference held if the
545 * caller provides a non-NULL ifpp. The caller is responsible for checking
546 * if the returned ifp is valid and release its reference at all times.
547 */
548struct in6_addr *
549in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
550 struct inpcb *inp, struct route_in6 *ro,
551 struct ifnet **ifpp, struct in6_addr *src_storage, unsigned int ifscope,
552 int *errorp)
553{
554 struct ifnet *ifp = NULL;
555 struct in6_pktinfo *pi = NULL;
556 struct ip6_moptions *mopts;
557 struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF, 0,
558 SO_TC_UNSPEC, _NET_SERVICE_TYPE_UNSPEC };
559 boolean_t inp_debug = FALSE;
560 uint32_t hint_mask = 0;
561 int prefer_tempaddr = 0;
562 struct ifnet *sifp = NULL;
563
564 *errorp = 0;
565 if (ifpp != NULL)
566 *ifpp = NULL;
567
568 if (inp != NULL) {
569 inp_debug = SASEL_DO_DBG(inp);
570 mopts = inp->in6p_moptions;
571 if (INP_NO_CELLULAR(inp))
572 ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
573 if (INP_NO_EXPENSIVE(inp))
574 ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE;
575 if (INP_AWDL_UNRESTRICTED(inp))
576 ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED;
577 if (INP_INTCOPROC_ALLOWED(inp))
578 ip6oa.ip6oa_flags |= IP6OAF_INTCOPROC_ALLOWED;
579 } else {
580 mopts = NULL;
581 /* Allow the kernel to retransmit packets. */
582 ip6oa.ip6oa_flags |= IP6OAF_INTCOPROC_ALLOWED |
583 IP6OAF_AWDL_UNRESTRICTED;
584 }
585
586 if (ip6oa.ip6oa_boundif != IFSCOPE_NONE)
587 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
588
589 /*
590 * If the source address is explicitly specified by the caller,
591 * check if the requested source address is indeed a unicast address
592 * assigned to the node, and can be used as the packet's source
593 * address. If everything is okay, use the address as source.
594 */
595 if (opts && (pi = opts->ip6po_pktinfo) &&
596 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
597 struct sockaddr_in6 srcsock;
598 struct in6_ifaddr *ia6;
599
600 /* get the outgoing interface */
601 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
602 &ifp)) != 0) {
603 src_storage = NULL;
604 goto done;
605 }
606
607 /*
608 * determine the appropriate zone id of the source based on
609 * the zone of the destination and the outgoing interface.
610 * If the specified address is ambiguous wrt the scope zone,
611 * the interface must be specified; otherwise, ifa_ifwithaddr()
612 * will fail matching the address.
613 */
614 bzero(&srcsock, sizeof (srcsock));
615 srcsock.sin6_family = AF_INET6;
616 srcsock.sin6_len = sizeof (srcsock);
617 srcsock.sin6_addr = pi->ipi6_addr;
618 if (ifp != NULL) {
619 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
620 if (*errorp != 0) {
621 src_storage = NULL;
622 goto done;
623 }
624 }
625 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)
626 (&srcsock));
627 if (ia6 == NULL) {
628 *errorp = EADDRNOTAVAIL;
629 src_storage = NULL;
630 goto done;
631 }
632 IFA_LOCK_SPIN(&ia6->ia_ifa);
633 if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
634 (inp && inp_restricted_send(inp, ia6->ia_ifa.ifa_ifp))) {
635 IFA_UNLOCK(&ia6->ia_ifa);
636 IFA_REMREF(&ia6->ia_ifa);
637 *errorp = EHOSTUNREACH;
638 src_storage = NULL;
639 goto done;
640 }
641
642 *src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
643 IFA_UNLOCK(&ia6->ia_ifa);
644 IFA_REMREF(&ia6->ia_ifa);
645 goto done;
646 }
647
648 /*
649 * Otherwise, if the socket has already bound the source, just use it.
650 */
651 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
652 src_storage = &inp->in6p_laddr;
653 goto done;
654 }
655
656 /*
657 * If the address is not specified, choose the best one based on
658 * the outgoing interface and the destination address.
659 */
660 /* get the outgoing interface */
661 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
662 &ifp)) != 0) {
663 src_storage = NULL;
664 goto done;
665 }
666
667 VERIFY(ifp != NULL);
668
669 if (opts == NULL ||
670 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
671 prefer_tempaddr = ip6_prefer_tempaddr;
672 } else if (opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_NOTPREFER) {
673 prefer_tempaddr = 0;
674 } else
675 prefer_tempaddr = 1;
676
677 if (prefer_tempaddr)
678 hint_mask |= IPV6_SRCSEL_HINT_PREFER_TMPADDR;
679
680 if (in6_selectsrc_core(dstsock, hint_mask, ifp, inp_debug, src_storage,
681 &sifp, errorp, NULL) == NULL) {
682 src_storage = NULL;
683 goto done;
684 }
685
686 VERIFY(sifp != NULL);
687
688 if (inp && inp_restricted_send(inp, sifp)) {
689 src_storage = NULL;
690 *errorp = EHOSTUNREACH;
691 ifnet_release(sifp);
692 goto done;
693 } else {
694 ifnet_release(sifp);
695 }
696
697done:
6d2010ae
A
698 if (ifpp != NULL) {
699 /* if ifp is non-NULL, refcnt held in in6_selectif() */
700 *ifpp = ifp;
701 } else if (ifp != NULL) {
702 ifnet_release(ifp);
703 }
704 return (src_storage);
705}
706
707/*
708 * Given a source IPv6 address (and route, if available), determine the best
709 * interface to send the packet from. Checking for (and updating) the
710 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
711 * without any locks, based on the assumption that in the event this is
712 * called from ip6_output(), the output operation is single-threaded per-pcb,
713 * i.e. for any given pcb there can only be one thread performing output at
714 * the IPv6 layer.
715 *
316670eb
A
716 * This routine is analogous to in_selectsrcif() for IPv4. Regardless of
717 * error, it will return an ifp with a reference held if the caller provides
718 * a non-NULL retifp. The caller is responsible for checking if the
719 * returned ifp is valid and release its reference at all times.
6d2010ae
A
720 *
721 * clone - meaningful only for bsdi and freebsd
722 */
723static int
724selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
39236c6e
A
725 struct ip6_pktopts *opts, struct ip6_moptions *mopts,
726 struct in6_ifaddr **retsrcia, struct route_in6 *ro,
6d2010ae 727 struct ifnet **retifp, struct rtentry **retrt, int clone,
39236c6e 728 int norouteok, struct ip6_out_args *ip6oa)
6d2010ae
A
729{
730 int error = 0;
316670eb 731 struct ifnet *ifp = NULL, *ifp0 = NULL;
6d2010ae
A
732 struct route_in6 *route = NULL;
733 struct sockaddr_in6 *sin6_next;
734 struct in6_pktinfo *pi = NULL;
735 struct in6_addr *dst = &dstsock->sin6_addr;
736 struct ifaddr *ifa = NULL;
737 char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
39236c6e
A
738 boolean_t select_srcif, proxied_ifa = FALSE, local_dst = FALSE;
739 unsigned int ifscope = ((ip6oa != NULL) ?
740 ip6oa->ip6oa_boundif : IFSCOPE_NONE);
6d2010ae 741
6d2010ae
A
742 if (retifp != NULL)
743 *retifp = NULL;
744
745 if (retrt != NULL)
746 *retrt = NULL;
747
748 if (ip6_select_srcif_debug) {
749 struct in6_addr src;
750 src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any;
751 (void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src));
752 (void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst));
753 }
754
755 /*
756 * If the destination address is UNSPECIFIED addr, bail out.
757 */
758 if (IN6_IS_ADDR_UNSPECIFIED(dst)) {
759 error = EHOSTUNREACH;
760 goto done;
761 }
762
763 /*
764 * Perform source interface selection only if Scoped Routing
765 * is enabled and a source address that isn't unspecified.
766 */
39037602 767 select_srcif = (srcsock != NULL &&
6d2010ae
A
768 !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));
769
3e170ce0 770 if (ip6_select_srcif_debug) {
39037602 771 printf("%s src %s dst %s ifscope %d select_srcif %d\n",
3e170ce0
A
772 __func__, s_src, s_dst, ifscope, select_srcif);
773 }
6d2010ae
A
774
775 /* If the caller specified the outgoing interface explicitly, use it */
776 if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL &&
777 pi->ipi6_ifindex != 0) {
1c79356b 778 /*
6d2010ae 779 * If IPV6_PKTINFO takes precedence over IPV6_BOUND_IF.
1c79356b 780 */
6d2010ae 781 ifscope = pi->ipi6_ifindex;
b0d623f7 782 ifnet_head_lock_shared();
6d2010ae 783 /* ifp may be NULL if detached or out of range */
316670eb
A
784 ifp = ifp0 =
785 ((ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL);
6d2010ae
A
786 ifnet_head_done();
787 if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
788 /*
789 * We do not have to check or get the route for
790 * multicast. If the caller didn't ask/care for
791 * the route and we have no interface to use,
792 * it's an error.
793 */
794 if (ifp == NULL)
795 error = EHOSTUNREACH;
796 goto done;
b0d623f7 797 } else {
6d2010ae 798 goto getsrcif;
1c79356b 799 }
6d2010ae 800 }
b0d623f7 801
6d2010ae
A
802 /*
803 * If the destination address is a multicast address and the outgoing
804 * interface for the address is specified by the caller, use it.
805 */
806 if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
807 IM6O_LOCK(mopts);
316670eb 808 if ((ifp = ifp0 = mopts->im6o_multicast_ifp) != NULL) {
6d2010ae
A
809 IM6O_UNLOCK(mopts);
810 goto done; /* we do not need a route for multicast. */
1c79356b 811 }
6d2010ae
A
812 IM6O_UNLOCK(mopts);
813 }
814
815getsrcif:
816 /*
817 * If the outgoing interface was not set via IPV6_BOUND_IF or
818 * IPV6_PKTINFO, use the scope ID in the destination address.
819 */
39037602 820 if (ifscope == IFSCOPE_NONE)
6d2010ae
A
821 ifscope = dstsock->sin6_scope_id;
822
823 /*
824 * Perform source interface selection; the source IPv6 address
825 * must belong to one of the addresses of the interface used
826 * by the route. For performance reasons, do this only if
827 * there is no route, or if the routing table has changed,
828 * or if we haven't done source interface selection on this
829 * route (for this PCB instance) before.
830 */
39236c6e
A
831 if (!select_srcif) {
832 goto getroute;
833 } else if (!ROUTE_UNUSABLE(ro) && ro->ro_srcia != NULL &&
834 (ro->ro_flags & ROF_SRCIF_SELECTED)) {
835 if (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
836 local_dst = TRUE;
837 ifa = ro->ro_srcia;
838 IFA_ADDREF(ifa); /* for caller */
6d2010ae 839 goto getroute;
1c79356b
A
840 }
841
842 /*
6d2010ae
A
843 * Given the source IPv6 address, find a suitable source interface
844 * to use for transmission; if a scope ID has been specified,
845 * optimize the search by looking at the addresses only for that
846 * interface. This is still suboptimal, however, as we need to
847 * traverse the per-interface list.
1c79356b 848 */
6d2010ae
A
849 if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) {
850 unsigned int scope = ifscope;
851 struct ifnet *rt_ifp;
852
853 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
1c79356b 854
6d2010ae
A
855 /*
856 * If no scope is specified and the route is stale (pointing
857 * to a defunct interface) use the current primary interface;
858 * this happens when switching between interfaces configured
859 * with the same IPv6 address. Otherwise pick up the scope
860 * information from the route; the ULP may have looked up a
861 * correct route and we just need to verify it here and mark
862 * it with the ROF_SRCIF_SELECTED flag below.
863 */
864 if (scope == IFSCOPE_NONE) {
865 scope = rt_ifp->if_index;
866 if (scope != get_primary_ifscope(AF_INET6) &&
39236c6e 867 ROUTE_UNUSABLE(ro))
6d2010ae 868 scope = get_primary_ifscope(AF_INET6);
1c79356b
A
869 }
870
6d2010ae
A
871 ifa = (struct ifaddr *)
872 ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);
873
316670eb
A
874 /*
875 * If we are forwarding and proxying prefix(es), see if the
876 * source address is one of ours and is a proxied address;
877 * if so, use it.
878 */
879 if (ifa == NULL && ip6_forwarding && nd6_prproxy) {
880 ifa = (struct ifaddr *)
881 ifa_foraddr6(&srcsock->sin6_addr);
882 if (ifa != NULL && !(proxied_ifa =
883 nd6_prproxy_ifaddr((struct in6_ifaddr *)ifa))) {
884 IFA_REMREF(ifa);
885 ifa = NULL;
886 }
887 }
888
6d2010ae
A
889 if (ip6_select_srcif_debug && ifa != NULL) {
890 if (ro->ro_rt != NULL) {
3e170ce0
A
891 printf("%s %s->%s ifscope %d->%d ifa_if %s "
892 "ro_if %s\n",
39037602 893 __func__,
3e170ce0 894 s_src, s_dst, ifscope,
6d2010ae
A
895 scope, if_name(ifa->ifa_ifp),
896 if_name(rt_ifp));
897 } else {
3e170ce0 898 printf("%s %s->%s ifscope %d->%d ifa_if %s\n",
39037602 899 __func__,
6d2010ae
A
900 s_src, s_dst, ifscope, scope,
901 if_name(ifa->ifa_ifp));
1c79356b 902 }
1c79356b
A
903 }
904 }
905
906 /*
6d2010ae
A
907 * Slow path; search for an interface having the corresponding source
908 * IPv6 address if the scope was not specified by the caller, and:
909 *
910 * 1) There currently isn't any route, or,
911 * 2) The interface used by the route does not own that source
912 * IPv6 address; in this case, the route will get blown away
913 * and we'll do a more specific scoped search using the newly
914 * found interface.
1c79356b 915 */
6d2010ae 916 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
39236c6e
A
917 struct ifaddr *ifadst;
918
919 /* Check if the destination address is one of ours */
920 ifadst = (struct ifaddr *)ifa_foraddr6(&dstsock->sin6_addr);
921 if (ifadst != NULL) {
922 local_dst = TRUE;
923 IFA_REMREF(ifadst);
924 }
925
6d2010ae
A
926 ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);
927
928 if (ip6_select_srcif_debug && ifa != NULL) {
3e170ce0
A
929 printf("%s %s->%s ifscope %d ifa_if %s\n",
930 __func__,
6d2010ae 931 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
3e170ce0
A
932 } else if (ip6_select_srcif_debug) {
933 printf("%s %s->%s ifscope %d ifa_if NULL\n",
934 __func__,
935 s_src, s_dst, ifscope);
6d2010ae 936 }
6d2010ae
A
937 }
938
939getroute:
39236c6e 940 if (ifa != NULL && !proxied_ifa && !local_dst)
6d2010ae
A
941 ifscope = ifa->ifa_ifp->if_index;
942
943 /*
944 * If the next hop address for the packet is specified by the caller,
945 * use it as the gateway.
946 */
947 if (opts != NULL && opts->ip6po_nexthop != NULL) {
948 struct route_in6 *ron;
949
950 sin6_next = satosin6(opts->ip6po_nexthop);
951
952 /* at this moment, we only support AF_INET6 next hops */
953 if (sin6_next->sin6_family != AF_INET6) {
954 error = EAFNOSUPPORT; /* or should we proceed? */
955 goto done;
956 }
957
958 /*
959 * If the next hop is an IPv6 address, then the node identified
960 * by that address must be a neighbor of the sending host.
961 */
962 ron = &opts->ip6po_nextroute;
963 if (ron->ro_rt != NULL)
964 RT_LOCK(ron->ro_rt);
39236c6e
A
965 if (ROUTE_UNUSABLE(ron) || (ron->ro_rt != NULL &&
966 (!(ron->ro_rt->rt_flags & RTF_LLINFO) ||
6d2010ae 967 (select_srcif && (ifa == NULL ||
316670eb 968 (ifa->ifa_ifp != ron->ro_rt->rt_ifp && !proxied_ifa))))) ||
6d2010ae
A
969 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
970 &sin6_next->sin6_addr)) {
39236c6e 971 if (ron->ro_rt != NULL)
6d2010ae 972 RT_UNLOCK(ron->ro_rt);
39236c6e
A
973
974 ROUTE_RELEASE(ron);
6d2010ae
A
975 *satosin6(&ron->ro_dst) = *sin6_next;
976 }
977 if (ron->ro_rt == NULL) {
978 rtalloc_scoped((struct route *)ron, ifscope);
979 if (ron->ro_rt != NULL)
980 RT_LOCK(ron->ro_rt);
39236c6e 981 if (ROUTE_UNUSABLE(ron) ||
6d2010ae
A
982 !(ron->ro_rt->rt_flags & RTF_LLINFO) ||
983 !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))->
984 sin6_addr, &sin6_next->sin6_addr)) {
39236c6e 985 if (ron->ro_rt != NULL)
6d2010ae 986 RT_UNLOCK(ron->ro_rt);
39236c6e
A
987
988 ROUTE_RELEASE(ron);
6d2010ae
A
989 error = EHOSTUNREACH;
990 goto done;
991 }
992 }
993 route = ron;
316670eb 994 ifp = ifp0 = ron->ro_rt->rt_ifp;
6d2010ae
A
995
996 /*
997 * When cloning is required, try to allocate a route to the
998 * destination so that the caller can store path MTU
999 * information.
1000 */
1001 if (!clone) {
1002 if (select_srcif) {
1003 /* Keep the route locked */
1004 goto validateroute;
1c79356b 1005 }
6d2010ae
A
1006 RT_UNLOCK(ron->ro_rt);
1007 goto done;
1c79356b 1008 }
6d2010ae 1009 RT_UNLOCK(ron->ro_rt);
1c79356b
A
1010 }
1011
1c79356b 1012 /*
6d2010ae
A
1013 * Use a cached route if it exists and is valid, else try to allocate
1014 * a new one. Note that we should check the address family of the
1015 * cached destination, in case of sharing the cache with IPv4.
1c79356b 1016 */
6d2010ae
A
1017 if (ro == NULL)
1018 goto done;
1019 if (ro->ro_rt != NULL)
39236c6e
A
1020 RT_LOCK_SPIN(ro->ro_rt);
1021 if (ROUTE_UNUSABLE(ro) || (ro->ro_rt != NULL &&
1022 (satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
6d2010ae
A
1023 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
1024 (select_srcif && (ifa == NULL ||
39236c6e
A
1025 (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa)))))) {
1026 if (ro->ro_rt != NULL)
1027 RT_UNLOCK(ro->ro_rt);
1028
1029 ROUTE_RELEASE(ro);
6d2010ae
A
1030 }
1031 if (ro->ro_rt == NULL) {
1032 struct sockaddr_in6 *sa6;
1033
b0d623f7 1034 if (ro->ro_rt != NULL)
b0d623f7 1035 RT_UNLOCK(ro->ro_rt);
6d2010ae 1036 /* No route yet, so try to acquire one */
39236c6e 1037 bzero(&ro->ro_dst, sizeof (struct sockaddr_in6));
6d2010ae
A
1038 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
1039 sa6->sin6_family = AF_INET6;
39236c6e 1040 sa6->sin6_len = sizeof (struct sockaddr_in6);
6d2010ae
A
1041 sa6->sin6_addr = *dst;
1042 if (IN6_IS_ADDR_MULTICAST(dst)) {
1043 ro->ro_rt = rtalloc1_scoped(
1044 &((struct route *)ro)->ro_dst, 0, 0, ifscope);
1045 } else {
1046 rtalloc_scoped((struct route *)ro, ifscope);
1c79356b 1047 }
6d2010ae 1048 if (ro->ro_rt != NULL)
39236c6e 1049 RT_LOCK_SPIN(ro->ro_rt);
6d2010ae 1050 }
1c79356b 1051
6d2010ae
A
1052 /*
1053 * Do not care about the result if we have the nexthop
1054 * explicitly specified (in case we're asked to clone.)
1055 */
1056 if (opts != NULL && opts->ip6po_nexthop != NULL) {
1057 if (ro->ro_rt != NULL)
1058 RT_UNLOCK(ro->ro_rt);
1059 goto done;
1060 }
1061
1062 if (ro->ro_rt != NULL) {
1063 RT_LOCK_ASSERT_HELD(ro->ro_rt);
316670eb 1064 ifp = ifp0 = ro->ro_rt->rt_ifp;
6d2010ae
A
1065 } else {
1066 error = EHOSTUNREACH;
1067 }
1068 route = ro;
1069
1070validateroute:
1071 if (select_srcif) {
1072 boolean_t has_route = (route != NULL && route->ro_rt != NULL);
316670eb 1073 boolean_t srcif_selected = FALSE;
6d2010ae
A
1074
1075 if (has_route)
1076 RT_LOCK_ASSERT_HELD(route->ro_rt);
1c79356b 1077 /*
6d2010ae
A
1078 * If there is a non-loopback route with the wrong interface,
1079 * or if there is no interface configured with such an address,
1080 * blow it away. Except for local/loopback, we look for one
1081 * with a matching interface scope/index.
1c79356b 1082 */
6d2010ae
A
1083 if (has_route && (ifa == NULL ||
1084 (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
1085 !(route->ro_rt->rt_flags & RTF_UP))) {
316670eb
A
1086 /*
1087 * If the destination address belongs to a proxied
1088 * prefix, relax the requirement and allow the packet
1089 * to come out of the proxy interface with the source
1090 * address of the real interface.
1091 */
1092 if (ifa != NULL && proxied_ifa &&
1093 (route->ro_rt->rt_flags & (RTF_UP|RTF_PROXY)) ==
1094 (RTF_UP|RTF_PROXY)) {
1095 srcif_selected = TRUE;
1096 } else {
1097 if (ip6_select_srcif_debug) {
1098 if (ifa != NULL) {
1099 printf("%s->%s ifscope %d "
1100 "ro_if %s != ifa_if %s "
1101 "(cached route cleared)\n",
1102 s_src, s_dst,
1103 ifscope, if_name(ifp),
1104 if_name(ifa->ifa_ifp));
1105 } else {
1106 printf("%s->%s ifscope %d "
1107 "ro_if %s (no ifa_if "
1108 "found)\n", s_src, s_dst,
1109 ifscope, if_name(ifp));
1110 }
6d2010ae 1111 }
316670eb 1112 RT_UNLOCK(route->ro_rt);
39236c6e 1113 ROUTE_RELEASE(route);
316670eb
A
1114 error = EHOSTUNREACH;
1115 /* Undo the settings done above */
1116 route = NULL;
1117 ifp = NULL; /* ditch ifp; keep ifp0 */
1118 has_route = FALSE;
91447636 1119 }
6d2010ae 1120 } else if (has_route) {
316670eb
A
1121 srcif_selected = TRUE;
1122 }
1123
1124 if (srcif_selected) {
1125 VERIFY(has_route);
39236c6e
A
1126 if (ifa != route->ro_srcia ||
1127 !(route->ro_flags & ROF_SRCIF_SELECTED)) {
1128 RT_CONVERT_LOCK(route->ro_rt);
1129 if (ifa != NULL)
1130 IFA_ADDREF(ifa); /* for route_in6 */
1131 if (route->ro_srcia != NULL)
1132 IFA_REMREF(route->ro_srcia);
1133 route->ro_srcia = ifa;
1134 route->ro_flags |= ROF_SRCIF_SELECTED;
1135 RT_GENID_SYNC(route->ro_rt);
1136 }
6d2010ae
A
1137 RT_UNLOCK(route->ro_rt);
1138 }
1139 } else {
1140 if (ro->ro_rt != NULL)
b0d623f7 1141 RT_UNLOCK(ro->ro_rt);
6d2010ae
A
1142 if (ifp != NULL && opts != NULL &&
1143 opts->ip6po_pktinfo != NULL &&
1144 opts->ip6po_pktinfo->ipi6_ifindex != 0) {
1145 /*
1146 * Check if the outgoing interface conflicts with the
1147 * interface specified by ipi6_ifindex (if specified).
1148 * Note that loopback interface is always okay.
1149 * (this may happen when we are sending a packet to
1150 * one of our own addresses.)
1151 */
1152 if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index !=
1153 opts->ip6po_pktinfo->ipi6_ifindex) {
1154 error = EHOSTUNREACH;
1155 goto done;
1156 }
1c79356b 1157 }
6d2010ae
A
1158 }
1159
1160done:
fe8ab488
A
1161 /*
1162 * Check for interface restrictions.
1163 */
1164#define CHECK_RESTRICTIONS(_ip6oa, _ifp) \
1165 ((((_ip6oa)->ip6oa_flags & IP6OAF_NO_CELLULAR) && \
1166 IFNET_IS_CELLULAR(_ifp)) || \
1167 (((_ip6oa)->ip6oa_flags & IP6OAF_NO_EXPENSIVE) && \
1168 IFNET_IS_EXPENSIVE(_ifp)) || \
39037602
A
1169 (!((_ip6oa)->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED) && \
1170 IFNET_IS_INTCOPROC(_ifp)) || \
fe8ab488 1171 (!((_ip6oa)->ip6oa_flags & IP6OAF_AWDL_UNRESTRICTED) && \
39037602 1172 IFNET_IS_AWDL_RESTRICTED(_ifp)))
fe8ab488
A
1173
1174 if (error == 0 && ip6oa != NULL &&
1175 ((ifp && CHECK_RESTRICTIONS(ip6oa, ifp)) ||
39037602 1176 (route && route->ro_rt &&
fe8ab488
A
1177 CHECK_RESTRICTIONS(ip6oa, route->ro_rt->rt_ifp)))) {
1178 if (route != NULL && route->ro_rt != NULL) {
1179 ROUTE_RELEASE(route);
1180 route = NULL;
6d2010ae 1181 }
fe8ab488
A
1182 ifp = NULL; /* ditch ifp; keep ifp0 */
1183 error = EHOSTUNREACH;
1184 ip6oa->ip6oa_retflags |= IP6OARF_IFDENIED;
6d2010ae 1185 }
fe8ab488 1186#undef CHECK_RESTRICTIONS
6d2010ae 1187
39236c6e
A
1188 /*
1189 * If the interface is disabled for IPv6, then ENETDOWN error.
1190 */
1191 if (error == 0 &&
1192 ifp != NULL && (ifp->if_eflags & IFEF_IPV6_DISABLED)) {
1193 error = ENETDOWN;
1194 }
1195
6d2010ae 1196 if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) {
1c79356b 1197 /*
6d2010ae
A
1198 * This can happen if the caller did not pass a cached route
1199 * nor any other hints. We treat this case an error.
1c79356b 1200 */
6d2010ae
A
1201 error = EHOSTUNREACH;
1202 }
39236c6e 1203 if (error == EHOSTUNREACH || error == ENETDOWN)
6d2010ae 1204 ip6stat.ip6s_noroute++;
1c79356b 1205
316670eb
A
1206 /*
1207 * We'll return ifp regardless of error, so pick it up from ifp0
1208 * in case it was nullified above. Caller is responsible for
1209 * releasing the ifp if it is non-NULL.
1210 */
1211 ifp = ifp0;
1212 if (retifp != NULL) {
1213 if (ifp != NULL)
1214 ifnet_reference(ifp); /* for caller */
1215 *retifp = ifp;
1216 }
1217
39236c6e
A
1218 if (retsrcia != NULL) {
1219 if (ifa != NULL)
1220 IFA_ADDREF(ifa); /* for caller */
1221 *retsrcia = (struct in6_ifaddr *)ifa;
1222 }
1223
6d2010ae 1224 if (error == 0) {
6d2010ae
A
1225 if (retrt != NULL && route != NULL)
1226 *retrt = route->ro_rt; /* ro_rt may be NULL */
39037602 1227 }
3e170ce0
A
1228 if (ip6_select_srcif_debug) {
1229 printf("%s %s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
1230 __func__,
6d2010ae
A
1231 s_src, s_dst, ifscope,
1232 (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
1233 (ifp != NULL) ? if_name(ifp) : "NONE", error);
1c79356b
A
1234 }
1235
6d2010ae
A
1236 if (ifa != NULL)
1237 IFA_REMREF(ifa);
1238
1239 return (error);
1240}
1241
316670eb
A
1242/*
1243 * Regardless of error, it will return an ifp with a reference held if the
1244 * caller provides a non-NULL retifp. The caller is responsible for checking
1245 * if the returned ifp is valid and release its reference at all times.
1246 */
3e170ce0 1247int
6d2010ae 1248in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
316670eb 1249 struct ip6_moptions *mopts, struct route_in6 *ro,
39236c6e 1250 struct ip6_out_args *ip6oa, struct ifnet **retifp)
6d2010ae 1251{
316670eb 1252 int err = 0;
6d2010ae
A
1253 struct route_in6 sro;
1254 struct rtentry *rt = NULL;
1255
1256 if (ro == NULL) {
39236c6e 1257 bzero(&sro, sizeof (sro));
6d2010ae
A
1258 ro = &sro;
1259 }
1260
39236c6e 1261 if ((err = selectroute(NULL, dstsock, opts, mopts, NULL, ro, retifp,
316670eb
A
1262 &rt, 0, 1, ip6oa)) != 0)
1263 goto done;
6d2010ae
A
1264
1265 /*
1266 * do not use a rejected or black hole route.
1267 * XXX: this check should be done in the L2 output routine.
1268 * However, if we skipped this check here, we'd see the following
1269 * scenario:
1270 * - install a rejected route for a scoped address prefix
1271 * (like fe80::/10)
1272 * - send a packet to a destination that matches the scoped prefix,
1273 * with ambiguity about the scope zone.
1274 * - pick the outgoing interface from the route, and disambiguate the
1275 * scope zone with the interface.
1276 * - ip6_output() would try to get another route with the "new"
1277 * destination, which may be valid.
1278 * - we'd see no error on output.
1279 * Although this may not be very harmful, it should still be confusing.
1280 * We thus reject the case here.
1281 */
1282 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
316670eb
A
1283 err = ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH);
1284 goto done;
6d2010ae
A
1285 }
1286
1287 /*
1288 * Adjust the "outgoing" interface. If we're going to loop the packet
1289 * back to ourselves, the ifp would be the loopback interface.
1290 * However, we'd rather know the interface associated to the
1291 * destination address (which should probably be one of our own
1292 * addresses.)
1293 */
316670eb
A
1294 if (rt != NULL && rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp != NULL &&
1295 retifp != NULL) {
1296 ifnet_reference(rt->rt_ifa->ifa_ifp);
6d2010ae
A
1297 if (*retifp != NULL)
1298 ifnet_release(*retifp);
1299 *retifp = rt->rt_ifa->ifa_ifp;
6d2010ae
A
1300 }
1301
316670eb 1302done:
39236c6e
A
1303 if (ro == &sro) {
1304 VERIFY(rt == NULL || rt == ro->ro_rt);
1305 ROUTE_RELEASE(ro);
1306 }
316670eb
A
1307
1308 /*
1309 * retifp might point to a valid ifp with a reference held;
1310 * caller is responsible for releasing it if non-NULL.
1311 */
1312 return (err);
6d2010ae
A
1313}
1314
/*
 * Public route selection entry point; forwards every argument to
 * selectroute() with its second-to-last argument fixed at 0
 * (in6_selectif() passes 1 in that position).
 *
 * Regardless of error, an ifp with a reference held is returned whenever
 * the caller provides a non-NULL retifp.  The caller is responsible for
 * checking whether the returned ifp is valid and for releasing its
 * reference in all cases.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
    struct ip6_pktopts *opts, struct ip6_moptions *mopts,
    struct in6_ifaddr **retsrcia, struct route_in6 *ro, struct ifnet **retifp,
    struct rtentry **retrt, int clone, struct ip6_out_args *ip6oa)
{
	int error;

	error = selectroute(srcsock, dstsock, opts, mopts, retsrcia, ro,
	    retifp, retrt, clone, 0, ip6oa);

	return (error);
}
1332
1333/*
1334 * Default hop limit selection. The precedence is as follows:
1335 * 1. Hoplimit value specified via ioctl.
1336 * 2. (If the outgoing interface is detected) the current
1337 * hop limit of the interface specified by router advertisement.
1338 * 3. The system default hoplimit.
39236c6e 1339 */
1c79356b 1340int
39236c6e 1341in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
1c79356b 1342{
b0d623f7 1343 if (in6p && in6p->in6p_hops >= 0) {
39236c6e 1344 return (in6p->in6p_hops);
3e170ce0
A
1345 } else if (NULL != ifp) {
1346 u_int8_t chlim;
1347 struct nd_ifinfo *ndi = ND_IFINFO(ifp);
1348 if (ndi && ndi->initialized) {
1349 /* access chlim without lock, for performance */
1350 chlim = ndi->chlim;
b0d623f7 1351 } else {
3e170ce0 1352 chlim = ip6_defhlim;
b0d623f7 1353 }
3e170ce0 1354 return (chlim);
b0d623f7 1355 }
3e170ce0
A
1356
1357 return (ip6_defhlim);
1c79356b 1358}
1c79356b 1359
1c79356b 1360/*
9bccf70c
A
1361 * XXX: this is borrowed from in6_pcbbind(). If possible, we should
1362 * share this function by all *bsd*...
1c79356b 1363 */
1c79356b 1364int
39236c6e
A
1365in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p,
1366 int locked)
1c79356b 1367{
39236c6e 1368#pragma unused(laddr)
9bccf70c
A
1369 struct socket *so = inp->inp_socket;
1370 u_int16_t lport = 0, first, last, *lastport;
1371 int count, error = 0, wild = 0;
5ba3f43e 1372 bool found;
9bccf70c 1373 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
6d2010ae 1374 kauth_cred_t cred;
91447636 1375 if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
39236c6e 1376 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
91447636 1377 socket_unlock(inp->inp_socket, 0);
39236c6e 1378 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
91447636
A
1379 socket_lock(inp->inp_socket, 0);
1380 }
fe8ab488
A
1381
1382 /*
1383 * Check if a local port was assigned to the inp while
1384 * this thread was waiting for the pcbinfo lock
1385 */
1386 if (inp->inp_lport != 0) {
1387 VERIFY(inp->inp_flags2 & INP2_INHASHLIST);
1388 lck_rw_done(pcbinfo->ipi_lock);
1389
1390 /*
1391 * It is not an error if another thread allocated
1392 * a port
1393 */
1394 return (0);
1395 }
91447636 1396 }
1c79356b
A
1397
1398 /* XXX: this is redundant when called from in6_pcbbind */
9bccf70c
A
1399 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
1400 wild = INPLOOKUP_WILDCARD;
1401
9bccf70c
A
1402 if (inp->inp_flags & INP_HIGHPORT) {
1403 first = ipport_hifirstauto; /* sysctl */
1404 last = ipport_hilastauto;
39236c6e 1405 lastport = &pcbinfo->ipi_lasthi;
9bccf70c 1406 } else if (inp->inp_flags & INP_LOWPORT) {
6d2010ae
A
1407 cred = kauth_cred_proc_ref(p);
1408 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
1409 kauth_cred_unref(&cred);
1410 if (error != 0) {
91447636 1411 if (!locked)
39236c6e
A
1412 lck_rw_done(pcbinfo->ipi_lock);
1413 return (error);
91447636 1414 }
9bccf70c
A
1415 first = ipport_lowfirstauto; /* 1023 */
1416 last = ipport_lowlastauto; /* 600 */
39236c6e 1417 lastport = &pcbinfo->ipi_lastlow;
1c79356b 1418 } else {
9bccf70c
A
1419 first = ipport_firstauto; /* sysctl */
1420 last = ipport_lastauto;
39236c6e 1421 lastport = &pcbinfo->ipi_lastport;
9bccf70c
A
1422 }
1423 /*
1424 * Simple check to ensure all ports are not used up causing
1425 * a deadlock here.
1426 *
1427 * We split the two cases (up and down) so that the direction
1428 * is not being tested on each round of the loop.
1429 */
1430 if (first > last) {
1431 /*
1432 * counting down
1433 */
1434 count = first - last;
5ba3f43e 1435 found = false;
9bccf70c
A
1436
1437 do {
1438 if (count-- < 0) { /* completely used? */
1439 /*
1440 * Undo any address bind that may have
1441 * occurred above.
1442 */
1443 inp->in6p_laddr = in6addr_any;
316670eb 1444 inp->in6p_last_outifp = NULL;
91447636 1445 if (!locked)
39236c6e 1446 lck_rw_done(pcbinfo->ipi_lock);
9bccf70c
A
1447 return (EAGAIN);
1448 }
1449 --*lastport;
1450 if (*lastport > first || *lastport < last)
1451 *lastport = first;
1452 lport = htons(*lastport);
5ba3f43e
A
1453
1454 found = in6_pcblookup_local(pcbinfo, &inp->in6p_laddr,
1455 lport, wild) == NULL;
1456 } while (!found);
9bccf70c 1457 } else {
39236c6e 1458 /* counting up */
9bccf70c 1459 count = last - first;
5ba3f43e 1460 found = false;
9bccf70c
A
1461
1462 do {
1463 if (count-- < 0) { /* completely used? */
1464 /*
1465 * Undo any address bind that may have
1466 * occurred above.
1467 */
1468 inp->in6p_laddr = in6addr_any;
316670eb 1469 inp->in6p_last_outifp = NULL;
91447636 1470 if (!locked)
39236c6e 1471 lck_rw_done(pcbinfo->ipi_lock);
9bccf70c
A
1472 return (EAGAIN);
1473 }
1474 ++*lastport;
1475 if (*lastport < first || *lastport > last)
1476 *lastport = first;
1477 lport = htons(*lastport);
5ba3f43e
A
1478
1479 found = in6_pcblookup_local(pcbinfo, &inp->in6p_laddr,
1480 lport, wild) == NULL;
1481 } while (!found);
1c79356b
A
1482 }
1483
9bccf70c 1484 inp->inp_lport = lport;
fe8ab488
A
1485 inp->inp_flags |= INP_ANONPORT;
1486
91447636 1487 if (in_pcbinshash(inp, 1) != 0) {
9bccf70c 1488 inp->in6p_laddr = in6addr_any;
316670eb 1489 inp->in6p_last_outifp = NULL;
fe8ab488
A
1490
1491 inp->inp_lport = 0;
1492 inp->inp_flags &= ~INP_ANONPORT;
91447636 1493 if (!locked)
39236c6e 1494 lck_rw_done(pcbinfo->ipi_lock);
9bccf70c
A
1495 return (EAGAIN);
1496 }
1497
91447636 1498 if (!locked)
39236c6e
A
1499 lck_rw_done(pcbinfo->ipi_lock);
1500 return (0);
9bccf70c
A
1501}
1502
6d2010ae 1503/*
39236c6e
A
1504 * The followings are implementation of the policy table using a
1505 * simple tail queue.
1506 * XXX such details should be hidden.
1507 * XXX implementation using binary tree should be more efficient.
1508 */
6d2010ae 1509struct addrsel_policyent {
39236c6e
A
1510 TAILQ_ENTRY(addrsel_policyent) ape_entry;
1511 struct in6_addrpolicy ape_policy;
6d2010ae
A
1512};
1513
1514TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
1515
1516struct addrsel_policyhead addrsel_policytab;
1517
1518static void
1519init_policy_queue(void)
1520{
39236c6e 1521 TAILQ_INIT(&addrsel_policytab);
6d2010ae
A
1522}
1523
1524void
1525addrsel_policy_init(void)
1526{
1527 /*
fe8ab488 1528 * Default address selection policy based on RFC 6724.
6d2010ae
A
1529 */
1530 static const struct in6_addrpolicy defaddrsel[] = {
fe8ab488 1531 /* Loopback -- prefix=::1/128, precedence=50, label=0 */
39236c6e
A
1532 {
1533 .addr = {
1534 .sin6_family = AF_INET6,
6d2010ae 1535 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
39236c6e
A
1536 .sin6_len = sizeof (struct sockaddr_in6)
1537 },
1538 .addrmask = {
1539 .sin6_family = AF_INET6,
1540 .sin6_addr = IN6MASK128,
1541 .sin6_len = sizeof (struct sockaddr_in6)
1542 },
fe8ab488 1543 .preced = 50,
39236c6e
A
1544 .label = 0
1545 },
1546
fe8ab488 1547 /* Unspecified -- prefix=::/0, precedence=40, label=1 */
39236c6e
A
1548 {
1549 .addr = {
1550 .sin6_family = AF_INET6,
fe8ab488 1551 .sin6_addr = IN6ADDR_ANY_INIT,
39236c6e
A
1552 .sin6_len = sizeof (struct sockaddr_in6)
1553 },
1554 .addrmask = {
1555 .sin6_family = AF_INET6,
fe8ab488 1556 .sin6_addr = IN6MASK0,
39236c6e
A
1557 .sin6_len = sizeof (struct sockaddr_in6)
1558 },
fe8ab488 1559 .preced = 40,
39236c6e
A
1560 .label = 1
1561 },
1562
fe8ab488 1563 /* IPv4 Mapped -- prefix=::ffff:0:0/96, precedence=35, label=4 */
39236c6e
A
1564 {
1565 .addr = {
1566 .sin6_family = AF_INET6,
fe8ab488 1567 .sin6_addr = IN6ADDR_V4MAPPED_INIT,
39236c6e
A
1568 .sin6_len = sizeof (struct sockaddr_in6)
1569 },
1570 .addrmask = {
1571 .sin6_family = AF_INET6,
fe8ab488 1572 .sin6_addr = IN6MASK96,
39236c6e
A
1573 .sin6_len = sizeof (struct sockaddr_in6)
1574 },
fe8ab488
A
1575 .preced = 35,
1576 .label = 4
1577 },
39236c6e 1578
fe8ab488 1579 /* 6to4 -- prefix=2002::/16, precedence=30, label=2 */
39236c6e
A
1580 {
1581 .addr = {
1582 .sin6_family = AF_INET6,
fe8ab488 1583 .sin6_addr = {{{ 0x20, 0x02 }}},
39236c6e
A
1584 .sin6_len = sizeof (struct sockaddr_in6)
1585 },
1586 .addrmask = {
1587 .sin6_family = AF_INET6,
fe8ab488 1588 .sin6_addr = IN6MASK16,
39236c6e
A
1589 .sin6_len = sizeof (struct sockaddr_in6)
1590 },
1591 .preced = 30,
fe8ab488 1592 .label = 2
39236c6e
A
1593 },
1594
fe8ab488 1595 /* Teredo -- prefix=2001::/32, precedence=5, label=5 */
39236c6e
A
1596 {
1597 .addr = {
1598 .sin6_family = AF_INET6,
fe8ab488 1599 .sin6_addr = {{{ 0x20, 0x01 }}},
39236c6e
A
1600 .sin6_len = sizeof (struct sockaddr_in6)
1601 },
1602 .addrmask = {
1603 .sin6_family = AF_INET6,
fe8ab488 1604 .sin6_addr = IN6MASK32,
39236c6e
A
1605 .sin6_len = sizeof (struct sockaddr_in6)
1606 },
fe8ab488
A
1607 .preced = 5,
1608 .label = 5
39236c6e
A
1609 },
1610
fe8ab488 1611 /* Unique Local (ULA) -- prefix=fc00::/7, precedence=3, label=13 */
39236c6e
A
1612 {
1613 .addr = {
1614 .sin6_family = AF_INET6,
fe8ab488 1615 .sin6_addr = {{{ 0xfc }}},
39236c6e
A
1616 .sin6_len = sizeof (struct sockaddr_in6)
1617 },
1618 .addrmask = {
1619 .sin6_family = AF_INET6,
fe8ab488 1620 .sin6_addr = IN6MASK7,
39236c6e
A
1621 .sin6_len = sizeof (struct sockaddr_in6)
1622 },
fe8ab488
A
1623 .preced = 3,
1624 .label = 13
39236c6e
A
1625 },
1626
fe8ab488 1627 /* IPv4 Compatible -- prefix=::/96, precedence=1, label=3 */
39236c6e
A
1628 {
1629 .addr = {
1630 .sin6_family = AF_INET6,
6d2010ae 1631 .sin6_addr = IN6ADDR_ANY_INIT,
39236c6e
A
1632 .sin6_len = sizeof (struct sockaddr_in6)
1633 },
1634 .addrmask = {
1635 .sin6_family = AF_INET6,
1636 .sin6_addr = IN6MASK96,
1637 .sin6_len = sizeof (struct sockaddr_in6)
1638 },
1639 .preced = 1,
fe8ab488 1640 .label = 3
39236c6e
A
1641 },
1642
fe8ab488 1643 /* Site-local (deprecated) -- prefix=fec0::/10, precedence=1, label=11 */
39236c6e
A
1644 {
1645 .addr = {
1646 .sin6_family = AF_INET6,
6d2010ae 1647 .sin6_addr = {{{ 0xfe, 0xc0 }}},
39236c6e
A
1648 .sin6_len = sizeof (struct sockaddr_in6)
1649 },
1650 .addrmask = {
1651 .sin6_family = AF_INET6,
1652 .sin6_addr = IN6MASK16,
1653 .sin6_len = sizeof (struct sockaddr_in6)
1654 },
1655 .preced = 1,
1656 .label = 11
1657 },
1658
fe8ab488 1659 /* 6bone (deprecated) -- prefix=3ffe::/16, precedence=1, label=12 */
39236c6e
A
1660 {
1661 .addr = {
1662 .sin6_family = AF_INET6,
6d2010ae 1663 .sin6_addr = {{{ 0x3f, 0xfe }}},
39236c6e
A
1664 .sin6_len = sizeof (struct sockaddr_in6)
1665 },
1666 .addrmask = {
1667 .sin6_family = AF_INET6,
1668 .sin6_addr = IN6MASK16,
1669 .sin6_len = sizeof (struct sockaddr_in6)
1670 },
1671 .preced = 1,
1672 .label = 12
1673 },
6d2010ae
A
1674 };
1675 int i;
1676
1677 init_policy_queue();
1678
1679 /* initialize the "last resort" policy */
39236c6e 1680 bzero(&defaultaddrpolicy, sizeof (defaultaddrpolicy));
6d2010ae
A
1681 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
1682
39236c6e 1683 for (i = 0; i < sizeof (defaddrsel) / sizeof (defaddrsel[0]); i++)
6d2010ae
A
1684 add_addrsel_policyent(&defaddrsel[i]);
1685
1686}
1687
1688struct in6_addrpolicy *
1689in6_addrsel_lookup_policy(struct sockaddr_in6 *key)
1690{
1691 struct in6_addrpolicy *match = NULL;
1692
1693 ADDRSEL_LOCK();
1694 match = match_addrsel_policy(key);
1695
1696 if (match == NULL)
1697 match = &defaultaddrpolicy;
1698 else
1699 match->use++;
1700 ADDRSEL_UNLOCK();
1701
1702 return (match);
1703}
1704
1705static struct in6_addrpolicy *
1706match_addrsel_policy(struct sockaddr_in6 *key)
1707{
1708 struct addrsel_policyent *pent;
1709 struct in6_addrpolicy *bestpol = NULL, *pol;
1710 int matchlen, bestmatchlen = -1;
1711 u_char *mp, *ep, *k, *p, m;
1712
1713 TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) {
1714 matchlen = 0;
1715
1716 pol = &pent->ape_policy;
1717 mp = (u_char *)&pol->addrmask.sin6_addr;
1718 ep = mp + 16; /* XXX: scope field? */
1719 k = (u_char *)&key->sin6_addr;
1720 p = (u_char *)&pol->addr.sin6_addr;
1721 for (; mp < ep && *mp; mp++, k++, p++) {
1722 m = *mp;
1723 if ((*k & m) != *p)
1724 goto next; /* not match */
1725 if (m == 0xff) /* short cut for a typical case */
1726 matchlen += 8;
1727 else {
1728 while (m >= 0x80) {
1729 matchlen++;
1730 m <<= 1;
1731 }
1732 }
1733 }
1734
1735 /* matched. check if this is better than the current best. */
1736 if (bestpol == NULL ||
1737 matchlen > bestmatchlen) {
1738 bestpol = pol;
1739 bestmatchlen = matchlen;
1740 }
1741
39236c6e 1742 next:
6d2010ae
A
1743 continue;
1744 }
1745
1746 return (bestpol);
39236c6e 1747}
6d2010ae
A
1748
1749static int
1750add_addrsel_policyent(const struct in6_addrpolicy *newpolicy)
1751{
1752 struct addrsel_policyent *new, *pol;
1753
39236c6e
A
1754 MALLOC(new, struct addrsel_policyent *, sizeof (*new), M_IFADDR,
1755 M_WAITOK);
6d2010ae
A
1756
1757 ADDRSEL_LOCK();
1758
1759 /* duplication check */
1760 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1761 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
39236c6e 1762 &pol->ape_policy.addr.sin6_addr) &&
6d2010ae 1763 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
39236c6e 1764 &pol->ape_policy.addrmask.sin6_addr)) {
6d2010ae
A
1765 ADDRSEL_UNLOCK();
1766 FREE(new, M_IFADDR);
1767 return (EEXIST); /* or override it? */
1768 }
1769 }
1770
39236c6e 1771 bzero(new, sizeof (*new));
6d2010ae
A
1772
1773 /* XXX: should validate entry */
1774 new->ape_policy = *newpolicy;
1775
1776 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
1777 ADDRSEL_UNLOCK();
1778
1779 return (0);
1780}
#ifdef ENABLE_ADDRSEL
/*
 * Remove and free the table entry whose prefix address and mask equal
 * those of key.  Returns ESRCH when no such entry exists, 0 otherwise.
 */
static int
delete_addrsel_policyent(const struct in6_addrpolicy *key)
{
	struct addrsel_policyent *ent;

	ADDRSEL_LOCK();

	/* search for the entry in the table */
	TAILQ_FOREACH(ent, &addrsel_policytab, ape_entry) {
		if (!IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
		    &ent->ape_policy.addr.sin6_addr))
			continue;
		if (IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
		    &ent->ape_policy.addrmask.sin6_addr))
			break;
	}

	/* ent is NULL here when the walk ran off the end of the table. */
	if (ent == NULL) {
		ADDRSEL_UNLOCK();
		return (ESRCH);
	}

	TAILQ_REMOVE(&addrsel_policytab, ent, ape_entry);
	FREE(ent, M_IFADDR);
	ent = NULL;
	ADDRSEL_UNLOCK();

	return (0);
}
#endif /* ENABLE_ADDRSEL */
1812
1813int
1814walk_addrsel_policy(int (*callback)(const struct in6_addrpolicy *, void *),
1815 void *w)
1816{
1817 struct addrsel_policyent *pol;
1818 int error = 0;
1819
1820 ADDRSEL_LOCK();
1821 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1822 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1823 ADDRSEL_UNLOCK();
1824 return (error);
1825 }
1826 }
1827 ADDRSEL_UNLOCK();
1828 return (error);
1829}
1830/*
1831 * Subroutines to manage the address selection policy table via sysctl.
1832 */
1833struct walkarg {
1834 struct sysctl_req *w_req;
1835};
1836
1837
1838static int
1839dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg)
1840{
1841 int error = 0;
1842 struct walkarg *w = arg;
1843
39236c6e 1844 error = SYSCTL_OUT(w->w_req, pol, sizeof (*pol));
6d2010ae
A
1845
1846 return (error);
1847}
1848
1849static int
39236c6e 1850in6_src_sysctl SYSCTL_HANDLER_ARGS
6d2010ae
A
1851{
1852#pragma unused(oidp, arg1, arg2)
1853struct walkarg w;
1854
1855 if (req->newptr)
39236c6e
A
1856 return (EPERM);
1857 bzero(&w, sizeof (w));
6d2010ae
A
1858 w.w_req = req;
1859
1860 return (walk_addrsel_policy(dump_addrsel_policyent, &w));
1861}
1862
1863
1864SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
1865 CTLFLAG_RD | CTLFLAG_LOCKED, in6_src_sysctl, "");
1866int
1867in6_src_ioctl(u_long cmd, caddr_t data)
1868{
1869 int i;
1870 struct in6_addrpolicy ent0;
1871
1872 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
1873 return (EOPNOTSUPP); /* check for safety */
1874
316670eb 1875 bcopy(data, &ent0, sizeof (ent0));
6d2010ae
A
1876
1877 if (ent0.label == ADDR_LABEL_NOTAPP)
1878 return (EINVAL);
1879 /* check if the prefix mask is consecutive. */
1880 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
1881 return (EINVAL);
1882 /* clear trailing garbages (if any) of the prefix address. */
1883 for (i = 0; i < 4; i++) {
1884 ent0.addr.sin6_addr.s6_addr32[i] &=
1885 ent0.addrmask.sin6_addr.s6_addr32[i];
1886 }
1887 ent0.use = 0;
1888
1889 switch (cmd) {
1890 case SIOCAADDRCTL_POLICY:
1891#ifdef ENABLE_ADDRSEL
1892 return (add_addrsel_policyent(&ent0));
1893#else
1894 return (ENOTSUP);
1895#endif
1896 case SIOCDADDRCTL_POLICY:
1897#ifdef ENABLE_ADDRSEL
1898 return (delete_addrsel_policyent(&ent0));
1899#else
1900 return (ENOTSUP);
1901#endif
1902 }
1903
1904 return (0); /* XXX: compromise compilers */
1905}
1906
9bccf70c
A
1907/*
1908 * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
1909 * If the address scope of is link-local, embed the interface index in the
1910 * address. The routine determines our precedence
1911 * between advanced API scope/interface specification and basic API
1912 * specification.
1913 *
1914 * this function should be nuked in the future, when we get rid of
1915 * embedded scopeid thing.
1916 *
1917 * XXX actually, it is over-specification to return ifp against sin6_scope_id.
1918 * there can be multiple interfaces that belong to a particular scope zone
1919 * (in specification, we have 1:N mapping between a scope zone and interfaces).
1920 * we may want to change the function to return something other than ifp.
1921 */
1922int
39236c6e
A
1923in6_embedscope(struct in6_addr *in6, const struct sockaddr_in6 *sin6,
1924 struct in6pcb *in6p, struct ifnet **ifpp, struct ip6_pktopts *opt)
9bccf70c
A
1925{
1926 struct ifnet *ifp = NULL;
1927 u_int32_t scopeid;
6d2010ae 1928 struct ip6_pktopts *optp = NULL;
9bccf70c
A
1929
1930 *in6 = sin6->sin6_addr;
1931 scopeid = sin6->sin6_scope_id;
6d2010ae 1932 if (ifpp != NULL)
9bccf70c
A
1933 *ifpp = NULL;
1934
1935 /*
1936 * don't try to read sin6->sin6_addr beyond here, since the caller may
1937 * ask us to overwrite existing sockaddr_in6
1938 */
1939
1940#ifdef ENABLE_DEFAULT_SCOPE
1941 if (scopeid == 0)
1942 scopeid = scope6_addr2default(in6);
1c79356b 1943#endif
9bccf70c 1944
fe8ab488 1945 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) {
9bccf70c 1946 struct in6_pktinfo *pi;
6d2010ae
A
1947 struct ifnet *im6o_multicast_ifp = NULL;
1948
1949 if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
1950 in6p->in6p_moptions != NULL) {
1951 IM6O_LOCK(in6p->in6p_moptions);
1952 im6o_multicast_ifp =
1953 in6p->in6p_moptions->im6o_multicast_ifp;
1954 IM6O_UNLOCK(in6p->in6p_moptions);
1955 }
9bccf70c 1956
39236c6e 1957 if (opt != NULL)
6d2010ae 1958 optp = opt;
39236c6e 1959 else if (in6p != NULL)
6d2010ae 1960 optp = in6p->in6p_outputopts;
9bccf70c
A
1961 /*
1962 * KAME assumption: link id == interface id
1963 */
39236c6e
A
1964 if (in6p != NULL && optp != NULL &&
1965 (pi = optp->ip6po_pktinfo) != NULL &&
1966 pi->ipi6_ifindex != 0) {
1967 /* ifp is needed here if only we're returning it */
1968 if (ifpp != NULL) {
1969 ifnet_head_lock_shared();
1970 ifp = ifindex2ifnet[pi->ipi6_ifindex];
1971 ifnet_head_done();
1972 }
9bccf70c 1973 in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
39236c6e 1974 } else if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
6d2010ae
A
1975 in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) {
1976 ifp = im6o_multicast_ifp;
9bccf70c 1977 in6->s6_addr16[1] = htons(ifp->if_index);
39236c6e
A
1978 } else if (scopeid != 0) {
1979 /*
b0d623f7 1980 * Since scopeid is unsigned, we only have to check it
39236c6e
A
1981 * against if_index (ifnet_head_lock not needed since
1982 * if_index is an ever-increasing integer.)
b0d623f7 1983 */
39236c6e
A
1984 if (if_index < scopeid)
1985 return (ENXIO); /* XXX EINVAL? */
b0d623f7 1986
39236c6e
A
1987 /* ifp is needed here only if we're returning it */
1988 if (ifpp != NULL) {
1989 ifnet_head_lock_shared();
1990 ifp = ifindex2ifnet[scopeid];
1991 ifnet_head_done();
b0d623f7 1992 }
39236c6e 1993 /* XXX assignment to 16bit from 32bit variable */
9bccf70c 1994 in6->s6_addr16[1] = htons(scopeid & 0xffff);
1c79356b 1995 }
9bccf70c 1996
6d2010ae
A
1997 if (ifpp != NULL) {
1998 if (ifp != NULL)
1999 ifnet_reference(ifp); /* for caller */
9bccf70c 2000 *ifpp = ifp;
6d2010ae 2001 }
1c79356b
A
2002 }
2003
39236c6e 2004 return (0);
1c79356b 2005}
9bccf70c
A
2006
2007/*
2008 * generate standard sockaddr_in6 from embedded form.
2009 * touches sin6_addr and sin6_scope_id only.
2010 *
2011 * this function should be nuked in the future, when we get rid of
2012 * embedded scopeid thing.
2013 */
2014int
91447636
A
2015in6_recoverscope(
2016 struct sockaddr_in6 *sin6,
2017 const struct in6_addr *in6,
2018 struct ifnet *ifp)
9bccf70c
A
2019{
2020 u_int32_t scopeid;
2021
2022 sin6->sin6_addr = *in6;
2023
2024 /*
2025 * don't try to read *in6 beyond here, since the caller may
2026 * ask us to overwrite existing sockaddr_in6
2027 */
2028
2029 sin6->sin6_scope_id = 0;
fe8ab488 2030 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) {
9bccf70c
A
2031 /*
2032 * KAME assumption: link id == interface id
2033 */
2034 scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]);
2035 if (scopeid) {
39236c6e
A
2036 /*
2037 * sanity check
b0d623f7
A
2038 *
2039 * Since scopeid is unsigned, we only have to check it
2040 * against if_index
2041 */
2042 if (if_index < scopeid)
39236c6e 2043 return (ENXIO);
9bccf70c 2044 if (ifp && ifp->if_index != scopeid)
39236c6e 2045 return (ENXIO);
9bccf70c
A
2046 sin6->sin6_addr.s6_addr16[1] = 0;
2047 sin6->sin6_scope_id = scopeid;
2048 }
2049 }
2050
39236c6e 2051 return (0);
9bccf70c 2052}