]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/in6_src.c
xnu-3789.41.3.tar.gz
[apple/xnu.git] / bsd / netinet6 / in6_src.c
1 /*
2 * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Neither the name of the project nor the names of its contributors
42 * may be used to endorse or promote products derived from this software
43 * without specific prior written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 */
57
58 /*
59 * Copyright (c) 1982, 1986, 1991, 1993
60 * The Regents of the University of California. All rights reserved.
61 *
62 * Redistribution and use in source and binary forms, with or without
63 * modification, are permitted provided that the following conditions
64 * are met:
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 * 2. Redistributions in binary form must reproduce the above copyright
68 * notice, this list of conditions and the following disclaimer in the
69 * documentation and/or other materials provided with the distribution.
70 * 3. All advertising materials mentioning features or use of this software
71 * must display the following acknowledgement:
72 * This product includes software developed by the University of
73 * California, Berkeley and its contributors.
74 * 4. Neither the name of the University nor the names of its contributors
75 * may be used to endorse or promote products derived from this software
76 * without specific prior written permission.
77 *
78 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
79 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
80 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
81 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
82 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
83 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
84 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
85 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
86 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
87 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
88 * SUCH DAMAGE.
89 *
90 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
91 */
92
93
94 #include <sys/param.h>
95 #include <sys/systm.h>
96 #include <sys/malloc.h>
97 #include <sys/mbuf.h>
98 #include <sys/protosw.h>
99 #include <sys/socket.h>
100 #include <sys/socketvar.h>
101 #include <sys/errno.h>
102 #include <sys/time.h>
103 #include <sys/proc.h>
104 #include <sys/sysctl.h>
105 #include <sys/kauth.h>
106 #include <sys/priv.h>
107 #include <kern/locks.h>
108
109 #include <net/if.h>
110 #include <net/if_types.h>
111 #include <net/route.h>
112
113 #include <netinet/in.h>
114 #include <netinet/in_var.h>
115 #include <netinet/in_systm.h>
116 #include <netinet/ip.h>
117 #include <netinet/in_pcb.h>
118 #include <netinet6/in6_var.h>
119 #include <netinet/ip6.h>
120 #include <netinet6/in6_pcb.h>
121 #include <netinet6/ip6_var.h>
122 #include <netinet6/scope6_var.h>
123 #include <netinet6/nd6.h>
124
125 #include <net/net_osdep.h>
126
127 #include "loop.h"
128
129 SYSCTL_DECL(_net_inet6_ip6);
130
131 static int ip6_select_srcif_debug = 0;
132 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcif_debug,
133 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcif_debug, 0,
134 "log source interface selection debug info");
135
136 static int ip6_select_srcaddr_debug = 0;
137 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_srcaddr_debug,
138 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_srcaddr_debug, 0,
139 "log source address selection debug info");
140
141 static int ip6_select_src_expensive_secondary_if = 0;
142 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, select_src_expensive_secondary_if,
143 CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_select_src_expensive_secondary_if, 0,
144 "allow source interface selection to use expensive secondaries");
145
146 #define ADDR_LABEL_NOTAPP (-1)
147 struct in6_addrpolicy defaultaddrpolicy;
148
149 int ip6_prefer_tempaddr = 1;
150 #ifdef ENABLE_ADDRSEL
151 extern lck_mtx_t *addrsel_mutex;
152 #define ADDRSEL_LOCK() lck_mtx_lock(addrsel_mutex)
153 #define ADDRSEL_UNLOCK() lck_mtx_unlock(addrsel_mutex)
154 #else
155 #define ADDRSEL_LOCK()
156 #define ADDRSEL_UNLOCK()
157 #endif
158
159 static int selectroute(struct sockaddr_in6 *, struct sockaddr_in6 *,
160 struct ip6_pktopts *, struct ip6_moptions *, struct in6_ifaddr **,
161 struct route_in6 *, struct ifnet **, struct rtentry **, int, int,
162 struct ip6_out_args *ip6oa);
163 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
164 struct ip6_moptions *, struct route_in6 *ro,
165 struct ip6_out_args *, struct ifnet **);
166 static void init_policy_queue(void);
167 static int add_addrsel_policyent(const struct in6_addrpolicy *);
168 #ifdef ENABLE_ADDRSEL
169 static int delete_addrsel_policyent(const struct in6_addrpolicy *);
170 #endif
171 static int walk_addrsel_policy(int (*)(const struct in6_addrpolicy *, void *),
172 void *);
173 static int dump_addrsel_policyent(const struct in6_addrpolicy *, void *);
174 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
175 void addrsel_policy_init(void);
176
/*
 * True when source address selection logging applies to this pcb: the
 * select_srcaddr_debug sysctl is on, the pcb and its socket exist, and the
 * socket has SO_DEBUG set.
 */
#define SASEL_DO_DBG(inp) \
	(ip6_select_srcaddr_debug && (inp) != NULL && \
	(inp)->inp_socket != NULL && \
	((inp)->inp_socket->so_options & SO_DEBUG))

/*
 * printf-style logging gated by SASEL_DO_DBG().  The macro picks up a
 * variable named `inp' from the scope in which it is expanded.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller's own
 * `;' completes the do/while statement, so the macro stays a single
 * statement and is safe as the body of an unbraced if/else.
 */
#define SASEL_LOG(fmt, ...) \
do { \
	if (SASEL_DO_DBG(inp)) \
		printf("%s:%d " fmt "\n",\
		    __FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)

/*
 * in6_selectsrc() returns an IPv6 address, which is the most appropriate
 * for a given destination and user specified options.
 * If necessary, it looks up the routing table and returns
 * an entry to the caller for later use.
 *
 * The three macros below are the control-flow helpers of its candidate
 * loop.  They expect the following names in the expanding scope: `inp'
 * (via SASEL_LOG), `s_src', `ifp1', `srcrule', and the labels `replace',
 * `next' and `out'.
 */

/* The current candidate wins under rule `r': record it and go to `replace'. */
#define REPLACE(r) do {\
	SASEL_LOG("REPLACE r %d ia %s ifp1 %s\n", \
	    (r), s_src, ifp1->if_xname); \
	srcrule = (r); \
	goto replace; \
} while (0)

/* The current candidate loses under rule `r': move on to the next address. */
#define NEXTSRC(r) do {\
	SASEL_LOG("NEXTSRC r %d ia %s ifp1 %s\n", \
	    (r), s_src, ifp1->if_xname); \
	goto next;	/* XXX: we can't use 'continue' here */ \
} while (0)

/* The current candidate is final under rule `r': record it and stop. */
#define BREAK(r) do { \
	SASEL_LOG("BREAK r %d ia %s ifp1 %s\n", \
	    (r), s_src, ifp1->if_xname); \
	srcrule = (r); \
	goto out;	/* XXX: we can't use 'break' here */ \
} while (0)
214
215 /*
216 * Regardless of error, it will return an ifp with a reference held if the
217 * caller provides a non-NULL ifpp. The caller is responsible for checking
218 * if the returned ifp is valid and release its reference at all times.
219 */
220 struct in6_addr *
221 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
222 struct inpcb *inp, struct route_in6 *ro,
223 struct ifnet **ifpp, struct in6_addr *src_storage, unsigned int ifscope,
224 int *errorp)
225 {
226 struct in6_addr dst;
227 struct ifnet *ifp = NULL;
228 struct in6_ifaddr *ia = NULL, *ia_best = NULL;
229 struct in6_pktinfo *pi = NULL;
230 int dst_scope = -1, best_scope = -1, best_matchlen = -1;
231 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
232 u_int32_t odstzone;
233 int prefer_tempaddr;
234 struct ip6_moptions *mopts;
235 struct ip6_out_args ip6oa = { ifscope, { 0 }, IP6OAF_SELECT_SRCIF, 0,
236 SO_TC_UNSPEC, _NET_SERVICE_TYPE_UNSPEC };
237 boolean_t islocal = FALSE;
238 uint64_t secs = net_uptime();
239 char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
240 const struct in6_addr *tmp;
241 int bestrule = IP6S_SRCRULE_0;
242
243 dst = dstsock->sin6_addr; /* make a copy for local operation */
244 *errorp = 0;
245 if (ifpp != NULL)
246 *ifpp = NULL;
247
248 if (inp != NULL) {
249 mopts = inp->in6p_moptions;
250 if (INP_NO_CELLULAR(inp))
251 ip6oa.ip6oa_flags |= IP6OAF_NO_CELLULAR;
252 if (INP_NO_EXPENSIVE(inp))
253 ip6oa.ip6oa_flags |= IP6OAF_NO_EXPENSIVE;
254 if (INP_AWDL_UNRESTRICTED(inp))
255 ip6oa.ip6oa_flags |= IP6OAF_AWDL_UNRESTRICTED;
256 if (INP_INTCOPROC_ALLOWED(inp))
257 ip6oa.ip6oa_flags |= IP6OAF_INTCOPROC_ALLOWED;
258 } else {
259 mopts = NULL;
260 /* Allow the kernel to retransmit packets. */
261 ip6oa.ip6oa_flags |= IP6OAF_INTCOPROC_ALLOWED |
262 IP6OAF_AWDL_UNRESTRICTED;
263 }
264
265 if (ip6oa.ip6oa_boundif != IFSCOPE_NONE)
266 ip6oa.ip6oa_flags |= IP6OAF_BOUND_IF;
267
268 /*
269 * If the source address is explicitly specified by the caller,
270 * check if the requested source address is indeed a unicast address
271 * assigned to the node, and can be used as the packet's source
272 * address. If everything is okay, use the address as source.
273 */
274 if (opts && (pi = opts->ip6po_pktinfo) &&
275 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
276 struct sockaddr_in6 srcsock;
277 struct in6_ifaddr *ia6;
278
279 /* get the outgoing interface */
280 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
281 &ifp)) != 0) {
282 src_storage = NULL;
283 goto done;
284 }
285
286 /*
287 * determine the appropriate zone id of the source based on
288 * the zone of the destination and the outgoing interface.
289 * If the specified address is ambiguous wrt the scope zone,
290 * the interface must be specified; otherwise, ifa_ifwithaddr()
291 * will fail matching the address.
292 */
293 bzero(&srcsock, sizeof (srcsock));
294 srcsock.sin6_family = AF_INET6;
295 srcsock.sin6_len = sizeof (srcsock);
296 srcsock.sin6_addr = pi->ipi6_addr;
297 if (ifp != NULL) {
298 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
299 if (*errorp != 0) {
300 src_storage = NULL;
301 goto done;
302 }
303 }
304 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)
305 (&srcsock));
306 if (ia6 == NULL) {
307 *errorp = EADDRNOTAVAIL;
308 src_storage = NULL;
309 goto done;
310 }
311 IFA_LOCK_SPIN(&ia6->ia_ifa);
312 if ((ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) ||
313 (inp && inp_restricted_send(inp, ia6->ia_ifa.ifa_ifp))) {
314 IFA_UNLOCK(&ia6->ia_ifa);
315 IFA_REMREF(&ia6->ia_ifa);
316 *errorp = EHOSTUNREACH;
317 src_storage = NULL;
318 goto done;
319 }
320
321 *src_storage = satosin6(&ia6->ia_addr)->sin6_addr;
322 IFA_UNLOCK(&ia6->ia_ifa);
323 IFA_REMREF(&ia6->ia_ifa);
324 goto done;
325 }
326
327 /*
328 * Otherwise, if the socket has already bound the source, just use it.
329 */
330 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
331 src_storage = &inp->in6p_laddr;
332 goto done;
333 }
334
335 /*
336 * If the address is not specified, choose the best one based on
337 * the outgoing interface and the destination address.
338 */
339
340 /* get the outgoing interface */
341 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ip6oa,
342 &ifp)) != 0) {
343 src_storage = NULL;
344 goto done;
345 }
346
347 if (SASEL_DO_DBG(inp)) {
348 (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src));
349
350 tmp = &in6addr_any;
351 (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src));
352
353 printf("%s out src %s dst %s ifscope %d ifp %s\n",
354 __func__, s_src, s_dst, ifscope,
355 ifp ? ifp->if_xname : "NULL");
356 }
357
358 *errorp = in6_setscope(&dst, ifp, &odstzone);
359 if (*errorp != 0) {
360 src_storage = NULL;
361 goto done;
362 }
363 lck_rw_lock_shared(&in6_ifaddr_rwlock);
364
365 for (ia = in6_ifaddrs; ia; ia = ia->ia_next) {
366 int new_scope = -1, new_matchlen = -1;
367 struct in6_addrpolicy *new_policy = NULL;
368 u_int32_t srczone, osrczone, dstzone;
369 struct in6_addr src;
370 struct ifnet *ifp1 = ia->ia_ifp;
371 int srcrule;
372
373 if (SASEL_DO_DBG(inp))
374 (void) inet_ntop(AF_INET6, &ia->ia_addr.sin6_addr,
375 s_src, sizeof (s_src));
376
377 IFA_LOCK(&ia->ia_ifa);
378 /*
379 * We'll never take an address that breaks the scope zone
380 * of the destination. We also skip an address if its zone
381 * does not contain the outgoing interface.
382 * XXX: we should probably use sin6_scope_id here.
383 */
384 if (in6_setscope(&dst, ifp1, &dstzone) ||
385 odstzone != dstzone) {
386 SASEL_LOG("NEXT ia %s ifp1 %s odstzone %d != dstzone %d\n",
387 s_src, ifp1->if_xname, odstzone, dstzone);
388 goto next;
389 }
390 src = ia->ia_addr.sin6_addr;
391 if (in6_setscope(&src, ifp, &osrczone) ||
392 in6_setscope(&src, ifp1, &srczone) ||
393 osrczone != srczone) {
394 SASEL_LOG("NEXT ia %s ifp1 %s osrczone %d != srczone %d\n",
395 s_src, ifp1->if_xname, osrczone, srczone);
396 goto next;
397 }
398 /* avoid unusable addresses */
399 if ((ia->ia6_flags &
400 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
401 SASEL_LOG("NEXT ia %s ifp1 %s ia6_flags 0x%x\n",
402 s_src, ifp1->if_xname, ia->ia6_flags);
403 goto next;
404 }
405 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia, secs)) {
406 SASEL_LOG("NEXT ia %s ifp1 %s IFA6_IS_DEPRECATED\n",
407 s_src, ifp1->if_xname);
408 goto next;
409 }
410 if (!nd6_optimistic_dad &&
411 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0) {
412 SASEL_LOG("NEXT ia %s ifp1 %s IN6_IFF_OPTIMISTIC\n",
413 s_src, ifp1->if_xname);
414 goto next;
415 }
416 /* Rule 1: Prefer same address */
417 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr))
418 BREAK(IP6S_SRCRULE_1); /* there should be no better candidate */
419
420 if (ia_best == NULL)
421 REPLACE(IP6S_SRCRULE_0);
422
423 /* Rule 2: Prefer appropriate scope */
424 if (dst_scope < 0)
425 dst_scope = in6_addrscope(&dst);
426 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
427 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
428 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
429 REPLACE(IP6S_SRCRULE_2);
430 NEXTSRC(IP6S_SRCRULE_2);
431 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
432 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
433 NEXTSRC(IP6S_SRCRULE_2);
434 REPLACE(IP6S_SRCRULE_2);
435 }
436
437 /*
438 * Rule 3: Avoid deprecated addresses. Note that the case of
439 * !ip6_use_deprecated is already rejected above.
440 */
441 if (!IFA6_IS_DEPRECATED(ia_best, secs) &&
442 IFA6_IS_DEPRECATED(ia, secs))
443 NEXTSRC(IP6S_SRCRULE_3);
444 if (IFA6_IS_DEPRECATED(ia_best, secs) &&
445 !IFA6_IS_DEPRECATED(ia, secs))
446 REPLACE(IP6S_SRCRULE_3);
447
448 /*
449 * RFC 4429 says that optimistic addresses are equivalent to
450 * deprecated addresses, so avoid them here.
451 */
452 if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) == 0 &&
453 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) != 0)
454 NEXTSRC(IP6S_SRCRULE_3);
455 if ((ia_best->ia6_flags & IN6_IFF_OPTIMISTIC) != 0 &&
456 (ia->ia6_flags & IN6_IFF_OPTIMISTIC) == 0)
457 REPLACE(IP6S_SRCRULE_3);
458
459 /* Rule 4: Prefer home addresses */
460 /*
461 * XXX: This is a TODO. We should probably merge the MIP6
462 * case above.
463 */
464
465 /* Rule 5: Prefer outgoing interface */
466 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
467 NEXTSRC(IP6S_SRCRULE_5);
468 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
469 REPLACE(IP6S_SRCRULE_5);
470
471 /* Rule 5.5: Prefer addresses in a prefix advertised by the next hop. */
472 if (ro != NULL && ro->ro_rt != NULL && ia_best->ia6_ndpr != NULL &&
473 ia->ia6_ndpr != NULL) {
474 struct rtentry *rta, *rtb;
475 int op;
476
477 NDPR_LOCK(ia_best->ia6_ndpr);
478 rta = ia_best->ia6_ndpr->ndpr_rt;
479 if (rta != NULL)
480 RT_ADDREF(rta);
481 NDPR_UNLOCK(ia_best->ia6_ndpr);
482
483 NDPR_LOCK(ia->ia6_ndpr);
484 rtb = ia->ia6_ndpr->ndpr_rt;
485 if (rtb != NULL)
486 RT_ADDREF(rtb);
487 NDPR_UNLOCK(ia->ia6_ndpr);
488
489 if (rta == NULL || rtb == NULL)
490 op = 0;
491 else if (rta == ro->ro_rt && rtb != ro->ro_rt)
492 op = 1;
493 else if (rta != ro->ro_rt && rtb == ro->ro_rt)
494 op = 2;
495 else
496 op = 0;
497
498 if (rta != NULL)
499 RT_REMREF(rta);
500 if (rtb != NULL)
501 RT_REMREF(rtb);
502
503 switch (op) {
504 case 1:
505 NEXTSRC(IP6S_SRCRULE_5_5);
506 break;
507 case 2:
508 REPLACE(IP6S_SRCRULE_5_5);
509 break;
510 default:
511 break;
512 }
513 }
514
515 /*
516 * Rule 6: Prefer matching label
517 * Note that best_policy should be non-NULL here.
518 */
519 if (dst_policy == NULL)
520 dst_policy = in6_addrsel_lookup_policy(dstsock);
521 if (dst_policy->label != ADDR_LABEL_NOTAPP) {
522 new_policy = in6_addrsel_lookup_policy(&ia->ia_addr);
523 if (dst_policy->label == best_policy->label &&
524 dst_policy->label != new_policy->label)
525 NEXTSRC(IP6S_SRCRULE_6);
526 if (dst_policy->label != best_policy->label &&
527 dst_policy->label == new_policy->label)
528 REPLACE(IP6S_SRCRULE_6);
529 }
530
531 /*
532 * Rule 7: Prefer temporary addresses.
533 * We allow users to reverse the logic by configuring
534 * a sysctl variable, so that transparency conscious users can
535 * always prefer stable addresses.
536 * Don't use temporary addresses for local destinations or
537 * for multicast addresses unless we were passed in an option.
538 */
539 if (IN6_IS_ADDR_MULTICAST(&dst) ||
540 in6_matchlen(&ia_best->ia_addr.sin6_addr, &dst) >=
541 ia_best->ia_plen)
542 islocal = TRUE;
543 if (opts == NULL ||
544 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
545 prefer_tempaddr = islocal ? 0 : ip6_prefer_tempaddr;
546 } else if (opts->ip6po_prefer_tempaddr ==
547 IP6PO_TEMPADDR_NOTPREFER) {
548 prefer_tempaddr = 0;
549 } else
550 prefer_tempaddr = 1;
551 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
552 (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
553 if (prefer_tempaddr)
554 REPLACE(IP6S_SRCRULE_7);
555 else
556 NEXTSRC(IP6S_SRCRULE_7);
557 }
558 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
559 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
560 if (prefer_tempaddr)
561 NEXTSRC(IP6S_SRCRULE_7);
562 else
563 REPLACE(IP6S_SRCRULE_7);
564 }
565
566 /*
567 * Rule 7x: prefer addresses on alive interfaces.
568 * This is a KAME specific rule.
569 */
570 if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
571 !(ia->ia_ifp->if_flags & IFF_UP))
572 NEXTSRC(IP6S_SRCRULE_7x);
573 if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
574 (ia->ia_ifp->if_flags & IFF_UP))
575 REPLACE(IP6S_SRCRULE_7x);
576
577 /*
578 * Rule 8: Use longest matching prefix.
579 */
580 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
581 if (best_matchlen < new_matchlen)
582 REPLACE(IP6S_SRCRULE_8);
583 if (new_matchlen < best_matchlen)
584 NEXTSRC(IP6S_SRCRULE_8);
585
586 /*
587 * Last resort: just keep the current candidate.
588 * Or, do we need more rules?
589 */
590 if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) &&
591 ip6_select_src_expensive_secondary_if == 0) {
592 SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n",
593 s_src, ifp1->if_xname);
594 ip6stat.ip6s_sources_skip_expensive_secondary_if++;
595 goto next;
596 }
597 SASEL_LOG("NEXT ia %s ifp1 %s last resort\n",
598 s_src, ifp1->if_xname);
599 IFA_UNLOCK(&ia->ia_ifa);
600 continue;
601
602 replace:
603 /*
604 * Ignore addresses on secondary interfaces that are marked
605 * expensive
606 */
607 if (ifp1 != ifp && (ifp1->if_eflags & IFEF_EXPENSIVE) &&
608 ip6_select_src_expensive_secondary_if == 0) {
609 SASEL_LOG("NEXT ia %s ifp1 %s IFEF_EXPENSIVE\n",
610 s_src, ifp1->if_xname);
611 ip6stat.ip6s_sources_skip_expensive_secondary_if++;
612 goto next;
613 }
614 bestrule = srcrule;
615 best_scope = (new_scope >= 0 ? new_scope :
616 in6_addrscope(&ia->ia_addr.sin6_addr));
617 best_policy = (new_policy ? new_policy :
618 in6_addrsel_lookup_policy(&ia->ia_addr));
619 best_matchlen = (new_matchlen >= 0 ? new_matchlen :
620 in6_matchlen(&ia->ia_addr.sin6_addr, &dst));
621 SASEL_LOG("NEXT ia %s ifp1 %s best_scope %d new_scope %d dst_scope %d\n",
622 s_src, ifp1->if_xname, best_scope, new_scope, dst_scope);
623 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
624 IFA_UNLOCK(&ia->ia_ifa);
625 if (ia_best != NULL)
626 IFA_REMREF(&ia_best->ia_ifa);
627 ia_best = ia;
628 continue;
629
630 next:
631 IFA_UNLOCK(&ia->ia_ifa);
632 continue;
633
634 out:
635 IFA_ADDREF_LOCKED(&ia->ia_ifa); /* for ia_best */
636 IFA_UNLOCK(&ia->ia_ifa);
637 if (ia_best != NULL)
638 IFA_REMREF(&ia_best->ia_ifa);
639 ia_best = ia;
640 break;
641 }
642
643 lck_rw_done(&in6_ifaddr_rwlock);
644
645 if (ia_best != NULL && inp &&
646 inp_restricted_send(inp, ia_best->ia_ifa.ifa_ifp)) {
647 IFA_REMREF(&ia_best->ia_ifa);
648 ia_best = NULL;
649 *errorp = EHOSTUNREACH;
650 }
651
652 if ((ia = ia_best) == NULL) {
653 if (*errorp == 0)
654 *errorp = EADDRNOTAVAIL;
655 src_storage = NULL;
656 goto done;
657 }
658
659 IFA_LOCK_SPIN(&ia->ia_ifa);
660 if (bestrule < IP6S_SRCRULE_COUNT)
661 ip6stat.ip6s_sources_rule[bestrule]++;
662 *src_storage = satosin6(&ia->ia_addr)->sin6_addr;
663 IFA_UNLOCK(&ia->ia_ifa);
664 IFA_REMREF(&ia->ia_ifa);
665 done:
666 if (SASEL_DO_DBG(inp)) {
667 (void) inet_ntop(AF_INET6, &dst, s_dst, sizeof (s_src));
668
669 tmp = (src_storage != NULL) ? src_storage : &in6addr_any;
670 (void) inet_ntop(AF_INET6, tmp, s_src, sizeof (s_src));
671
672 printf("%s out src %s dst %s ifscope %d dst_scope %d best_scope %d\n",
673 __func__, s_src, s_dst, ifscope, dst_scope, best_scope);
674 }
675 if (ifpp != NULL) {
676 /* if ifp is non-NULL, refcnt held in in6_selectif() */
677 *ifpp = ifp;
678 } else if (ifp != NULL) {
679 ifnet_release(ifp);
680 }
681 return (src_storage);
682 }
683
684 /*
685 * Given a source IPv6 address (and route, if available), determine the best
686 * interface to send the packet from. Checking for (and updating) the
687 * ROF_SRCIF_SELECTED flag in the pcb-supplied route placeholder is done
688 * without any locks, based on the assumption that in the event this is
689 * called from ip6_output(), the output operation is single-threaded per-pcb,
690 * i.e. for any given pcb there can only be one thread performing output at
691 * the IPv6 layer.
692 *
693 * This routine is analogous to in_selectsrcif() for IPv4. Regardless of
694 * error, it will return an ifp with a reference held if the caller provides
695 * a non-NULL retifp. The caller is responsible for checking if the
696 * returned ifp is valid and release its reference at all times.
697 *
698 * clone - meaningful only for bsdi and freebsd
699 */
700 static int
701 selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
702 struct ip6_pktopts *opts, struct ip6_moptions *mopts,
703 struct in6_ifaddr **retsrcia, struct route_in6 *ro,
704 struct ifnet **retifp, struct rtentry **retrt, int clone,
705 int norouteok, struct ip6_out_args *ip6oa)
706 {
707 int error = 0;
708 struct ifnet *ifp = NULL, *ifp0 = NULL;
709 struct route_in6 *route = NULL;
710 struct sockaddr_in6 *sin6_next;
711 struct in6_pktinfo *pi = NULL;
712 struct in6_addr *dst = &dstsock->sin6_addr;
713 struct ifaddr *ifa = NULL;
714 char s_src[MAX_IPv6_STR_LEN], s_dst[MAX_IPv6_STR_LEN];
715 boolean_t select_srcif, proxied_ifa = FALSE, local_dst = FALSE;
716 unsigned int ifscope = ((ip6oa != NULL) ?
717 ip6oa->ip6oa_boundif : IFSCOPE_NONE);
718
719 #if 0
720 char ip6buf[INET6_ADDRSTRLEN];
721
722 if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
723 dstsock->sin6_addr.s6_addr32[1] == 0 &&
724 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
725 printf("in6_selectroute: strange destination %s\n",
726 ip6_sprintf(ip6buf, &dstsock->sin6_addr));
727 } else {
728 printf("in6_selectroute: destination = %s%%%d\n",
729 ip6_sprintf(ip6buf, &dstsock->sin6_addr),
730 dstsock->sin6_scope_id); /* for debug */
731 }
732 #endif
733
734 if (retifp != NULL)
735 *retifp = NULL;
736
737 if (retrt != NULL)
738 *retrt = NULL;
739
740 if (ip6_select_srcif_debug) {
741 struct in6_addr src;
742 src = (srcsock != NULL) ? srcsock->sin6_addr : in6addr_any;
743 (void) inet_ntop(AF_INET6, &src, s_src, sizeof (s_src));
744 (void) inet_ntop(AF_INET6, dst, s_dst, sizeof (s_dst));
745 }
746
747 /*
748 * If the destination address is UNSPECIFIED addr, bail out.
749 */
750 if (IN6_IS_ADDR_UNSPECIFIED(dst)) {
751 error = EHOSTUNREACH;
752 goto done;
753 }
754
755 /*
756 * Perform source interface selection only if Scoped Routing
757 * is enabled and a source address that isn't unspecified.
758 */
759 select_srcif = (srcsock != NULL &&
760 !IN6_IS_ADDR_UNSPECIFIED(&srcsock->sin6_addr));
761
762 if (ip6_select_srcif_debug) {
763 printf("%s src %s dst %s ifscope %d select_srcif %d\n",
764 __func__, s_src, s_dst, ifscope, select_srcif);
765 }
766
767 /* If the caller specified the outgoing interface explicitly, use it */
768 if (opts != NULL && (pi = opts->ip6po_pktinfo) != NULL &&
769 pi->ipi6_ifindex != 0) {
770 /*
771 * If IPV6_PKTINFO takes precedence over IPV6_BOUND_IF.
772 */
773 ifscope = pi->ipi6_ifindex;
774 ifnet_head_lock_shared();
775 /* ifp may be NULL if detached or out of range */
776 ifp = ifp0 =
777 ((ifscope <= if_index) ? ifindex2ifnet[ifscope] : NULL);
778 ifnet_head_done();
779 if (norouteok || retrt == NULL || IN6_IS_ADDR_MULTICAST(dst)) {
780 /*
781 * We do not have to check or get the route for
782 * multicast. If the caller didn't ask/care for
783 * the route and we have no interface to use,
784 * it's an error.
785 */
786 if (ifp == NULL)
787 error = EHOSTUNREACH;
788 goto done;
789 } else {
790 goto getsrcif;
791 }
792 }
793
794 /*
795 * If the destination address is a multicast address and the outgoing
796 * interface for the address is specified by the caller, use it.
797 */
798 if (IN6_IS_ADDR_MULTICAST(dst) && mopts != NULL) {
799 IM6O_LOCK(mopts);
800 if ((ifp = ifp0 = mopts->im6o_multicast_ifp) != NULL) {
801 IM6O_UNLOCK(mopts);
802 goto done; /* we do not need a route for multicast. */
803 }
804 IM6O_UNLOCK(mopts);
805 }
806
807 getsrcif:
808 /*
809 * If the outgoing interface was not set via IPV6_BOUND_IF or
810 * IPV6_PKTINFO, use the scope ID in the destination address.
811 */
812 if (ifscope == IFSCOPE_NONE)
813 ifscope = dstsock->sin6_scope_id;
814
815 /*
816 * Perform source interface selection; the source IPv6 address
817 * must belong to one of the addresses of the interface used
818 * by the route. For performance reasons, do this only if
819 * there is no route, or if the routing table has changed,
820 * or if we haven't done source interface selection on this
821 * route (for this PCB instance) before.
822 */
823 if (!select_srcif) {
824 goto getroute;
825 } else if (!ROUTE_UNUSABLE(ro) && ro->ro_srcia != NULL &&
826 (ro->ro_flags & ROF_SRCIF_SELECTED)) {
827 if (ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)
828 local_dst = TRUE;
829 ifa = ro->ro_srcia;
830 IFA_ADDREF(ifa); /* for caller */
831 goto getroute;
832 }
833
834 /*
835 * Given the source IPv6 address, find a suitable source interface
836 * to use for transmission; if a scope ID has been specified,
837 * optimize the search by looking at the addresses only for that
838 * interface. This is still suboptimal, however, as we need to
839 * traverse the per-interface list.
840 */
841 if (ifscope != IFSCOPE_NONE || (ro != NULL && ro->ro_rt != NULL)) {
842 unsigned int scope = ifscope;
843 struct ifnet *rt_ifp;
844
845 rt_ifp = (ro->ro_rt != NULL) ? ro->ro_rt->rt_ifp : NULL;
846
847 /*
848 * If no scope is specified and the route is stale (pointing
849 * to a defunct interface) use the current primary interface;
850 * this happens when switching between interfaces configured
851 * with the same IPv6 address. Otherwise pick up the scope
852 * information from the route; the ULP may have looked up a
853 * correct route and we just need to verify it here and mark
854 * it with the ROF_SRCIF_SELECTED flag below.
855 */
856 if (scope == IFSCOPE_NONE) {
857 scope = rt_ifp->if_index;
858 if (scope != get_primary_ifscope(AF_INET6) &&
859 ROUTE_UNUSABLE(ro))
860 scope = get_primary_ifscope(AF_INET6);
861 }
862
863 ifa = (struct ifaddr *)
864 ifa_foraddr6_scoped(&srcsock->sin6_addr, scope);
865
866 /*
867 * If we are forwarding and proxying prefix(es), see if the
868 * source address is one of ours and is a proxied address;
869 * if so, use it.
870 */
871 if (ifa == NULL && ip6_forwarding && nd6_prproxy) {
872 ifa = (struct ifaddr *)
873 ifa_foraddr6(&srcsock->sin6_addr);
874 if (ifa != NULL && !(proxied_ifa =
875 nd6_prproxy_ifaddr((struct in6_ifaddr *)ifa))) {
876 IFA_REMREF(ifa);
877 ifa = NULL;
878 }
879 }
880
881 if (ip6_select_srcif_debug && ifa != NULL) {
882 if (ro->ro_rt != NULL) {
883 printf("%s %s->%s ifscope %d->%d ifa_if %s "
884 "ro_if %s\n",
885 __func__,
886 s_src, s_dst, ifscope,
887 scope, if_name(ifa->ifa_ifp),
888 if_name(rt_ifp));
889 } else {
890 printf("%s %s->%s ifscope %d->%d ifa_if %s\n",
891 __func__,
892 s_src, s_dst, ifscope, scope,
893 if_name(ifa->ifa_ifp));
894 }
895 }
896 }
897
898 /*
899 * Slow path; search for an interface having the corresponding source
900 * IPv6 address if the scope was not specified by the caller, and:
901 *
902 * 1) There currently isn't any route, or,
903 * 2) The interface used by the route does not own that source
904 * IPv6 address; in this case, the route will get blown away
905 * and we'll do a more specific scoped search using the newly
906 * found interface.
907 */
908 if (ifa == NULL && ifscope == IFSCOPE_NONE) {
909 struct ifaddr *ifadst;
910
911 /* Check if the destination address is one of ours */
912 ifadst = (struct ifaddr *)ifa_foraddr6(&dstsock->sin6_addr);
913 if (ifadst != NULL) {
914 local_dst = TRUE;
915 IFA_REMREF(ifadst);
916 }
917
918 ifa = (struct ifaddr *)ifa_foraddr6(&srcsock->sin6_addr);
919
920 if (ip6_select_srcif_debug && ifa != NULL) {
921 printf("%s %s->%s ifscope %d ifa_if %s\n",
922 __func__,
923 s_src, s_dst, ifscope, if_name(ifa->ifa_ifp));
924 } else if (ip6_select_srcif_debug) {
925 printf("%s %s->%s ifscope %d ifa_if NULL\n",
926 __func__,
927 s_src, s_dst, ifscope);
928 }
929 }
930
931 getroute:
932 if (ifa != NULL && !proxied_ifa && !local_dst)
933 ifscope = ifa->ifa_ifp->if_index;
934
935 /*
936 * If the next hop address for the packet is specified by the caller,
937 * use it as the gateway.
938 */
939 if (opts != NULL && opts->ip6po_nexthop != NULL) {
940 struct route_in6 *ron;
941
942 sin6_next = satosin6(opts->ip6po_nexthop);
943
944 /* at this moment, we only support AF_INET6 next hops */
945 if (sin6_next->sin6_family != AF_INET6) {
946 error = EAFNOSUPPORT; /* or should we proceed? */
947 goto done;
948 }
949
950 /*
951 * If the next hop is an IPv6 address, then the node identified
952 * by that address must be a neighbor of the sending host.
953 */
954 ron = &opts->ip6po_nextroute;
955 if (ron->ro_rt != NULL)
956 RT_LOCK(ron->ro_rt);
957 if (ROUTE_UNUSABLE(ron) || (ron->ro_rt != NULL &&
958 (!(ron->ro_rt->rt_flags & RTF_LLINFO) ||
959 (select_srcif && (ifa == NULL ||
960 (ifa->ifa_ifp != ron->ro_rt->rt_ifp && !proxied_ifa))))) ||
961 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
962 &sin6_next->sin6_addr)) {
963 if (ron->ro_rt != NULL)
964 RT_UNLOCK(ron->ro_rt);
965
966 ROUTE_RELEASE(ron);
967 *satosin6(&ron->ro_dst) = *sin6_next;
968 }
969 if (ron->ro_rt == NULL) {
970 rtalloc_scoped((struct route *)ron, ifscope);
971 if (ron->ro_rt != NULL)
972 RT_LOCK(ron->ro_rt);
973 if (ROUTE_UNUSABLE(ron) ||
974 !(ron->ro_rt->rt_flags & RTF_LLINFO) ||
975 !IN6_ARE_ADDR_EQUAL(&satosin6(rt_key(ron->ro_rt))->
976 sin6_addr, &sin6_next->sin6_addr)) {
977 if (ron->ro_rt != NULL)
978 RT_UNLOCK(ron->ro_rt);
979
980 ROUTE_RELEASE(ron);
981 error = EHOSTUNREACH;
982 goto done;
983 }
984 }
985 route = ron;
986 ifp = ifp0 = ron->ro_rt->rt_ifp;
987
988 /*
989 * When cloning is required, try to allocate a route to the
990 * destination so that the caller can store path MTU
991 * information.
992 */
993 if (!clone) {
994 if (select_srcif) {
995 /* Keep the route locked */
996 goto validateroute;
997 }
998 RT_UNLOCK(ron->ro_rt);
999 goto done;
1000 }
1001 RT_UNLOCK(ron->ro_rt);
1002 }
1003
1004 /*
1005 * Use a cached route if it exists and is valid, else try to allocate
1006 * a new one. Note that we should check the address family of the
1007 * cached destination, in case of sharing the cache with IPv4.
1008 */
1009 if (ro == NULL)
1010 goto done;
1011 if (ro->ro_rt != NULL)
1012 RT_LOCK_SPIN(ro->ro_rt);
1013 if (ROUTE_UNUSABLE(ro) || (ro->ro_rt != NULL &&
1014 (satosin6(&ro->ro_dst)->sin6_family != AF_INET6 ||
1015 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst) ||
1016 (select_srcif && (ifa == NULL ||
1017 (ifa->ifa_ifp != ro->ro_rt->rt_ifp && !proxied_ifa)))))) {
1018 if (ro->ro_rt != NULL)
1019 RT_UNLOCK(ro->ro_rt);
1020
1021 ROUTE_RELEASE(ro);
1022 }
1023 if (ro->ro_rt == NULL) {
1024 struct sockaddr_in6 *sa6;
1025
1026 if (ro->ro_rt != NULL)
1027 RT_UNLOCK(ro->ro_rt);
1028 /* No route yet, so try to acquire one */
1029 bzero(&ro->ro_dst, sizeof (struct sockaddr_in6));
1030 sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
1031 sa6->sin6_family = AF_INET6;
1032 sa6->sin6_len = sizeof (struct sockaddr_in6);
1033 sa6->sin6_addr = *dst;
1034 if (IN6_IS_ADDR_MULTICAST(dst)) {
1035 ro->ro_rt = rtalloc1_scoped(
1036 &((struct route *)ro)->ro_dst, 0, 0, ifscope);
1037 } else {
1038 rtalloc_scoped((struct route *)ro, ifscope);
1039 }
1040 if (ro->ro_rt != NULL)
1041 RT_LOCK_SPIN(ro->ro_rt);
1042 }
1043
1044 /*
1045 * Do not care about the result if we have the nexthop
1046 * explicitly specified (in case we're asked to clone.)
1047 */
1048 if (opts != NULL && opts->ip6po_nexthop != NULL) {
1049 if (ro->ro_rt != NULL)
1050 RT_UNLOCK(ro->ro_rt);
1051 goto done;
1052 }
1053
1054 if (ro->ro_rt != NULL) {
1055 RT_LOCK_ASSERT_HELD(ro->ro_rt);
1056 ifp = ifp0 = ro->ro_rt->rt_ifp;
1057 } else {
1058 error = EHOSTUNREACH;
1059 }
1060 route = ro;
1061
1062 validateroute:
1063 if (select_srcif) {
1064 boolean_t has_route = (route != NULL && route->ro_rt != NULL);
1065 boolean_t srcif_selected = FALSE;
1066
1067 if (has_route)
1068 RT_LOCK_ASSERT_HELD(route->ro_rt);
1069 /*
1070 * If there is a non-loopback route with the wrong interface,
1071 * or if there is no interface configured with such an address,
1072 * blow it away. Except for local/loopback, we look for one
1073 * with a matching interface scope/index.
1074 */
1075 if (has_route && (ifa == NULL ||
1076 (ifa->ifa_ifp != ifp && ifp != lo_ifp) ||
1077 !(route->ro_rt->rt_flags & RTF_UP))) {
1078 /*
1079 * If the destination address belongs to a proxied
1080 * prefix, relax the requirement and allow the packet
1081 * to come out of the proxy interface with the source
1082 * address of the real interface.
1083 */
1084 if (ifa != NULL && proxied_ifa &&
1085 (route->ro_rt->rt_flags & (RTF_UP|RTF_PROXY)) ==
1086 (RTF_UP|RTF_PROXY)) {
1087 srcif_selected = TRUE;
1088 } else {
1089 if (ip6_select_srcif_debug) {
1090 if (ifa != NULL) {
1091 printf("%s->%s ifscope %d "
1092 "ro_if %s != ifa_if %s "
1093 "(cached route cleared)\n",
1094 s_src, s_dst,
1095 ifscope, if_name(ifp),
1096 if_name(ifa->ifa_ifp));
1097 } else {
1098 printf("%s->%s ifscope %d "
1099 "ro_if %s (no ifa_if "
1100 "found)\n", s_src, s_dst,
1101 ifscope, if_name(ifp));
1102 }
1103 }
1104 RT_UNLOCK(route->ro_rt);
1105 ROUTE_RELEASE(route);
1106 error = EHOSTUNREACH;
1107 /* Undo the settings done above */
1108 route = NULL;
1109 ifp = NULL; /* ditch ifp; keep ifp0 */
1110 has_route = FALSE;
1111 }
1112 } else if (has_route) {
1113 srcif_selected = TRUE;
1114 }
1115
1116 if (srcif_selected) {
1117 VERIFY(has_route);
1118 if (ifa != route->ro_srcia ||
1119 !(route->ro_flags & ROF_SRCIF_SELECTED)) {
1120 RT_CONVERT_LOCK(route->ro_rt);
1121 if (ifa != NULL)
1122 IFA_ADDREF(ifa); /* for route_in6 */
1123 if (route->ro_srcia != NULL)
1124 IFA_REMREF(route->ro_srcia);
1125 route->ro_srcia = ifa;
1126 route->ro_flags |= ROF_SRCIF_SELECTED;
1127 RT_GENID_SYNC(route->ro_rt);
1128 }
1129 RT_UNLOCK(route->ro_rt);
1130 }
1131 } else {
1132 if (ro->ro_rt != NULL)
1133 RT_UNLOCK(ro->ro_rt);
1134 if (ifp != NULL && opts != NULL &&
1135 opts->ip6po_pktinfo != NULL &&
1136 opts->ip6po_pktinfo->ipi6_ifindex != 0) {
1137 /*
1138 * Check if the outgoing interface conflicts with the
1139 * interface specified by ipi6_ifindex (if specified).
1140 * Note that loopback interface is always okay.
1141 * (this may happen when we are sending a packet to
1142 * one of our own addresses.)
1143 */
1144 if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index !=
1145 opts->ip6po_pktinfo->ipi6_ifindex) {
1146 error = EHOSTUNREACH;
1147 goto done;
1148 }
1149 }
1150 }
1151
1152 done:
1153 /*
1154 * Check for interface restrictions.
1155 */
1156 #define CHECK_RESTRICTIONS(_ip6oa, _ifp) \
1157 ((((_ip6oa)->ip6oa_flags & IP6OAF_NO_CELLULAR) && \
1158 IFNET_IS_CELLULAR(_ifp)) || \
1159 (((_ip6oa)->ip6oa_flags & IP6OAF_NO_EXPENSIVE) && \
1160 IFNET_IS_EXPENSIVE(_ifp)) || \
1161 (!((_ip6oa)->ip6oa_flags & IP6OAF_INTCOPROC_ALLOWED) && \
1162 IFNET_IS_INTCOPROC(_ifp)) || \
1163 (!((_ip6oa)->ip6oa_flags & IP6OAF_AWDL_UNRESTRICTED) && \
1164 IFNET_IS_AWDL_RESTRICTED(_ifp)))
1165
1166 if (error == 0 && ip6oa != NULL &&
1167 ((ifp && CHECK_RESTRICTIONS(ip6oa, ifp)) ||
1168 (route && route->ro_rt &&
1169 CHECK_RESTRICTIONS(ip6oa, route->ro_rt->rt_ifp)))) {
1170 if (route != NULL && route->ro_rt != NULL) {
1171 ROUTE_RELEASE(route);
1172 route = NULL;
1173 }
1174 ifp = NULL; /* ditch ifp; keep ifp0 */
1175 error = EHOSTUNREACH;
1176 ip6oa->ip6oa_retflags |= IP6OARF_IFDENIED;
1177 }
1178 #undef CHECK_RESTRICTIONS
1179
1180 /*
1181 * If the interface is disabled for IPv6, then ENETDOWN error.
1182 */
1183 if (error == 0 &&
1184 ifp != NULL && (ifp->if_eflags & IFEF_IPV6_DISABLED)) {
1185 error = ENETDOWN;
1186 }
1187
1188 if (ifp == NULL && (route == NULL || route->ro_rt == NULL)) {
1189 /*
1190 * This can happen if the caller did not pass a cached route
1191 * nor any other hints. We treat this case an error.
1192 */
1193 error = EHOSTUNREACH;
1194 }
1195 if (error == EHOSTUNREACH || error == ENETDOWN)
1196 ip6stat.ip6s_noroute++;
1197
1198 /*
1199 * We'll return ifp regardless of error, so pick it up from ifp0
1200 * in case it was nullified above. Caller is responsible for
1201 * releasing the ifp if it is non-NULL.
1202 */
1203 ifp = ifp0;
1204 if (retifp != NULL) {
1205 if (ifp != NULL)
1206 ifnet_reference(ifp); /* for caller */
1207 *retifp = ifp;
1208 }
1209
1210 if (retsrcia != NULL) {
1211 if (ifa != NULL)
1212 IFA_ADDREF(ifa); /* for caller */
1213 *retsrcia = (struct in6_ifaddr *)ifa;
1214 }
1215
1216 if (error == 0) {
1217 if (retrt != NULL && route != NULL)
1218 *retrt = route->ro_rt; /* ro_rt may be NULL */
1219 }
1220 if (ip6_select_srcif_debug) {
1221 printf("%s %s->%s ifscope %d ifa_if %s ro_if %s (error=%d)\n",
1222 __func__,
1223 s_src, s_dst, ifscope,
1224 (ifa != NULL) ? if_name(ifa->ifa_ifp) : "NONE",
1225 (ifp != NULL) ? if_name(ifp) : "NONE", error);
1226 }
1227
1228 if (ifa != NULL)
1229 IFA_REMREF(ifa);
1230
1231 return (error);
1232 }
1233
1234 /*
1235 * Regardless of error, it will return an ifp with a reference held if the
1236 * caller provides a non-NULL retifp. The caller is responsible for checking
1237 * if the returned ifp is valid and release its reference at all times.
1238 */
1239 int
1240 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
1241 struct ip6_moptions *mopts, struct route_in6 *ro,
1242 struct ip6_out_args *ip6oa, struct ifnet **retifp)
1243 {
1244 int err = 0;
1245 struct route_in6 sro;
1246 struct rtentry *rt = NULL;
1247
1248 if (ro == NULL) {
1249 bzero(&sro, sizeof (sro));
1250 ro = &sro;
1251 }
1252
1253 if ((err = selectroute(NULL, dstsock, opts, mopts, NULL, ro, retifp,
1254 &rt, 0, 1, ip6oa)) != 0)
1255 goto done;
1256
1257 /*
1258 * do not use a rejected or black hole route.
1259 * XXX: this check should be done in the L2 output routine.
1260 * However, if we skipped this check here, we'd see the following
1261 * scenario:
1262 * - install a rejected route for a scoped address prefix
1263 * (like fe80::/10)
1264 * - send a packet to a destination that matches the scoped prefix,
1265 * with ambiguity about the scope zone.
1266 * - pick the outgoing interface from the route, and disambiguate the
1267 * scope zone with the interface.
1268 * - ip6_output() would try to get another route with the "new"
1269 * destination, which may be valid.
1270 * - we'd see no error on output.
1271 * Although this may not be very harmful, it should still be confusing.
1272 * We thus reject the case here.
1273 */
1274 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
1275 err = ((rt->rt_flags & RTF_HOST) ? EHOSTUNREACH : ENETUNREACH);
1276 goto done;
1277 }
1278
1279 /*
1280 * Adjust the "outgoing" interface. If we're going to loop the packet
1281 * back to ourselves, the ifp would be the loopback interface.
1282 * However, we'd rather know the interface associated to the
1283 * destination address (which should probably be one of our own
1284 * addresses.)
1285 */
1286 if (rt != NULL && rt->rt_ifa != NULL && rt->rt_ifa->ifa_ifp != NULL &&
1287 retifp != NULL) {
1288 ifnet_reference(rt->rt_ifa->ifa_ifp);
1289 if (*retifp != NULL)
1290 ifnet_release(*retifp);
1291 *retifp = rt->rt_ifa->ifa_ifp;
1292 }
1293
1294 done:
1295 if (ro == &sro) {
1296 VERIFY(rt == NULL || rt == ro->ro_rt);
1297 ROUTE_RELEASE(ro);
1298 }
1299
1300 /*
1301 * retifp might point to a valid ifp with a reference held;
1302 * caller is responsible for releasing it if non-NULL.
1303 */
1304 return (err);
1305 }
1306
/*
 * Public entry point for route selection; a thin wrapper that forwards
 * every argument to selectroute() and passes 0 for the flag that follows
 * `clone'.
 *
 * Whether or not an error is returned, *retifp (when retifp is non-NULL)
 * may hold an ifp with a reference; the caller must always check it and
 * release the reference if it is non-NULL.
 *
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *srcsock, struct sockaddr_in6 *dstsock,
    struct ip6_pktopts *opts, struct ip6_moptions *mopts,
    struct in6_ifaddr **retsrcia, struct route_in6 *ro, struct ifnet **retifp,
    struct rtentry **retrt, int clone, struct ip6_out_args *ip6oa)
{
	int error;

	error = selectroute(srcsock, dstsock, opts, mopts, retsrcia, ro,
	    retifp, retrt, clone, 0, ip6oa);

	return (error);
}
1324
1325 /*
1326 * Default hop limit selection. The precedence is as follows:
1327 * 1. Hoplimit value specified via ioctl.
1328 * 2. (If the outgoing interface is detected) the current
1329 * hop limit of the interface specified by router advertisement.
1330 * 3. The system default hoplimit.
1331 */
1332 int
1333 in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp)
1334 {
1335 if (in6p && in6p->in6p_hops >= 0) {
1336 return (in6p->in6p_hops);
1337 } else if (NULL != ifp) {
1338 u_int8_t chlim;
1339 struct nd_ifinfo *ndi = ND_IFINFO(ifp);
1340 if (ndi && ndi->initialized) {
1341 /* access chlim without lock, for performance */
1342 chlim = ndi->chlim;
1343 } else {
1344 chlim = ip6_defhlim;
1345 }
1346 return (chlim);
1347 }
1348
1349 return (ip6_defhlim);
1350 }
1351
1352 /*
1353 * XXX: this is borrowed from in6_pcbbind(). If possible, we should
1354 * share this function by all *bsd*...
1355 */
1356 int
1357 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct proc *p,
1358 int locked)
1359 {
1360 #pragma unused(laddr)
1361 struct socket *so = inp->inp_socket;
1362 u_int16_t lport = 0, first, last, *lastport;
1363 int count, error = 0, wild = 0;
1364 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1365 kauth_cred_t cred;
1366 if (!locked) { /* Make sure we don't run into a deadlock: 4052373 */
1367 if (!lck_rw_try_lock_exclusive(pcbinfo->ipi_lock)) {
1368 socket_unlock(inp->inp_socket, 0);
1369 lck_rw_lock_exclusive(pcbinfo->ipi_lock);
1370 socket_lock(inp->inp_socket, 0);
1371 }
1372
1373 /*
1374 * Check if a local port was assigned to the inp while
1375 * this thread was waiting for the pcbinfo lock
1376 */
1377 if (inp->inp_lport != 0) {
1378 VERIFY(inp->inp_flags2 & INP2_INHASHLIST);
1379 lck_rw_done(pcbinfo->ipi_lock);
1380
1381 /*
1382 * It is not an error if another thread allocated
1383 * a port
1384 */
1385 return (0);
1386 }
1387 }
1388
1389 /* XXX: this is redundant when called from in6_pcbbind */
1390 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
1391 wild = INPLOOKUP_WILDCARD;
1392
1393 if (inp->inp_flags & INP_HIGHPORT) {
1394 first = ipport_hifirstauto; /* sysctl */
1395 last = ipport_hilastauto;
1396 lastport = &pcbinfo->ipi_lasthi;
1397 } else if (inp->inp_flags & INP_LOWPORT) {
1398 cred = kauth_cred_proc_ref(p);
1399 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
1400 kauth_cred_unref(&cred);
1401 if (error != 0) {
1402 if (!locked)
1403 lck_rw_done(pcbinfo->ipi_lock);
1404 return (error);
1405 }
1406 first = ipport_lowfirstauto; /* 1023 */
1407 last = ipport_lowlastauto; /* 600 */
1408 lastport = &pcbinfo->ipi_lastlow;
1409 } else {
1410 first = ipport_firstauto; /* sysctl */
1411 last = ipport_lastauto;
1412 lastport = &pcbinfo->ipi_lastport;
1413 }
1414 /*
1415 * Simple check to ensure all ports are not used up causing
1416 * a deadlock here.
1417 *
1418 * We split the two cases (up and down) so that the direction
1419 * is not being tested on each round of the loop.
1420 */
1421 if (first > last) {
1422 /*
1423 * counting down
1424 */
1425 count = first - last;
1426
1427 do {
1428 if (count-- < 0) { /* completely used? */
1429 /*
1430 * Undo any address bind that may have
1431 * occurred above.
1432 */
1433 inp->in6p_laddr = in6addr_any;
1434 inp->in6p_last_outifp = NULL;
1435 if (!locked)
1436 lck_rw_done(pcbinfo->ipi_lock);
1437 return (EAGAIN);
1438 }
1439 --*lastport;
1440 if (*lastport > first || *lastport < last)
1441 *lastport = first;
1442 lport = htons(*lastport);
1443 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport,
1444 wild));
1445 } else {
1446 /* counting up */
1447 count = last - first;
1448
1449 do {
1450 if (count-- < 0) { /* completely used? */
1451 /*
1452 * Undo any address bind that may have
1453 * occurred above.
1454 */
1455 inp->in6p_laddr = in6addr_any;
1456 inp->in6p_last_outifp = NULL;
1457 if (!locked)
1458 lck_rw_done(pcbinfo->ipi_lock);
1459 return (EAGAIN);
1460 }
1461 ++*lastport;
1462 if (*lastport < first || *lastport > last)
1463 *lastport = first;
1464 lport = htons(*lastport);
1465 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, lport,
1466 wild));
1467 }
1468
1469 inp->inp_lport = lport;
1470 inp->inp_flags |= INP_ANONPORT;
1471
1472 if (in_pcbinshash(inp, 1) != 0) {
1473 inp->in6p_laddr = in6addr_any;
1474 inp->in6p_last_outifp = NULL;
1475
1476 inp->inp_lport = 0;
1477 inp->inp_flags &= ~INP_ANONPORT;
1478 if (!locked)
1479 lck_rw_done(pcbinfo->ipi_lock);
1480 return (EAGAIN);
1481 }
1482
1483 if (!locked)
1484 lck_rw_done(pcbinfo->ipi_lock);
1485 return (0);
1486 }
1487
1488 /*
1489 * The followings are implementation of the policy table using a
1490 * simple tail queue.
1491 * XXX such details should be hidden.
1492 * XXX implementation using binary tree should be more efficient.
1493 */
1494 struct addrsel_policyent {
1495 TAILQ_ENTRY(addrsel_policyent) ape_entry;
1496 struct in6_addrpolicy ape_policy;
1497 };
1498
1499 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
1500
1501 struct addrsel_policyhead addrsel_policytab;
1502
1503 static void
1504 init_policy_queue(void)
1505 {
1506 TAILQ_INIT(&addrsel_policytab);
1507 }
1508
1509 void
1510 addrsel_policy_init(void)
1511 {
1512 /*
1513 * Default address selection policy based on RFC 6724.
1514 */
1515 static const struct in6_addrpolicy defaddrsel[] = {
1516 /* Loopback -- prefix=::1/128, precedence=50, label=0 */
1517 {
1518 .addr = {
1519 .sin6_family = AF_INET6,
1520 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
1521 .sin6_len = sizeof (struct sockaddr_in6)
1522 },
1523 .addrmask = {
1524 .sin6_family = AF_INET6,
1525 .sin6_addr = IN6MASK128,
1526 .sin6_len = sizeof (struct sockaddr_in6)
1527 },
1528 .preced = 50,
1529 .label = 0
1530 },
1531
1532 /* Unspecified -- prefix=::/0, precedence=40, label=1 */
1533 {
1534 .addr = {
1535 .sin6_family = AF_INET6,
1536 .sin6_addr = IN6ADDR_ANY_INIT,
1537 .sin6_len = sizeof (struct sockaddr_in6)
1538 },
1539 .addrmask = {
1540 .sin6_family = AF_INET6,
1541 .sin6_addr = IN6MASK0,
1542 .sin6_len = sizeof (struct sockaddr_in6)
1543 },
1544 .preced = 40,
1545 .label = 1
1546 },
1547
1548 /* IPv4 Mapped -- prefix=::ffff:0:0/96, precedence=35, label=4 */
1549 {
1550 .addr = {
1551 .sin6_family = AF_INET6,
1552 .sin6_addr = IN6ADDR_V4MAPPED_INIT,
1553 .sin6_len = sizeof (struct sockaddr_in6)
1554 },
1555 .addrmask = {
1556 .sin6_family = AF_INET6,
1557 .sin6_addr = IN6MASK96,
1558 .sin6_len = sizeof (struct sockaddr_in6)
1559 },
1560 .preced = 35,
1561 .label = 4
1562 },
1563
1564 /* 6to4 -- prefix=2002::/16, precedence=30, label=2 */
1565 {
1566 .addr = {
1567 .sin6_family = AF_INET6,
1568 .sin6_addr = {{{ 0x20, 0x02 }}},
1569 .sin6_len = sizeof (struct sockaddr_in6)
1570 },
1571 .addrmask = {
1572 .sin6_family = AF_INET6,
1573 .sin6_addr = IN6MASK16,
1574 .sin6_len = sizeof (struct sockaddr_in6)
1575 },
1576 .preced = 30,
1577 .label = 2
1578 },
1579
1580 /* Teredo -- prefix=2001::/32, precedence=5, label=5 */
1581 {
1582 .addr = {
1583 .sin6_family = AF_INET6,
1584 .sin6_addr = {{{ 0x20, 0x01 }}},
1585 .sin6_len = sizeof (struct sockaddr_in6)
1586 },
1587 .addrmask = {
1588 .sin6_family = AF_INET6,
1589 .sin6_addr = IN6MASK32,
1590 .sin6_len = sizeof (struct sockaddr_in6)
1591 },
1592 .preced = 5,
1593 .label = 5
1594 },
1595
1596 /* Unique Local (ULA) -- prefix=fc00::/7, precedence=3, label=13 */
1597 {
1598 .addr = {
1599 .sin6_family = AF_INET6,
1600 .sin6_addr = {{{ 0xfc }}},
1601 .sin6_len = sizeof (struct sockaddr_in6)
1602 },
1603 .addrmask = {
1604 .sin6_family = AF_INET6,
1605 .sin6_addr = IN6MASK7,
1606 .sin6_len = sizeof (struct sockaddr_in6)
1607 },
1608 .preced = 3,
1609 .label = 13
1610 },
1611
1612 /* IPv4 Compatible -- prefix=::/96, precedence=1, label=3 */
1613 {
1614 .addr = {
1615 .sin6_family = AF_INET6,
1616 .sin6_addr = IN6ADDR_ANY_INIT,
1617 .sin6_len = sizeof (struct sockaddr_in6)
1618 },
1619 .addrmask = {
1620 .sin6_family = AF_INET6,
1621 .sin6_addr = IN6MASK96,
1622 .sin6_len = sizeof (struct sockaddr_in6)
1623 },
1624 .preced = 1,
1625 .label = 3
1626 },
1627
1628 /* Site-local (deprecated) -- prefix=fec0::/10, precedence=1, label=11 */
1629 {
1630 .addr = {
1631 .sin6_family = AF_INET6,
1632 .sin6_addr = {{{ 0xfe, 0xc0 }}},
1633 .sin6_len = sizeof (struct sockaddr_in6)
1634 },
1635 .addrmask = {
1636 .sin6_family = AF_INET6,
1637 .sin6_addr = IN6MASK16,
1638 .sin6_len = sizeof (struct sockaddr_in6)
1639 },
1640 .preced = 1,
1641 .label = 11
1642 },
1643
1644 /* 6bone (deprecated) -- prefix=3ffe::/16, precedence=1, label=12 */
1645 {
1646 .addr = {
1647 .sin6_family = AF_INET6,
1648 .sin6_addr = {{{ 0x3f, 0xfe }}},
1649 .sin6_len = sizeof (struct sockaddr_in6)
1650 },
1651 .addrmask = {
1652 .sin6_family = AF_INET6,
1653 .sin6_addr = IN6MASK16,
1654 .sin6_len = sizeof (struct sockaddr_in6)
1655 },
1656 .preced = 1,
1657 .label = 12
1658 },
1659 };
1660 int i;
1661
1662 init_policy_queue();
1663
1664 /* initialize the "last resort" policy */
1665 bzero(&defaultaddrpolicy, sizeof (defaultaddrpolicy));
1666 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
1667
1668 for (i = 0; i < sizeof (defaddrsel) / sizeof (defaddrsel[0]); i++)
1669 add_addrsel_policyent(&defaddrsel[i]);
1670
1671 }
1672
1673 struct in6_addrpolicy *
1674 in6_addrsel_lookup_policy(struct sockaddr_in6 *key)
1675 {
1676 struct in6_addrpolicy *match = NULL;
1677
1678 ADDRSEL_LOCK();
1679 match = match_addrsel_policy(key);
1680
1681 if (match == NULL)
1682 match = &defaultaddrpolicy;
1683 else
1684 match->use++;
1685 ADDRSEL_UNLOCK();
1686
1687 return (match);
1688 }
1689
1690 static struct in6_addrpolicy *
1691 match_addrsel_policy(struct sockaddr_in6 *key)
1692 {
1693 struct addrsel_policyent *pent;
1694 struct in6_addrpolicy *bestpol = NULL, *pol;
1695 int matchlen, bestmatchlen = -1;
1696 u_char *mp, *ep, *k, *p, m;
1697
1698 TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) {
1699 matchlen = 0;
1700
1701 pol = &pent->ape_policy;
1702 mp = (u_char *)&pol->addrmask.sin6_addr;
1703 ep = mp + 16; /* XXX: scope field? */
1704 k = (u_char *)&key->sin6_addr;
1705 p = (u_char *)&pol->addr.sin6_addr;
1706 for (; mp < ep && *mp; mp++, k++, p++) {
1707 m = *mp;
1708 if ((*k & m) != *p)
1709 goto next; /* not match */
1710 if (m == 0xff) /* short cut for a typical case */
1711 matchlen += 8;
1712 else {
1713 while (m >= 0x80) {
1714 matchlen++;
1715 m <<= 1;
1716 }
1717 }
1718 }
1719
1720 /* matched. check if this is better than the current best. */
1721 if (bestpol == NULL ||
1722 matchlen > bestmatchlen) {
1723 bestpol = pol;
1724 bestmatchlen = matchlen;
1725 }
1726
1727 next:
1728 continue;
1729 }
1730
1731 return (bestpol);
1732 }
1733
1734 static int
1735 add_addrsel_policyent(const struct in6_addrpolicy *newpolicy)
1736 {
1737 struct addrsel_policyent *new, *pol;
1738
1739 MALLOC(new, struct addrsel_policyent *, sizeof (*new), M_IFADDR,
1740 M_WAITOK);
1741
1742 ADDRSEL_LOCK();
1743
1744 /* duplication check */
1745 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1746 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
1747 &pol->ape_policy.addr.sin6_addr) &&
1748 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
1749 &pol->ape_policy.addrmask.sin6_addr)) {
1750 ADDRSEL_UNLOCK();
1751 FREE(new, M_IFADDR);
1752 return (EEXIST); /* or override it? */
1753 }
1754 }
1755
1756 bzero(new, sizeof (*new));
1757
1758 /* XXX: should validate entry */
1759 new->ape_policy = *newpolicy;
1760
1761 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry);
1762 ADDRSEL_UNLOCK();
1763
1764 return (0);
1765 }
#ifdef ENABLE_ADDRSEL
/*
 * Remove and free the table entry whose address and mask both equal
 * those of `key'.  Runs under ADDRSEL_LOCK().
 *
 * Returns 0 when an entry was removed, ESRCH when none matched.
 */
static int
delete_addrsel_policyent(const struct in6_addrpolicy *key)
{
	struct addrsel_policyent *ent;
	int error = ESRCH;

	ADDRSEL_LOCK();

	/* search for the entry in the table */
	TAILQ_FOREACH(ent, &addrsel_policytab, ape_entry) {
		if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
		    &ent->ape_policy.addr.sin6_addr) &&
		    IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
		    &ent->ape_policy.addrmask.sin6_addr)) {
			/* Found; leave the loop right after unlinking. */
			TAILQ_REMOVE(&addrsel_policytab, ent, ape_entry);
			FREE(ent, M_IFADDR);
			error = 0;
			break;
		}
	}

	ADDRSEL_UNLOCK();

	return (error);
}
#endif /* ENABLE_ADDRSEL */
1797
1798 int
1799 walk_addrsel_policy(int (*callback)(const struct in6_addrpolicy *, void *),
1800 void *w)
1801 {
1802 struct addrsel_policyent *pol;
1803 int error = 0;
1804
1805 ADDRSEL_LOCK();
1806 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) {
1807 if ((error = (*callback)(&pol->ape_policy, w)) != 0) {
1808 ADDRSEL_UNLOCK();
1809 return (error);
1810 }
1811 }
1812 ADDRSEL_UNLOCK();
1813 return (error);
1814 }
/*
 * Subroutines to manage the address selection policy table via sysctl.
 */

/* Argument handed to the table-walk callback by the sysctl handler. */
struct walkarg {
	struct sysctl_req *w_req;	/* request the entries are copied out to */
};
1821
1822
1823 static int
1824 dump_addrsel_policyent(const struct in6_addrpolicy *pol, void *arg)
1825 {
1826 int error = 0;
1827 struct walkarg *w = arg;
1828
1829 error = SYSCTL_OUT(w->w_req, pol, sizeof (*pol));
1830
1831 return (error);
1832 }
1833
1834 static int
1835 in6_src_sysctl SYSCTL_HANDLER_ARGS
1836 {
1837 #pragma unused(oidp, arg1, arg2)
1838 struct walkarg w;
1839
1840 if (req->newptr)
1841 return (EPERM);
1842 bzero(&w, sizeof (w));
1843 w.w_req = req;
1844
1845 return (walk_addrsel_policy(dump_addrsel_policyent, &w));
1846 }
1847
1848
1849 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
1850 CTLFLAG_RD | CTLFLAG_LOCKED, in6_src_sysctl, "");
1851 int
1852 in6_src_ioctl(u_long cmd, caddr_t data)
1853 {
1854 int i;
1855 struct in6_addrpolicy ent0;
1856
1857 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY)
1858 return (EOPNOTSUPP); /* check for safety */
1859
1860 bcopy(data, &ent0, sizeof (ent0));
1861
1862 if (ent0.label == ADDR_LABEL_NOTAPP)
1863 return (EINVAL);
1864 /* check if the prefix mask is consecutive. */
1865 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0)
1866 return (EINVAL);
1867 /* clear trailing garbages (if any) of the prefix address. */
1868 for (i = 0; i < 4; i++) {
1869 ent0.addr.sin6_addr.s6_addr32[i] &=
1870 ent0.addrmask.sin6_addr.s6_addr32[i];
1871 }
1872 ent0.use = 0;
1873
1874 switch (cmd) {
1875 case SIOCAADDRCTL_POLICY:
1876 #ifdef ENABLE_ADDRSEL
1877 return (add_addrsel_policyent(&ent0));
1878 #else
1879 return (ENOTSUP);
1880 #endif
1881 case SIOCDADDRCTL_POLICY:
1882 #ifdef ENABLE_ADDRSEL
1883 return (delete_addrsel_policyent(&ent0));
1884 #else
1885 return (ENOTSUP);
1886 #endif
1887 }
1888
1889 return (0); /* XXX: compromise compilers */
1890 }
1891
1892 /*
1893 * generate kernel-internal form (scopeid embedded into s6_addr16[1]).
1894 * If the address scope of is link-local, embed the interface index in the
1895 * address. The routine determines our precedence
1896 * between advanced API scope/interface specification and basic API
1897 * specification.
1898 *
1899 * this function should be nuked in the future, when we get rid of
1900 * embedded scopeid thing.
1901 *
1902 * XXX actually, it is over-specification to return ifp against sin6_scope_id.
1903 * there can be multiple interfaces that belong to a particular scope zone
1904 * (in specification, we have 1:N mapping between a scope zone and interfaces).
1905 * we may want to change the function to return something other than ifp.
1906 */
1907 int
1908 in6_embedscope(struct in6_addr *in6, const struct sockaddr_in6 *sin6,
1909 struct in6pcb *in6p, struct ifnet **ifpp, struct ip6_pktopts *opt)
1910 {
1911 struct ifnet *ifp = NULL;
1912 u_int32_t scopeid;
1913 struct ip6_pktopts *optp = NULL;
1914
1915 *in6 = sin6->sin6_addr;
1916 scopeid = sin6->sin6_scope_id;
1917 if (ifpp != NULL)
1918 *ifpp = NULL;
1919
1920 /*
1921 * don't try to read sin6->sin6_addr beyond here, since the caller may
1922 * ask us to overwrite existing sockaddr_in6
1923 */
1924
1925 #ifdef ENABLE_DEFAULT_SCOPE
1926 if (scopeid == 0)
1927 scopeid = scope6_addr2default(in6);
1928 #endif
1929
1930 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) {
1931 struct in6_pktinfo *pi;
1932 struct ifnet *im6o_multicast_ifp = NULL;
1933
1934 if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
1935 in6p->in6p_moptions != NULL) {
1936 IM6O_LOCK(in6p->in6p_moptions);
1937 im6o_multicast_ifp =
1938 in6p->in6p_moptions->im6o_multicast_ifp;
1939 IM6O_UNLOCK(in6p->in6p_moptions);
1940 }
1941
1942 if (opt != NULL)
1943 optp = opt;
1944 else if (in6p != NULL)
1945 optp = in6p->in6p_outputopts;
1946 /*
1947 * KAME assumption: link id == interface id
1948 */
1949 if (in6p != NULL && optp != NULL &&
1950 (pi = optp->ip6po_pktinfo) != NULL &&
1951 pi->ipi6_ifindex != 0) {
1952 /* ifp is needed here if only we're returning it */
1953 if (ifpp != NULL) {
1954 ifnet_head_lock_shared();
1955 ifp = ifindex2ifnet[pi->ipi6_ifindex];
1956 ifnet_head_done();
1957 }
1958 in6->s6_addr16[1] = htons(pi->ipi6_ifindex);
1959 } else if (in6p != NULL && IN6_IS_ADDR_MULTICAST(in6) &&
1960 in6p->in6p_moptions != NULL && im6o_multicast_ifp != NULL) {
1961 ifp = im6o_multicast_ifp;
1962 in6->s6_addr16[1] = htons(ifp->if_index);
1963 } else if (scopeid != 0) {
1964 /*
1965 * Since scopeid is unsigned, we only have to check it
1966 * against if_index (ifnet_head_lock not needed since
1967 * if_index is an ever-increasing integer.)
1968 */
1969 if (if_index < scopeid)
1970 return (ENXIO); /* XXX EINVAL? */
1971
1972 /* ifp is needed here only if we're returning it */
1973 if (ifpp != NULL) {
1974 ifnet_head_lock_shared();
1975 ifp = ifindex2ifnet[scopeid];
1976 ifnet_head_done();
1977 }
1978 /* XXX assignment to 16bit from 32bit variable */
1979 in6->s6_addr16[1] = htons(scopeid & 0xffff);
1980 }
1981
1982 if (ifpp != NULL) {
1983 if (ifp != NULL)
1984 ifnet_reference(ifp); /* for caller */
1985 *ifpp = ifp;
1986 }
1987 }
1988
1989 return (0);
1990 }
1991
1992 /*
1993 * generate standard sockaddr_in6 from embedded form.
1994 * touches sin6_addr and sin6_scope_id only.
1995 *
1996 * this function should be nuked in the future, when we get rid of
1997 * embedded scopeid thing.
1998 */
1999 int
2000 in6_recoverscope(
2001 struct sockaddr_in6 *sin6,
2002 const struct in6_addr *in6,
2003 struct ifnet *ifp)
2004 {
2005 u_int32_t scopeid;
2006
2007 sin6->sin6_addr = *in6;
2008
2009 /*
2010 * don't try to read *in6 beyond here, since the caller may
2011 * ask us to overwrite existing sockaddr_in6
2012 */
2013
2014 sin6->sin6_scope_id = 0;
2015 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) {
2016 /*
2017 * KAME assumption: link id == interface id
2018 */
2019 scopeid = ntohs(sin6->sin6_addr.s6_addr16[1]);
2020 if (scopeid) {
2021 /*
2022 * sanity check
2023 *
2024 * Since scopeid is unsigned, we only have to check it
2025 * against if_index
2026 */
2027 if (if_index < scopeid)
2028 return (ENXIO);
2029 if (ifp && ifp->if_index != scopeid)
2030 return (ENXIO);
2031 sin6->sin6_addr.s6_addr16[1] = 0;
2032 sin6->sin6_scope_id = scopeid;
2033 }
2034 }
2035
2036 return (0);
2037 }