X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/8f6c56a50524aa785f7e596d52dddfb331e18961..d190cdc3f5544636abb56dc1874be391d3e1b148:/bsd/netinet6/in6_pcb.c diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index 3c28774df..54b9555f6 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -102,10 +102,13 @@ #include #include #include +#include +#include #include #include #include +#include #include #include @@ -117,15 +120,11 @@ #include #include #include +#include #include #include -#include "faith.h" -#if defined(NFAITH) && NFAITH > 0 -#include -#endif - #if IPSEC #include #if INET6 @@ -136,58 +135,108 @@ #include #endif #include -extern lck_mtx_t *sadb_mutex; #endif /* IPSEC */ -struct in6_addr zeroin6_addr; +#if NECP +#include +#endif /* NECP */ + +/* + * in6_pcblookup_local_and_cleanup does everything + * in6_pcblookup_local does but it checks for a socket + * that's going away. Since we know that the lock is + * held read+write when this function is called, we + * can safely dispose of this socket like the slow + * timer would usually do and return NULL. This is + * great for bind. + */ +static struct inpcb * +in6_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo, + struct in6_addr *laddr, u_int lport_arg, int wild_okay) +{ + struct inpcb *inp; + + /* Perform normal lookup */ + inp = in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay); + /* Check if we found a match but it's waiting to be disposed */ + if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) { + struct socket *so = inp->inp_socket; + + lck_mtx_lock(&inp->inpcb_mtx); + + if (so->so_usecount == 0) { + if (inp->inp_state != INPCB_STATE_DEAD) + in6_pcbdetach(inp); + in_pcbdispose(inp); /* will unlock & destroy */ + inp = NULL; + } else { + lck_mtx_unlock(&inp->inpcb_mtx); + } + } + + return (inp); +} + +/* + * Bind an INPCB to an address and/or port. This routine should not alter + * the caller-supplied local address "nam". + */ int -in6_pcbbind( - struct inpcb *inp, - struct sockaddr *nam, - struct proc *p) +in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p) { struct socket *so = inp->inp_socket; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); + struct ifnet *outif = NULL; + struct sockaddr_in6 sin6; + int error; + kauth_cred_t cred; if (!in6_ifaddrs) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) - return(EINVAL); - if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) + return (EINVAL); + if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) wild = 1; + socket_unlock(so, 0); /* keep reference */ - lck_rw_lock_exclusive(pcbinfo->mtx); - if (nam) { - sin6 = (struct sockaddr_in6 *)nam; - if (nam->sa_len != sizeof(*sin6)) { - lck_rw_done(pcbinfo->mtx); + lck_rw_lock_exclusive(pcbinfo->ipi_lock); + + bzero(&sin6, sizeof (sin6)); + if (nam != NULL) { + if (nam->sa_len != sizeof (struct sockaddr_in6)) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EINVAL); + return (EINVAL); } /* * family check. */ if (nam->sa_family != AF_INET6) { - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EAFNOSUPPORT); + return (EAFNOSUPPORT); } + lport = SIN6(nam)->sin6_port; + + *(&sin6) = *SIN6(nam); /* KAME hack: embed scopeid */ - if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) { - lck_rw_done(pcbinfo->mtx); + if (in6_embedscope(&sin6.sin6_addr, &sin6, inp, NULL, + NULL) != 0) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return EINVAL; + return (EINVAL); } - /* this must be cleared for ifa_ifwithaddr() */ - sin6->sin6_scope_id = 0; - lport = sin6->sin6_port; - if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { + /* Sanitize local copy for address searches */ + sin6.sin6_flowinfo = 0; + sin6.sin6_scope_id = 0; + sin6.sin6_port = 0; + + if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow compepte duplication of binding if @@ -197,202 +246,266 @@ in6_pcbbind( */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; - } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { - struct ifaddr *ia = NULL; + } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { + struct ifaddr *ifa; - sin6->sin6_port = 0; /* yech... */ - if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) { - lck_rw_done(pcbinfo->mtx); + ifa = ifa_ifwithaddr(SA(&sin6)); + if (ifa == NULL) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EADDRNOTAVAIL); - } - - /* - * XXX: bind to an anycast address might accidentally - * cause sending a packet with anycast source address. - * We should allow to bind to a deprecated address, since - * the application dare to use it. - */ - if (ia && - ((struct in6_ifaddr *)ia)->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { - ifafree(ia); - lck_rw_done(pcbinfo->mtx); - socket_lock(so, 0); - return(EADDRNOTAVAIL); + return (EADDRNOTAVAIL); + } else { + /* + * XXX: bind to an anycast address might + * accidentally cause sending a packet with + * anycast source address. We should allow + * to bind to a deprecated address, since + * the application dare to use it. + */ + IFA_LOCK_SPIN(ifa); + if (((struct in6_ifaddr *)ifa)->ia6_flags & + (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| + IN6_IFF_DETACHED)) { + IFA_UNLOCK(ifa); + IFA_REMREF(ifa); + lck_rw_done(pcbinfo->ipi_lock); + socket_lock(so, 0); + return (EADDRNOTAVAIL); + } + /* + * Opportunistically determine the outbound + * interface that may be used; this may not + * hold true if we end up using a route + * going over a different interface, e.g. + * when sending to a local address. This + * will get updated again after sending. + */ + outif = ifa->ifa_ifp; + IFA_UNLOCK(ifa); + IFA_REMREF(ifa); } - ifafree(ia); - ia = NULL; } - if (lport) { + if (lport != 0) { struct inpcb *t; + uid_t u; /* GROSS */ - if (ntohs(lport) < IPV6PORT_RESERVED && p && - ((so->so_state & SS_PRIV) == 0)) { - lck_rw_done(pcbinfo->mtx); - socket_lock(so, 0); - return(EACCES); + if (ntohs(lport) < IPV6PORT_RESERVED) { + cred = kauth_cred_proc_ref(p); + error = priv_check_cred(cred, + PRIV_NETINET_RESERVEDPORT, 0); + kauth_cred_unref(&cred); + if (error != 0) { + lck_rw_done(pcbinfo->ipi_lock); + socket_lock(so, 0); + return (EACCES); + } } - - if (so->so_uid && - !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { - t = in6_pcblookup_local(pcbinfo, - &sin6->sin6_addr, lport, + if (!IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr) && + (u = kauth_cred_getuid(so->so_cred)) != 0) { + t = in6_pcblookup_local_and_cleanup(pcbinfo, + &sin6.sin6_addr, lport, INPLOOKUP_WILDCARD); - if (t && - (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || - !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || - (t->inp_socket->so_options & - SO_REUSEPORT) == 0) && - so->so_uid != t->inp_socket->so_uid) { - lck_rw_done(pcbinfo->mtx); + if (t != NULL && (!IN6_IS_ADDR_UNSPECIFIED( + &sin6.sin6_addr) || + !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || + !(t->inp_socket->so_options & + SO_REUSEPORT)) && (u != kauth_cred_getuid( + t->inp_socket->so_cred)) && + !(t->inp_socket->so_flags & + SOF_REUSESHAREUID)) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRINUSE); } - if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && - IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + if (!(inp->inp_flags & IN6P_IPV6_V6ONLY) && + IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { struct sockaddr_in sin; - in6_sin6_2_sin(&sin, sin6); - t = in_pcblookup_local(pcbinfo, - sin.sin_addr, lport, - INPLOOKUP_WILDCARD); - if (t && - (so->so_uid != - t->inp_socket->so_uid) && - (ntohl(t->inp_laddr.s_addr) != - INADDR_ANY || - INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket))) { - - lck_rw_done(pcbinfo->mtx); + in6_sin6_2_sin(&sin, &sin6); + t = in_pcblookup_local_and_cleanup( + pcbinfo, sin.sin_addr, lport, + INPLOOKUP_WILDCARD); + if (t != NULL && + !(t->inp_socket->so_options & + SO_REUSEPORT) && + (kauth_cred_getuid(so->so_cred) != + kauth_cred_getuid(t->inp_socket-> + so_cred)) && (t->inp_laddr.s_addr != + INADDR_ANY || SOCK_DOM(so) == + SOCK_DOM(t->inp_socket))) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRINUSE); } } } - t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, - lport, wild); - if (t && (reuseport & t->inp_socket->so_options) == 0) { - lck_rw_done(pcbinfo->mtx); + t = in6_pcblookup_local_and_cleanup(pcbinfo, + &sin6.sin6_addr, lport, wild); + if (t != NULL && + (reuseport & t->inp_socket->so_options) == 0) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); - return(EADDRINUSE); + return (EADDRINUSE); } - if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && - IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { + if (!(inp->inp_flags & IN6P_IPV6_V6ONLY) && + IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { struct sockaddr_in sin; - in6_sin6_2_sin(&sin, sin6); - t = in_pcblookup_local(pcbinfo, sin.sin_addr, - lport, wild); - if (t && - (reuseport & t->inp_socket->so_options) - == 0 && - (ntohl(t->inp_laddr.s_addr) - != INADDR_ANY || - INP_SOCKAF(so) == - INP_SOCKAF(t->inp_socket))) { - lck_rw_done(pcbinfo->mtx); + in6_sin6_2_sin(&sin, &sin6); + t = in_pcblookup_local_and_cleanup(pcbinfo, + sin.sin_addr, lport, wild); + if (t != NULL && (reuseport & + t->inp_socket->so_options) == 0 && + (t->inp_laddr.s_addr != INADDR_ANY || + SOCK_DOM(so) == SOCK_DOM(t->inp_socket))) { + lck_rw_done(pcbinfo->ipi_lock); socket_lock(so, 0); return (EADDRINUSE); } } } - inp->in6p_laddr = sin6->sin6_addr; } + socket_lock(so, 0); + /* + * We unlocked socket's protocol lock for a long time. + * The socket might have been dropped/defuncted. + * Checking if world has changed since. + */ + if (inp->inp_state == INPCB_STATE_DEAD) { + lck_rw_done(pcbinfo->ipi_lock); + return (ECONNABORTED); + } + + /* check if the socket got bound when the lock was released */ + if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + lck_rw_done(pcbinfo->ipi_lock); + return (EINVAL); + } + + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) { + inp->in6p_laddr = sin6.sin6_addr; + inp->in6p_last_outifp = outif; + } + if (lport == 0) { int e; if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p, 1)) != 0) { - lck_rw_done(pcbinfo->mtx); - return(e); + /* Undo any address bind from above. */ + inp->in6p_laddr = in6addr_any; + inp->in6p_last_outifp = NULL; + lck_rw_done(pcbinfo->ipi_lock); + return (e); } - } - else { + } else { inp->inp_lport = lport; if (in_pcbinshash(inp, 1) != 0) { inp->in6p_laddr = in6addr_any; inp->inp_lport = 0; - lck_rw_done(pcbinfo->mtx); + inp->in6p_last_outifp = NULL; + lck_rw_done(pcbinfo->ipi_lock); return (EAGAIN); } - } - lck_rw_done(pcbinfo->mtx); - return(0); + } + lck_rw_done(pcbinfo->ipi_lock); + sflt_notify(so, sock_evt_bound, NULL); + return (0); } /* - * Transform old in6_pcbconnect() into an inner subroutine for new - * in6_pcbconnect(): Do some validity-checking on the remote - * address (in mbuf 'nam') and then determine local host address - * (i.e., which interface) to use to access that remote host. + * Transform old in6_pcbconnect() into an inner subroutine for new + * in6_pcbconnect(); do some validity-checking on the remote address + * (in "nam") and then determine local host address (i.e., which + * interface) to use to access that remote host. + * + * This routine may alter the caller-supplied remote address "nam". * - * This preserves definition of in6_pcbconnect(), while supporting a - * slightly different version for T/TCP. (This is more than - * a bit of a kludge, but cleaning up the internal interfaces would - * have forced minor changes in every protocol). + * This routine might return an ifp with a reference held if the caller + * provides a non-NULL outif, even in the error case. The caller is + * responsible for releasing its reference. */ - int -in6_pcbladdr( - struct inpcb *inp, - struct sockaddr *nam, - struct in6_addr *plocal_addr6) +in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, + struct in6_addr *plocal_addr6, struct ifnet **outif) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; struct in6_addr *addr6 = NULL; struct in6_addr src_storage; - - struct ifnet *ifp = NULL; int error = 0; + unsigned int ifscope; - if (nam->sa_len != sizeof (*sin6)) + if (outif != NULL) + *outif = NULL; + if (nam->sa_len != sizeof (struct sockaddr_in6)) return (EINVAL); - if (sin6->sin6_family != AF_INET6) + if (SIN6(nam)->sin6_family != AF_INET6) return (EAFNOSUPPORT); - if (sin6->sin6_port == 0) + if (SIN6(nam)->sin6_port == 0) return (EADDRNOTAVAIL); /* KAME hack: embed scopeid */ - if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) - return EINVAL; + if (in6_embedscope(&SIN6(nam)->sin6_addr, SIN6(nam), inp, NULL, NULL) != 0) + return (EINVAL); if (in6_ifaddrs) { /* * If the destination address is UNSPECIFIED addr, * use the loopback addr, e.g ::1. */ - if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) - sin6->sin6_addr = in6addr_loopback; + if (IN6_IS_ADDR_UNSPECIFIED(&SIN6(nam)->sin6_addr)) + SIN6(nam)->sin6_addr = in6addr_loopback; } - { + + ifscope = (inp->inp_flags & INP_BOUND_IF) ? + inp->inp_boundifp->if_index : IFSCOPE_NONE; + + /* + * XXX: in6_selectsrc might replace the bound local address + * with the address specified by setsockopt(IPV6_PKTINFO). + * Is it the intended behavior? + * + * in6_selectsrc() might return outif with its reference held + * even in the error case; caller always needs to release it + * if non-NULL. + */ + addr6 = in6_selectsrc(SIN6(nam), inp->in6p_outputopts, inp, + &inp->in6p_route, outif, &src_storage, ifscope, &error); + + if (outif != NULL) { + struct rtentry *rt = inp->in6p_route.ro_rt; /* - * XXX: in6_selectsrc might replace the bound local address - * with the address specified by setsockopt(IPV6_PKTINFO). - * Is it the intended behavior? + * If in6_selectsrc() returns a route, it should be one + * which points to the same ifp as outif. Just in case + * it isn't, use the one from the route for consistency. + * Otherwise if there is no route, leave outif alone as + * it could still be useful to the caller. */ - addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, - inp->in6p_moptions, - &inp->in6p_route, - &inp->in6p_laddr, &src_storage, &error); - if (addr6 == 0) { - if (error == 0) - error = EADDRNOTAVAIL; - return(error); + if (rt != NULL && rt->rt_ifp != *outif) { + ifnet_reference(rt->rt_ifp); /* for caller */ + if (*outif != NULL) + ifnet_release(*outif); + *outif = rt->rt_ifp; } - *plocal_addr6 = *addr6; - /* - * Don't do pcblookup call here; return interface in - * plocal_addr6 - * and exit to caller, that will do the lookup. - */ } - if (inp->in6p_route.ro_rt) - ifp = inp->in6p_route.ro_rt->rt_ifp; + if (addr6 == NULL) { + if (outif != NULL && (*outif) != NULL && + inp_restricted_send(inp, *outif)) { + soevent(inp->inp_socket, + (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); + error = EHOSTUNREACH; + } + if (error == 0) + error = EADDRNOTAVAIL; + return (error); + } - return(0); + *plocal_addr6 = *addr6; + /* + * Don't do pcblookup call here; return interface in + * plocal_addr6 and exit to caller, that will do the lookup. + */ + return (0); } /* @@ -403,432 +516,306 @@ in6_pcbladdr( * then pick one. */ int -in6_pcbconnect(inp, nam, p) - struct inpcb *inp; - struct sockaddr *nam; - struct proc *p; +in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p) { struct in6_addr addr6; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam; struct inpcb *pcb; - int error; + int error = 0; + struct ifnet *outif = NULL; + struct socket *so = inp->inp_socket; /* * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. + * + * in6_pcbladdr() might return an ifp with its reference held + * even in the error case, so make sure that it's released + * whenever it's non-NULL. */ - if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) - return(error); - socket_unlock(inp->inp_socket, 0); + if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0) { + if (outif != NULL && inp_restricted_send(inp, outif)) + soevent(so, + (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED)); + goto done; + } + socket_unlock(so, 0); pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, - sin6->sin6_port, - IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) - ? &addr6 : &inp->in6p_laddr, - inp->inp_lport, 0, NULL); - socket_lock(inp->inp_socket, 0); + sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? + &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL); + socket_lock(so, 0); if (pcb != NULL) { - in_pcb_checkstate(pcb, WNT_RELEASE, 0); - return (EADDRINUSE); + in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0); + error = EADDRINUSE; + goto done; } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { - error = in6_pcbbind(inp, (struct sockaddr *)0, p); + error = in6_pcbbind(inp, NULL, p); if (error) - return (error); + goto done; } inp->in6p_laddr = addr6; + inp->in6p_last_outifp = outif; /* no reference needed */ + inp->in6p_flags |= INP_IN6ADDR_ANY; } - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); - socket_lock(inp->inp_socket, 0); + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* lock inversion issue, mostly with udp multicast packets */ + socket_unlock(so, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); + socket_lock(so, 0); } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; - /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ - inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; - if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) - inp->in6p_flowinfo |= - (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); - + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_invalidate_cache(inp); in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); - return (0); -} + lck_rw_done(inp->inp_pcbinfo->ipi_lock); -#if 0 -/* - * Return an IPv6 address, which is the most appropriate for given - * destination and user specified options. - * If necessary, this function lookups the routing table and return - * an entry to the caller for later use. - */ -struct in6_addr * -in6_selectsrc( - struct sockaddr_in6 *dstsock, - struct ip6_pktopts *opts, - struct ip6_moptions *mopts, - struct route_in6 *ro, - struct in6_addr *laddr, - struct in6_addr *src_storage, - int *errorp) -{ - struct in6_addr *dst; - struct in6_ifaddr *ia6 = 0; - struct in6_pktinfo *pi = NULL; +done: + if (outif != NULL) + ifnet_release(outif); - dst = &dstsock->sin6_addr; - *errorp = 0; - - /* - * If the source address is explicitly specified by the caller, - * use it. - */ - if (opts && (pi = opts->ip6po_pktinfo) && - !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) - return(&pi->ipi6_addr); - - /* - * If the source address is not specified but the socket(if any) - * is already bound, use the bound address. - */ - if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) - return(laddr); - - /* - * If the caller doesn't specify the source address but - * the outgoing interface, use an address associated with - * the interface. - */ - if (pi && pi->ipi6_ifindex) { - /* XXX boundary check is assumed to be already done. */ - ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex], - dst); - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifafree(&ia6->ia_ifa); - return(src_storage); - } - - /* - * If the destination address is a link-local unicast address or - * a multicast address, and if the outgoing interface is specified - * by the sin6_scope_id filed, use an address associated with the - * interface. - * XXX: We're now trying to define more specific semantics of - * sin6_scope_id field, so this part will be rewritten in - * the near future. - */ - if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) && - dstsock->sin6_scope_id) { - /* - * I'm not sure if boundary check for scope_id is done - * somewhere... - */ - if (dstsock->sin6_scope_id < 0 || - if_index < dstsock->sin6_scope_id) { - *errorp = ENXIO; /* XXX: better error? */ - return(0); - } - ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id], - dst); - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifafree(&ia6->ia_ifa); - return(src_storage); - } - - /* - * If the destination address is a multicast address and - * the outgoing interface for the address is specified - * by the caller, use an address associated with the interface. - * There is a sanity check here; if the destination has node-local - * scope, the outgoing interfacde should be a loopback address. - * Even if the outgoing interface is not specified, we also - * choose a loopback interface as the outgoing interface. - */ - if (IN6_IS_ADDR_MULTICAST(dst)) { - struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL; - - if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) { - ifp = &loif[0]; - } - - if (ifp) { - ia6 = in6_ifawithscope(ifp, dst); - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = ia6->ia_addr.sin6_addr; - ifafree(&ia6->ia_ifa); - return(src_storage); - } - } - - /* - * If the next hop address for the packet is specified - * by caller, use an address associated with the route - * to the next hop. - */ - { - struct sockaddr_in6 *sin6_next; - struct rtentry *rt; - - if (opts && opts->ip6po_nexthop) { - sin6_next = satosin6(opts->ip6po_nexthop); - rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL, 0); - if (rt) { - ia6 = in6_ifawithscope(rt->rt_ifp, dst); - if (ia6 == 0) { - ifaref(&rt->rt_ifa); - ia6 = ifatoia6(rt->rt_ifa); - } - } - if (ia6 == 0) { - *errorp = EADDRNOTAVAIL; - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifaref(&rt->rt_ifa); - return(src_storage); - } - } - - /* - * If route is known or can be allocated now, - * our src addr is taken from the i/f, else punt. - */ - if (ro) { - if (ro->ro_rt && - !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) { - rtfree(ro->ro_rt); - ro->ro_rt = (struct rtentry *)0; - } - if (ro->ro_rt == (struct rtentry *)0 || - ro->ro_rt->rt_ifp == (struct ifnet *)0) { - struct sockaddr_in6 *dst6; - - /* No route yet, so try to acquire one */ - bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); - dst6 = (struct sockaddr_in6 *)&ro->ro_dst; - dst6->sin6_family = AF_INET6; - dst6->sin6_len = sizeof(struct sockaddr_in6); - dst6->sin6_addr = *dst; - if (IN6_IS_ADDR_MULTICAST(dst)) { - ro->ro_rt = rtalloc1(&((struct route *)ro) - ->ro_dst, 0, 0UL); - } else { - rtalloc((struct route *)ro); - } - } - - /* - * in_pcbconnect() checks out IFF_LOOPBACK to skip using - * the address. But we don't know why it does so. - * It is necessary to ensure the scope even for lo0 - * so doesn't check out IFF_LOOPBACK. - */ - - if (ro->ro_rt) { - ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst); - if (ia6 == 0) { /* xxx scope error ?*/ - ifaref(ro->ro_rt->rt_ifa); - ia6 = ifatoia6(ro->ro_rt->rt_ifa); - } - } - if (ia6 == 0) { - *errorp = EHOSTUNREACH; /* no route */ - return(0); - } - *src_storage = satosin6(&ia6->ia_addr)->sin6_addr; - ifaref(&rt->rt_ifa); - return(src_storage); - } - - *errorp = EADDRNOTAVAIL; - return(0); -} - -/* - * Default hop limit selection. The precedence is as follows: - * 1. Hoplimit valued specified via ioctl. - * 2. (If the outgoing interface is detected) the current - * hop limit of the interface specified by router advertisement. - * 3. The system default hoplimit. -*/ -int -in6_selecthlim( - struct in6pcb *in6p, - struct ifnet *ifp) -{ - if (in6p && in6p->in6p_hops >= 0) - return(in6p->in6p_hops); - else if (ifp) - return(nd_ifinfo[ifp->if_index].chlim); - else - return(ip6_defhlim); + return (error); } -#endif void -in6_pcbdisconnect(inp) - struct inpcb *inp; +in6_pcbdisconnect(struct inpcb *inp) { - if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) { - /*lock inversion issue, mostly with udp multicast packets */ - socket_unlock(inp->inp_socket, 0); - lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx); - socket_lock(inp->inp_socket, 0); + struct socket *so = inp->inp_socket; + + if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) { + /* lock inversion issue, mostly with udp multicast packets */ + socket_unlock(so, 0); + lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock); + socket_lock(so, 0); } - bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); + if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP) + nstat_pcb_cache(inp); + bzero((caddr_t)&inp->in6p_faddr, sizeof (inp->in6p_faddr)); inp->inp_fport = 0; - /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ - inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; + /* clear flowinfo - RFC 6437 */ + inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; in_pcbrehash(inp); - lck_rw_done(inp->inp_pcbinfo->mtx); - if (inp->inp_socket->so_state & SS_NOFDREF) + lck_rw_done(inp->inp_pcbinfo->ipi_lock); + /* + * A multipath subflow socket would have its SS_NOFDREF set by default, + * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB; + * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared. + */ + if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF)) in6_pcbdetach(inp); } void -in6_pcbdetach(inp) - struct inpcb *inp; +in6_pcbdetach(struct inpcb *inp) { struct socket *so = inp->inp_socket; - struct inpcbinfo *ipi = inp->inp_pcbinfo; + if (so->so_pcb == NULL) { + /* PCB has been disposed */ + panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__, + inp, so, SOCK_PROTO(so)); + /* NOTREACHED */ + } + #if IPSEC if (inp->in6p_sp != NULL) { - lck_mtx_lock(sadb_mutex); - ipsec6_delete_pcbpolicy(inp); - lck_mtx_unlock(sadb_mutex); + (void) ipsec6_delete_pcbpolicy(inp); } #endif /* IPSEC */ - if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) - printf("in6_pcbdetach so=%x can't be marked dead ok\n", so); + /* + * Let NetworkStatistics know this PCB is going away + * before we detach it. + */ + if (nstat_collect && + (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP)) + nstat_pcb_detach(inp); + /* mark socket state as dead */ + if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) { + panic("%s: so=%p proto=%d couldn't set to STOPUSING\n", + __func__, so, SOCK_PROTO(so)); + /* NOTREACHED */ + } - inp->inp_state = INPCB_STATE_DEAD; + if (!(so->so_flags & SOF_PCBCLEARING)) { + struct ip_moptions *imo; + struct ip6_moptions *im6o; - if ((so->so_flags & SOF_PCBCLEARING) == 0) { inp->inp_vflag = 0; - so->so_flags |= SOF_PCBCLEARING; - inp->inp_gencnt = ++ipi->ipi_gencnt; - if (inp->in6p_options) + if (inp->in6p_options != NULL) { m_freem(inp->in6p_options); - ip6_freepcbopts(inp->in6p_outputopts); - ip6_freemoptions(inp->in6p_moptions); - if (inp->in6p_route.ro_rt) - rtfree(inp->in6p_route.ro_rt); - /* Check and free IPv4 related resources in case of mapped addr */ - if (inp->inp_options) - (void)m_free(inp->inp_options); - ip_freemoptions(inp->inp_moptions); + inp->in6p_options = NULL; + } + ip6_freepcbopts(inp->in6p_outputopts); + ROUTE_RELEASE(&inp->in6p_route); + /* free IPv4 related resources in case of mapped addr */ + if (inp->inp_options != NULL) { + (void) m_free(inp->inp_options); + inp->inp_options = NULL; + } + im6o = inp->in6p_moptions; + inp->in6p_moptions = NULL; + + imo = inp->inp_moptions; inp->inp_moptions = NULL; - + + sofreelastref(so, 0); + inp->inp_state = INPCB_STATE_DEAD; + /* makes sure we're not called twice from so_close */ + so->so_flags |= SOF_PCBCLEARING; + + inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST); + + /* + * See inp_join_group() for why we need to unlock + */ + if (im6o != NULL || imo != NULL) { + socket_unlock(so, 0); + if (im6o != NULL) + IM6O_REMREF(im6o); + if (imo != NULL) + IMO_REMREF(imo); + socket_lock(so, 0); + } } } struct sockaddr * -in6_sockaddr(port, addr_p) - in_port_t port; - struct in6_addr *addr_p; +in6_sockaddr(in_port_t port, struct in6_addr *addr_p) { struct sockaddr_in6 *sin6; - MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK); - bzero(sin6, sizeof *sin6); + MALLOC(sin6, struct sockaddr_in6 *, sizeof (*sin6), M_SONAME, M_WAITOK); + if (sin6 == NULL) + return (NULL); + bzero(sin6, sizeof (*sin6)); sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(*sin6); + sin6->sin6_len = sizeof (*sin6); sin6->sin6_port = port; sin6->sin6_addr = *addr_p; + + /* would be good to use sa6_recoverscope(), except for locking */ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); else - sin6->sin6_scope_id = 0; /*XXX*/ + sin6->sin6_scope_id = 0; /* XXX */ if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) sin6->sin6_addr.s6_addr16[1] = 0; - return (struct sockaddr *)sin6; + return ((struct sockaddr *)sin6); } -struct sockaddr * -in6_v4mapsin6_sockaddr(port, addr_p) - in_port_t port; - struct in_addr *addr_p; +void +in6_sockaddr_s(in_port_t port, struct in6_addr *addr_p, + struct sockaddr_in6 *sin6) { - struct sockaddr_in sin; - struct sockaddr_in6 *sin6_p; - - bzero(&sin, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(sin); - sin.sin_port = port; - sin.sin_addr = *addr_p; - - MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, - M_WAITOK); - in6_sin_2_v4mapsin6(&sin, sin6_p); + bzero(sin6, sizeof (*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof (*sin6); + sin6->sin6_port = port; + sin6->sin6_addr = *addr_p; - return (struct sockaddr *)sin6_p; + /* would be good to use sa6_recoverscope(), except for locking */ + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); + else + sin6->sin6_scope_id = 0; /* XXX */ + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + sin6->sin6_addr.s6_addr16[1] = 0; } /* - * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was + * The calling convention of in6_getsockaddr() and in6_getpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly - * without the need for a wrapper function. The socket must have a valid - * (i.e., non-nil) PCB, but it should be impossible to get an invalid one - * except through a kernel programming error, so it is acceptable to panic - * (or in this case trap) if the PCB is invalid. (Actually, we don't trap - * because there actually /is/ a programming error somewhere... XXX) + * without the need for a wrapper function. */ int -in6_setsockaddr(so, nam) - struct socket *so; - struct sockaddr **nam; +in6_getsockaddr(struct socket *so, struct sockaddr **nam) { struct inpcb *inp; struct in6_addr addr; in_port_t port; - inp = sotoinpcb(so); - if (!inp) { - return EINVAL; - } + if ((inp = sotoinpcb(so)) == NULL) + return (EINVAL); + port = inp->inp_lport; addr = inp->in6p_laddr; *nam = in6_sockaddr(port, &addr); - return 0; + if (*nam == NULL) + return (ENOBUFS); + return (0); } int -in6_setpeeraddr(so, nam) - struct socket *so; - struct sockaddr **nam; +in6_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss) { struct inpcb *inp; struct in6_addr addr; in_port_t port; - inp = sotoinpcb(so); - if (!inp) { - return EINVAL; - } + VERIFY(ss != NULL); + bzero(ss, sizeof (*ss)); + + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) + return (inp == NULL ? EINVAL : EPROTOTYPE); + + port = inp->inp_lport; + addr = inp->in6p_laddr; + + in6_sockaddr_s(port, &addr, SIN6(ss)); + return (0); +} + +int +in6_getpeeraddr(struct socket *so, struct sockaddr **nam) +{ + struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + if ((inp = sotoinpcb(so)) == NULL) + return (EINVAL); + port = inp->inp_fport; addr = inp->in6p_faddr; *nam = in6_sockaddr(port, &addr); - return 0; + if (*nam == NULL) + return (ENOBUFS); + return (0); +} + +int +in6_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss) +{ + struct inpcb *inp; + struct in6_addr addr; + in_port_t port; + + VERIFY(ss != NULL); + bzero(ss, sizeof (*ss)); + + if ((inp = sotoinpcb(so)) == NULL +#if NECP + || (necp_socket_should_use_flow_divert(inp)) +#endif /* NECP */ + ) + return (inp == NULL ? EINVAL : EPROTOTYPE); + + port = inp->inp_fport; + addr = inp->in6p_faddr; + + in6_sockaddr_s(port, &addr, SIN6(ss)); + return (0); } int @@ -838,16 +825,16 @@ in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) int error; if (inp == NULL) - return EINVAL; + return (EINVAL); if (inp->inp_vflag & INP_IPV4) { - error = in_setsockaddr(so, nam); + error = in_getsockaddr(so, nam); if (error == 0) - in6_sin_2_v4mapsin6_in_sock(nam); - } else - /* scope issues will be handled in in6_setsockaddr(). */ - error = in6_setsockaddr(so, nam); - - return error; + error = in6_sin_2_v4mapsin6_in_sock(nam); + } else { + /* scope issues will be handled in in6_getsockaddr(). */ + error = in6_getsockaddr(so, nam); + } + return (error); } int @@ -857,16 +844,16 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) int error; if (inp == NULL) - return EINVAL; + return (EINVAL); if (inp->inp_vflag & INP_IPV4) { - error = in_setpeeraddr(so, nam); + error = in_getpeeraddr(so, nam); if (error == 0) - in6_sin_2_v4mapsin6_in_sock(nam); - } else - /* scope issues will be handled in in6_setpeeraddr(). */ - error = in6_setpeeraddr(so, nam); - - return error; + error = in6_sin_2_v4mapsin6_in_sock(nam); + } else { + /* scope issues will be handled in in6_getpeeraddr(). */ + error = in6_getpeeraddr(so, nam); + } + return (error); } /* @@ -877,37 +864,31 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. - * - * Must be called at splnet. */ void -in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify) - struct inpcbinfo *pcbinfo; - struct sockaddr *dst; - const struct sockaddr *src; - u_int fport_arg, lport_arg; - int cmd; -// struct inpcb *(*notify)(struct inpcb *, int); - void (*notify)(struct inpcb *, int); +in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg, + const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg, + void (*notify)(struct inpcb *, int)) { + struct inpcbhead *head = pcbinfo->ipi_listhead; struct inpcb *inp, *ninp; struct sockaddr_in6 sa6_src, *sa6_dst; u_short fport = fport_arg, lport = lport_arg; u_int32_t flowinfo; int errno; - struct inpcbhead *head = pcbinfo->listhead; - if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6) + if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) return; - sa6_dst = (struct sockaddr_in6 *)dst; + sa6_dst = (struct sockaddr_in6 *)(void *)dst; if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) return; /* * note that src can be NULL when we get notify by local fragmentation. */ - sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; + sa6_src = (src == NULL) ? + sa6_any : *(struct sockaddr_in6 *)(uintptr_t)(size_t)src; flowinfo = sa6_src.sin6_flowinfo; /* @@ -921,19 +902,36 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify) if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; - bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); + bzero((caddr_t)&sa6_src.sin6_addr, sizeof (sa6_src.sin6_addr)); if (cmd != PRC_HOSTDEAD) notify = in6_rtchange; } errno = inet6ctlerrmap[cmd]; - lck_rw_lock_shared(pcbinfo->mtx); - for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { - ninp = LIST_NEXT(inp, inp_list); + lck_rw_lock_shared(pcbinfo->ipi_lock); + for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { + ninp = LIST_NEXT(inp, inp_list); - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; + /* + * If the error designates a new path MTU for a destination + * and the application (associated with this socket) wanted to + * know the value, notify. Note that we notify for all + * disconnected sockets if the corresponding application + * wanted. This is because some UDP applications keep sending + * sockets disconnected. + * XXX: should we avoid to notify the value to TCP sockets? + */ + if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && + (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, + &sa6_dst->sin6_addr))) { + ip6_notify_pmtu(inp, (struct sockaddr_in6 *)(void *)dst, + (u_int32_t *)cmdarg); + } + /* * Detect if we should notify the error. If no source and * destination ports are specifed, but non-zero flowinfo and @@ -944,46 +942,44 @@ in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, notify) */ if (lport == 0 && fport == 0 && flowinfo && inp->inp_socket != NULL && - flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && + flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) goto do_notify; else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, - &sa6_dst->sin6_addr) || - inp->inp_socket == 0 || - (lport && inp->inp_lport != lport) || - (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, - &sa6_src.sin6_addr)) || - (fport && inp->inp_fport != fport)) + &sa6_dst->sin6_addr) || inp->inp_socket == NULL || + (lport && inp->inp_lport != lport) || + (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && + !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport)) continue; - - do_notify: +do_notify: if (notify) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == + WNT_STOPUSING) continue; socket_lock(inp->inp_socket, 1); (*notify)(inp, errno); - (void)in_pcb_checkstate(inp, WNT_RELEASE, 1); + (void) in_pcb_checkstate(inp, WNT_RELEASE, 1); socket_unlock(inp->inp_socket, 1); } } - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); } /* * Lookup a PCB based on the local address and port. */ struct inpcb * -in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) - struct inpcbinfo *pcbinfo; - struct in6_addr *laddr; - u_int lport_arg; - int wild_okay; +in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, + u_int lport_arg, int wild_okay) { struct inpcb *inp; int matchwild = 3, wildcard; u_short lport = lport_arg; + struct inpcbporthead *porthash; + struct inpcb *match = NULL; + struct inpcbport *phd; if (!wild_okay) { struct inpcbhead *head; @@ -991,10 +987,10 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && @@ -1009,97 +1005,51 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) * Not found. */ return (NULL); - } else { - struct inpcbporthead *porthash; - struct inpcbport *phd; - struct inpcb *match = NULL; + } + /* + * Best fit PCB lookup. + * + * First see if this local port is in use by looking on the + * port hash list. + */ + porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, + pcbinfo->ipi_porthashmask)]; + LIST_FOREACH(phd, porthash, phd_hash) { + if (phd->phd_port == lport) + break; + } + if (phd != NULL) { /* - * Best fit PCB lookup. - * - * First see if this local port is in use by looking on the - * port hash list. + * Port is in use by one or more PCBs. Look for best + * fit. */ - porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, - pcbinfo->porthashmask)]; - LIST_FOREACH(phd, porthash, phd_hash) { - if (phd->phd_port == lport) - break; - } - if (phd != NULL) { - /* - * Port is in use by one or more PCBs. Look for best - * fit. - */ - LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { - wildcard = 0; - if ((inp->inp_vflag & INP_IPV6) == 0) + LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { + wildcard = 0; + if (!(inp->inp_vflag & INP_IPV6)) + continue; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + wildcard++; + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (IN6_IS_ADDR_UNSPECIFIED(laddr)) + wildcard++; + else if (!IN6_ARE_ADDR_EQUAL( + &inp->in6p_laddr, laddr)) continue; - if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + } else { + if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) wildcard++; - if (!IN6_IS_ADDR_UNSPECIFIED( - &inp->in6p_laddr)) { - if (IN6_IS_ADDR_UNSPECIFIED(laddr)) - wildcard++; - else if (!IN6_ARE_ADDR_EQUAL( - &inp->in6p_laddr, laddr)) - continue; - } else { - if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) - wildcard++; - } - if (wildcard < matchwild) { - match = inp; - matchwild = wildcard; - if (matchwild == 0) { - break; - } - } } - } - return (match); - } -} -#ifndef APPLE -/* this is not used in Darwin */ -void -in6_pcbpurgeif0( - struct in6pcb *head, - struct ifnet *ifp) -{ - struct in6pcb *in6p; - struct ip6_moptions *im6o; - struct in6_multi_mship *imm, *nimm; - - for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) { - im6o = in6p->in6p_moptions; - if ((in6p->inp_vflag & INP_IPV6) && - im6o) { - /* - * Unselect the outgoing interface if it is being - * detached. - */ - if (im6o->im6o_multicast_ifp == ifp) - im6o->im6o_multicast_ifp = NULL; - - /* - * Drop multicast group membership if we joined - * through the interface being detached. - * XXX controversial - is it really legal for kernel - * to force this? - */ - for (imm = im6o->im6o_memberships.lh_first; - imm != NULL; imm = nimm) { - nimm = imm->i6mm_chain.le_next; - if (imm->i6mm_maddr->in6m_ifp == ifp) { - LIST_REMOVE(imm, i6mm_chain); - in6_delmulti(imm->i6mm_maddr); - FREE(imm, M_IPMADDR); + if (wildcard < matchwild) { + match = inp; + matchwild = wildcard; + if (matchwild == 0) { + break; } } } } + return (match); } -#endif /* * Check for alternatives when higher level complains @@ -1108,33 +1058,31 @@ in6_pcbpurgeif0( * (by a redirect), time to try a default gateway again. */ void -in6_losing(in6p) - struct inpcb *in6p; +in6_losing(struct inpcb *in6p) { struct rtentry *rt; - struct rt_addrinfo info; if ((rt = in6p->in6p_route.ro_rt) != NULL) { - in6p->in6p_route.ro_rt = 0; - bzero((caddr_t)&info, sizeof(info)); - info.rti_info[RTAX_DST] = - (struct sockaddr *)&in6p->in6p_route.ro_dst; - info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; - info.rti_info[RTAX_NETMASK] = rt_mask(rt); - lck_mtx_lock(rt_mtx); - rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); - if (rt->rt_flags & RTF_DYNAMIC) - (void)rtrequest_locked(RTM_DELETE, rt_key(rt), - rt->rt_gateway, rt_mask(rt), rt->rt_flags, - (struct rtentry **)0); - else + RT_LOCK(rt); + if (rt->rt_flags & RTF_DYNAMIC) { + /* + * Prevent another thread from modifying rt_key, + * rt_gateway via rt_setgate() after the rt_lock + * is dropped by marking the route as defunct. + */ + rt->rt_flags |= RTF_CONDEMNED; + RT_UNLOCK(rt); + (void) rtrequest(RTM_DELETE, rt_key(rt), + rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL); + } else { + RT_UNLOCK(rt); + } /* * A new route can be allocated * the next time output is attempted. */ - rtfree_locked(rt); - lck_mtx_unlock(rt_mtx); } + ROUTE_RELEASE(&in6p->in6p_route); } /* @@ -1142,105 +1090,194 @@ in6_losing(in6p) * and allocate a (hopefully) better one. */ void -in6_rtchange( - struct inpcb *inp, - int errno) +in6_rtchange(struct inpcb *inp, int errno) { - if (inp->in6p_route.ro_rt) { - rtfree(inp->in6p_route.ro_rt); - inp->in6p_route.ro_rt = 0; - /* - * A new route can be allocated the next time - * output is attempted. - */ +#pragma unused(errno) + /* + * A new route can be allocated the next time + * output is attempted. + */ + ROUTE_RELEASE(&inp->in6p_route); +} + +/* + * Check if PCB exists hash list. Also returns uid and gid of socket + */ +int +in6_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard, + uid_t *uid, gid_t *gid, struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp; + u_short fport = fport_arg, lport = lport_arg; + int found; + + *uid = UID_MAX; + *gid = GID_MAX; + + lck_rw_lock_shared(pcbinfo->ipi_lock); + + /* + * First look for an exact match. + */ + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, + lport, fport, pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { + if (!(inp->inp_vflag & INP_IPV6)) + continue; + + if (inp_restricted_recv(inp, ifp)) + continue; + + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_fport == fport && + inp->inp_lport == lport) { + if ((found = (inp->inp_socket != NULL))) { + /* + * Found. Check if pcb is still valid + */ + *uid = kauth_cred_getuid( + inp->inp_socket->so_cred); + *gid = kauth_cred_getgid( + inp->inp_socket->so_cred); + } + lck_rw_done(pcbinfo->ipi_lock); + return (found); + } + } + if (wildcard) { + struct inpcb *local_wild = NULL; + + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; + LIST_FOREACH(inp, head, inp_hash) { + if (!(inp->inp_vflag & INP_IPV6)) + continue; + + if (inp_restricted_recv(inp, ifp)) + continue; + + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && + inp->inp_lport == lport) { + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, + laddr)) { + found = (inp->inp_socket != NULL); + if (found) { + *uid = kauth_cred_getuid( + inp->inp_socket->so_cred); + *gid = kauth_cred_getgid( + inp->inp_socket->so_cred); + } + lck_rw_done(pcbinfo->ipi_lock); + return (found); + } else if (IN6_IS_ADDR_UNSPECIFIED( + &inp->in6p_laddr)) { + local_wild = inp; + } + } + } + if (local_wild) { + if ((found = (local_wild->inp_socket != NULL))) { + *uid = kauth_cred_getuid( + local_wild->inp_socket->so_cred); + *gid = kauth_cred_getgid( + local_wild->inp_socket->so_cred); + } + lck_rw_done(pcbinfo->ipi_lock); + return (found); + } } + + /* + * Not found. + */ + lck_rw_done(pcbinfo->ipi_lock); + return (0); } /* * Lookup PCB in hash list. */ struct inpcb * -in6_pcblookup_hash( - struct inpcbinfo *pcbinfo, - struct in6_addr *faddr, - u_int fport_arg, - struct in6_addr *laddr, - u_int lport_arg, - int wildcard, - struct ifnet *ifp) +in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, + u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard, + struct ifnet *ifp) { struct inpcbhead *head; struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; - int faith; -#if defined(NFAITH) && NFAITH > 0 - faith = faithprefix(laddr); -#else - faith = 0; -#endif - - lck_rw_lock_shared(pcbinfo->mtx); + lck_rw_lock_shared(pcbinfo->ipi_lock); /* * First look for an exact match. */ - head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, - lport, fport, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, + lport, fport, pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) + continue; + + if (inp_restricted_recv(inp, ifp)) continue; + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && inp->inp_fport == fport && inp->inp_lport == lport) { /* - * Found. Check if pcb is still valid - */ - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + * Found. Check if pcb is still valid + */ + if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != + WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (inp); - } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); - return(NULL); + } else { + /* it's there but dead, say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); + return (NULL); } } } if (wildcard) { struct inpcb *local_wild = NULL; - head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, - pcbinfo->hashmask)]; + head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, + pcbinfo->ipi_hashmask)]; LIST_FOREACH(inp, head, inp_hash) { - if ((inp->inp_vflag & INP_IPV6) == 0) + if (!(inp->inp_vflag & INP_IPV6)) continue; + + if (inp_restricted_recv(inp, ifp)) + continue; + if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && inp->inp_lport == lport) { - if (faith && (inp->inp_flags & INP_FAITH) == 0) - continue; if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, - laddr)) { - if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + laddr)) { + if (in_pcb_checkstate(inp, WNT_ACQUIRE, + 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (inp); + } else { + /* dead; say it isn't found */ + lck_rw_done(pcbinfo->ipi_lock); + return (NULL); } - else { /* it's there but dead, say it isn't found */ - lck_rw_done(pcbinfo->mtx); - return(NULL); - } - } - else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) + } else if (IN6_IS_ADDR_UNSPECIFIED( + &inp->in6p_laddr)) { local_wild = inp; + } } } - if (local_wild && in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) { - lck_rw_done(pcbinfo->mtx); + if (local_wild && in_pcb_checkstate(local_wild, + WNT_ACQUIRE, 0) != WNT_STOPUSING) { + lck_rw_done(pcbinfo->ipi_lock); return (local_wild); - } - else { - lck_rw_done(pcbinfo->mtx); + } else { + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } } @@ -1248,7 +1285,7 @@ in6_pcblookup_hash( /* * Not found. */ - lck_rw_done(pcbinfo->mtx); + lck_rw_done(pcbinfo->ipi_lock); return (NULL); } @@ -1258,15 +1295,70 @@ init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) struct ip6_hdr *ip; ip = mtod(m, struct ip6_hdr *); - bzero(sin6, sizeof(*sin6)); - sin6->sin6_len = sizeof(*sin6); + bzero(sin6, sizeof (*sin6)); + sin6->sin6_len = sizeof (*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_addr = ip->ip6_src; - if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) + if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) { sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - ? m->m_pkthdr.rcvif->if_index : 0; + if ((m->m_pkthdr.pkt_flags & (PKTF_LOOP|PKTF_IFAINFO)) == + (PKTF_LOOP|PKTF_IFAINFO)) + sin6->sin6_scope_id = m->m_pkthdr.src_ifindex; + else if (m->m_pkthdr.rcvif != NULL) + sin6->sin6_scope_id = m->m_pkthdr.rcvif->if_index; + } +} + +/* + * The following routines implement this scheme: + * + * Callers of ip6_output() that intend to cache the route in the inpcb pass + * a local copy of the struct route to ip6_output(). Using a local copy of + * the cached route significantly simplifies things as IP no longer has to + * worry about having exclusive access to the passed in struct route, since + * it's defined in the caller's stack; in essence, this allows for a lock- + * less operation when updating the struct route at the IP level and below, + * whenever necessary. The scheme works as follows: + * + * Prior to dropping the socket's lock and calling ip6_output(), the caller + * copies the struct route from the inpcb into its stack, and adds a reference + * to the cached route entry, if there was any. The socket's lock is then + * dropped and ip6_output() is called with a pointer to the copy of struct + * route defined on the stack (not to the one in the inpcb.) + * + * Upon returning from ip6_output(), the caller then acquires the socket's + * lock and synchronizes the cache; if there is no route cached in the inpcb, + * it copies the local copy of struct route (which may or may not contain any + * route) back into the cache; otherwise, if the inpcb has a route cached in + * it, the one in the local copy will be freed, if there's any. Trashing the + * cached route in the inpcb can be avoided because ip6_output() is single- + * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized + * by the socket/transport layer.) + */ +void +in6p_route_copyout(struct inpcb *inp, struct route_in6 *dst) +{ + struct route_in6 *src = &inp->in6p_route; + + lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + + /* Minor sanity check */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6) + panic("%s: wrong or corrupted route: %p", __func__, src); + + route_copyout((struct route *)dst, (struct route *)src, sizeof (*dst)); +} + +void +in6p_route_copyin(struct inpcb *inp, struct route_in6 *src) +{ + struct route_in6 *dst = &inp->in6p_route; + + lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); + + /* Minor sanity check */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6) + panic("%s: wrong or corrupted route: %p", __func__, src); - return; + route_copyin((struct route *)src, (struct route *)dst, sizeof (*src)); }