]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet6/in6_pcb.c
xnu-3789.31.2.tar.gz
[apple/xnu.git] / bsd / netinet6 / in6_pcb.c
index 709196f222c5eb362455f110b0a01baff9ecbb8b..54b9555f60a775950e423c9e893fd646d2810737 100644 (file)
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2003-2013 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/proc.h>
+#include <sys/kauth.h>
+#include <sys/priv.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/route.h>
+#include <net/ntstat.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/in6_pcb.h>
 #include <net/if_types.h>
+#include <net/if_var.h>
 
-#include "faith.h"
-#if defined(NFAITH) && NFAITH > 0
-#include <net/if_faith.h>
-#endif
+#include <kern/kern_types.h>
+#include <kern/zalloc.h>
 
 #if IPSEC
 #include <netinet6/ipsec.h>
 #include <netkey/key.h>
 #endif /* IPSEC */
 
-struct in6_addr zeroin6_addr;
+#if NECP
+#include <net/necp.h>
+#endif /* NECP */
+
+/*
+ * in6_pcblookup_local_and_cleanup does everything
+ * in6_pcblookup_local does but it checks for a socket
+ * that's going away. Since we know that the lock is
+ * held read+write when this function is called, we
+ * can safely dispose of this socket like the slow
+ * timer would usually do and return NULL. This is
+ * great for bind.
+ *
+ * All four arguments are handed unchanged to
+ * in6_pcblookup_local().
+ *
+ * Returns the PCB found by that lookup, or NULL when
+ * either nothing matched or the match was marked
+ * WNT_STOPUSING and its socket use count was zero; in
+ * the latter case the PCB is detached (unless already
+ * in INPCB_STATE_DEAD) and disposed of here.  A
+ * WNT_STOPUSING match whose socket is still in use is
+ * returned as is.
+ */
+static struct inpcb *
+in6_pcblookup_local_and_cleanup(struct inpcbinfo *pcbinfo,
+    struct in6_addr *laddr, u_int lport_arg, int wild_okay)
+{
+       struct inpcb *inp;
+
+       /* Perform normal lookup */
+       inp = in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
 
+       /*
+        * Check if we found a match but it's waiting to be disposed.
+        * The use count is examined with the PCB mutex held.
+        */
+       if (inp != NULL && inp->inp_wantcnt == WNT_STOPUSING) {
+               struct socket *so = inp->inp_socket;
+
+               lck_mtx_lock(&inp->inpcb_mtx);
+
+               if (so->so_usecount == 0) {
+                       if (inp->inp_state != INPCB_STATE_DEAD)
+                               in6_pcbdetach(inp);
+                       in_pcbdispose(inp);     /* will unlock & destroy */
+                       inp = NULL;
+               } else {        /* socket still in use: hand the PCB back */
+                       lck_mtx_unlock(&inp->inpcb_mtx);
+               }
+       }
+
+       return (inp);
+}
+
+/*
+ * Bind an INPCB to an address and/or port.  This routine should not alter
+ * the caller-supplied local address "nam".
+ */
 int
-in6_pcbbind(inp, nam, p)
-       register struct inpcb *inp;
-       struct sockaddr *nam;
-       struct proc *p;
+in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 {
        struct socket *so = inp->inp_socket;
-       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
        struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
        u_short lport = 0;
        int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
+       struct ifnet *outif = NULL;
+       struct sockaddr_in6 sin6;
+       int error;
+       kauth_cred_t cred;
 
-       if (!in6_ifaddr) /* XXX broken! */
+       if (!in6_ifaddrs) /* XXX broken! */
                return (EADDRNOTAVAIL);
        if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
-               return(EINVAL);
-       if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
+               return (EINVAL);
+       if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT)))
                wild = 1;
-       if (nam) {
-               sin6 = (struct sockaddr_in6 *)nam;
-               if (nam->sa_len != sizeof(*sin6))
-                       return(EINVAL);
+
+       socket_unlock(so, 0); /* keep reference */
+       lck_rw_lock_exclusive(pcbinfo->ipi_lock);
+
+       bzero(&sin6, sizeof (sin6));
+       if (nam != NULL) {
+               if (nam->sa_len != sizeof (struct sockaddr_in6)) {
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       socket_lock(so, 0);
+                       return (EINVAL);
+               }
                /*
                 * family check.
                 */
-               if (nam->sa_family != AF_INET6)
-                       return(EAFNOSUPPORT);
+               if (nam->sa_family != AF_INET6) {
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       socket_lock(so, 0);
+                       return (EAFNOSUPPORT);
+               }
+               lport = SIN6(nam)->sin6_port;
+
+               *(&sin6) = *SIN6(nam);
 
                /* KAME hack: embed scopeid */
-               if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0)
-                       return EINVAL;
-               /* this must be cleared for ifa_ifwithaddr() */
-               sin6->sin6_scope_id = 0;
+               if (in6_embedscope(&sin6.sin6_addr, &sin6, inp, NULL,
+                   NULL) != 0) {
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       socket_lock(so, 0);
+                       return (EINVAL);
+               }
 
-               lport = sin6->sin6_port;
-               if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
+               /* Sanitize local copy for address searches */
+                sin6.sin6_flowinfo = 0;
+                sin6.sin6_scope_id = 0;
+               sin6.sin6_port = 0;
+
+               if (IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) {
                        /*
                         * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
                         * allow complete duplication of binding if
@@ -155,173 +246,266 @@ in6_pcbbind(inp, nam, p)
                         */
                        if (so->so_options & SO_REUSEADDR)
                                reuseport = SO_REUSEADDR|SO_REUSEPORT;
-               } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-                       struct ifaddr *ia = NULL;
-
-                       sin6->sin6_port = 0;            /* yech... */
-                       if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0)
-                               return(EADDRNOTAVAIL);
-
-                       /*
-                        * XXX: bind to an anycast address might accidentally
-                        * cause sending a packet with anycast source address.
-                        * We should allow to bind to a deprecated address, since
-                        * the application dare to use it.
-                        */
-                       if (ia &&
-                           ((struct in6_ifaddr *)ia)->ia6_flags &
-                           (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
-                               return(EADDRNOTAVAIL);
+               } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) {
+                       struct ifaddr *ifa;
+
+                       ifa = ifa_ifwithaddr(SA(&sin6));
+                       if (ifa == NULL) {
+                               lck_rw_done(pcbinfo->ipi_lock);
+                               socket_lock(so, 0);
+                               return (EADDRNOTAVAIL);
+                       } else {
+                               /*
+                                * XXX: binding to an anycast address might
+                                * accidentally cause a packet to be sent with
+                                * an anycast source address.  We should allow
+                                * binding to a deprecated address, since
+                                * the application dares to use it.
+                                */
+                               IFA_LOCK_SPIN(ifa);
+                               if (((struct in6_ifaddr *)ifa)->ia6_flags &
+                                   (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
+                                   IN6_IFF_DETACHED)) {
+                                       IFA_UNLOCK(ifa);
+                                       IFA_REMREF(ifa);
+                                       lck_rw_done(pcbinfo->ipi_lock);
+                                       socket_lock(so, 0);
+                                       return (EADDRNOTAVAIL);
+                               }
+                               /*
+                                * Opportunistically determine the outbound
+                                * interface that may be used; this may not
+                                * hold true if we end up using a route
+                                * going over a different interface, e.g.
+                                * when sending to a local address.  This
+                                * will get updated again after sending.
+                                */
+                               outif = ifa->ifa_ifp;
+                               IFA_UNLOCK(ifa);
+                               IFA_REMREF(ifa);
                        }
                }
-               if (lport) {
+               if (lport != 0) {
                        struct inpcb *t;
+                       uid_t u;
 
                        /* GROSS */
-                       if (ntohs(lport) < IPV6PORT_RESERVED && p &&
-#if 0
-                           suser(p->p_ucred, &p->p_acflag))
-#else
-                            ((so->so_state & SS_PRIV) == 0))
-#endif
-                               return(EACCES);
-
-                       if (so->so_uid &&
-                           !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
-                               t = in6_pcblookup_local(pcbinfo,
-                                   &sin6->sin6_addr, lport,
+                       if (ntohs(lport) < IPV6PORT_RESERVED) {
+                               cred = kauth_cred_proc_ref(p);
+                               error = priv_check_cred(cred,
+                                   PRIV_NETINET_RESERVEDPORT, 0);
+                               kauth_cred_unref(&cred);
+                               if (error != 0) {
+                                       lck_rw_done(pcbinfo->ipi_lock);
+                                       socket_lock(so, 0);
+                                       return (EACCES);
+                               }
+                       }
+                       if (!IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr) &&
+                           (u = kauth_cred_getuid(so->so_cred)) != 0) {
+                               t = in6_pcblookup_local_and_cleanup(pcbinfo,
+                                   &sin6.sin6_addr, lport,
                                    INPLOOKUP_WILDCARD);
-                               if (t &&
-                                   (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
-                                    !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
-                                    (t->inp_socket->so_options &
-                                     SO_REUSEPORT) == 0) &&
-                                   so->so_uid != t->inp_socket->so_uid)
+                               if (t != NULL && (!IN6_IS_ADDR_UNSPECIFIED(
+                                   &sin6.sin6_addr) ||
+                                   !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
+                                   !(t->inp_socket->so_options &
+                                   SO_REUSEPORT)) && (u != kauth_cred_getuid(
+                                   t->inp_socket->so_cred)) &&
+                                   !(t->inp_socket->so_flags &
+                                   SOF_REUSESHAREUID)) {
+                                       lck_rw_done(pcbinfo->ipi_lock);
+                                       socket_lock(so, 0);
                                        return (EADDRINUSE);
-                               if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
-                                   IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+                               }
+                               if (!(inp->inp_flags & IN6P_IPV6_V6ONLY) &&
+                                   IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) {
                                        struct sockaddr_in sin;
 
-                                       in6_sin6_2_sin(&sin, sin6);
-                                       t = in_pcblookup_local(pcbinfo,
-                                               sin.sin_addr, lport,
-                                               INPLOOKUP_WILDCARD);
-                                       if (t &&
-                                           (so->so_uid !=
-                                            t->inp_socket->so_uid) &&
-                                           (ntohl(t->inp_laddr.s_addr) !=
-                                            INADDR_ANY ||
-                                            INP_SOCKAF(so) ==
-                                            INP_SOCKAF(t->inp_socket)))
+                                       in6_sin6_2_sin(&sin, &sin6);
+                                       t = in_pcblookup_local_and_cleanup(
+                                           pcbinfo, sin.sin_addr, lport,
+                                           INPLOOKUP_WILDCARD);
+                                       if (t != NULL &&
+                                           !(t->inp_socket->so_options &
+                                           SO_REUSEPORT) &&
+                                           (kauth_cred_getuid(so->so_cred) !=
+                                           kauth_cred_getuid(t->inp_socket->
+                                           so_cred)) && (t->inp_laddr.s_addr !=
+                                           INADDR_ANY || SOCK_DOM(so) ==
+                                           SOCK_DOM(t->inp_socket))) {
+                                               lck_rw_done(pcbinfo->ipi_lock);
+                                               socket_lock(so, 0);
                                                return (EADDRINUSE);
+                                       }
                                }
                        }
-                       t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
-                                               lport, wild);
-                       if (t && (reuseport & t->inp_socket->so_options) == 0)
-                               return(EADDRINUSE);
-                       if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
-                           IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+                       t = in6_pcblookup_local_and_cleanup(pcbinfo,
+                           &sin6.sin6_addr, lport, wild);
+                       if (t != NULL &&
+                           (reuseport & t->inp_socket->so_options) == 0) {
+                               lck_rw_done(pcbinfo->ipi_lock);
+                               socket_lock(so, 0);
+                               return (EADDRINUSE);
+                       }
+                       if (!(inp->inp_flags & IN6P_IPV6_V6ONLY) &&
+                           IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) {
                                struct sockaddr_in sin;
 
-                               in6_sin6_2_sin(&sin, sin6);
-                               t = in_pcblookup_local(pcbinfo, sin.sin_addr,
-                                                      lport, wild);
-                               if (t &&
-                                   (reuseport & t->inp_socket->so_options)
-                                   == 0 &&
-                                   (ntohl(t->inp_laddr.s_addr)
-                                    != INADDR_ANY ||
-                                    INP_SOCKAF(so) ==
-                                    INP_SOCKAF(t->inp_socket)))
+                               in6_sin6_2_sin(&sin, &sin6);
+                               t = in_pcblookup_local_and_cleanup(pcbinfo,
+                                   sin.sin_addr, lport, wild);
+                               if (t != NULL && (reuseport &
+                                   t->inp_socket->so_options) == 0 &&
+                                   (t->inp_laddr.s_addr != INADDR_ANY ||
+                                   SOCK_DOM(so) == SOCK_DOM(t->inp_socket))) {
+                                       lck_rw_done(pcbinfo->ipi_lock);
+                                       socket_lock(so, 0);
                                        return (EADDRINUSE);
+                               }
                        }
                }
-               inp->in6p_laddr = sin6->sin6_addr;
        }
+
+       socket_lock(so, 0);
+       /*
+        * We unlocked the socket's protocol lock for a long time.
+        * The socket might have been dropped/defuncted in the meantime,
+        * so check whether the world has changed since.
+        */
+       if (inp->inp_state == INPCB_STATE_DEAD) {
+               lck_rw_done(pcbinfo->ipi_lock);
+               return (ECONNABORTED);
+       }
+
+       /* check if the socket got bound when the lock was released */
+       if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+               lck_rw_done(pcbinfo->ipi_lock);
+               return (EINVAL);
+       }
+
+       if (!IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr)) {
+               inp->in6p_laddr = sin6.sin6_addr;
+               inp->in6p_last_outifp = outif;
+       }
+
        if (lport == 0) {
                int e;
-               if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p)) != 0)
-                       return(e);
-       }
-       else {
+               if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p, 1)) != 0) {
+                       /* Undo any address bind from above. */
+                       inp->in6p_laddr = in6addr_any;
+                       inp->in6p_last_outifp = NULL;   
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       return (e);
+               }
+       } else {
                inp->inp_lport = lport;
-               if (in_pcbinshash(inp) != 0) {
+               if (in_pcbinshash(inp, 1) != 0) {
                        inp->in6p_laddr = in6addr_any;
                        inp->inp_lport = 0;
+                       inp->in6p_last_outifp = NULL;
+                       lck_rw_done(pcbinfo->ipi_lock);
                        return (EAGAIN);
                }
        }
-       return(0);
+       lck_rw_done(pcbinfo->ipi_lock);
+       sflt_notify(so, sock_evt_bound, NULL);
+       return (0);
 }
 
 /*
- *   Transform old in6_pcbconnect() into an inner subroutine for new
- *   in6_pcbconnect(): Do some validity-checking on the remote
- *   address (in mbuf 'nam') and then determine local host address
- *   (i.e., which interface) to use to access that remote host.
+ * Transform old in6_pcbconnect() into an inner subroutine for new
+ * in6_pcbconnect(); do some validity-checking on the remote address
+ * (in "nam") and then determine local host address (i.e., which
+ * interface) to use to access that remote host.
+ *
+ * This routine may alter the caller-supplied remote address "nam".
  *
- *   This preserves definition of in6_pcbconnect(), while supporting a
- *   slightly different version for T/TCP.  (This is more than
- *   a bit of a kludge, but cleaning up the internal interfaces would
- *   have forced minor changes in every protocol).
+ * This routine might return an ifp with a reference held if the caller
+ * provides a non-NULL outif, even in the error case.  The caller is
+ * responsible for releasing its reference.
  */
-
 int
-in6_pcbladdr(inp, nam, plocal_addr6)
-       register struct inpcb *inp;
-       struct sockaddr *nam;
-       struct in6_addr **plocal_addr6;
+in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
+    struct in6_addr *plocal_addr6, struct ifnet **outif)
 {
-       register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
-       struct ifnet *ifp = NULL;
+       struct in6_addr *addr6 = NULL;
+       struct in6_addr src_storage;
        int error = 0;
+       unsigned int ifscope;
 
-       if (nam->sa_len != sizeof (*sin6))
+       if (outif != NULL)
+               *outif = NULL;
+       if (nam->sa_len != sizeof (struct sockaddr_in6))
                return (EINVAL);
-       if (sin6->sin6_family != AF_INET6)
+       if (SIN6(nam)->sin6_family != AF_INET6)
                return (EAFNOSUPPORT);
-       if (sin6->sin6_port == 0)
+       if (SIN6(nam)->sin6_port == 0)
                return (EADDRNOTAVAIL);
 
        /* KAME hack: embed scopeid */
-       if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0)
-               return EINVAL;
+       if (in6_embedscope(&SIN6(nam)->sin6_addr, SIN6(nam), inp, NULL, NULL) != 0)
+               return (EINVAL);
 
-       if (in6_ifaddr) {
+       if (in6_ifaddrs) {
                /*
                 * If the destination address is UNSPECIFIED addr,
                 * use the loopback addr, e.g ::1.
                 */
-               if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
-                       sin6->sin6_addr = in6addr_loopback;
+               if (IN6_IS_ADDR_UNSPECIFIED(&SIN6(nam)->sin6_addr))
+                       SIN6(nam)->sin6_addr = in6addr_loopback;
        }
-       {
+
+       ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+           inp->inp_boundifp->if_index : IFSCOPE_NONE;
+
+       /*
+        * XXX: in6_selectsrc might replace the bound local address
+        * with the address specified by setsockopt(IPV6_PKTINFO).
+        * Is it the intended behavior?
+        *
+        * in6_selectsrc() might return outif with its reference held
+        * even in the error case; caller always needs to release it
+        * if non-NULL.
+        */
+       addr6 = in6_selectsrc(SIN6(nam), inp->in6p_outputopts, inp,
+           &inp->in6p_route, outif, &src_storage, ifscope, &error);
+
+       if (outif != NULL) {
+               struct rtentry *rt = inp->in6p_route.ro_rt;
                /*
-                * XXX: in6_selectsrc might replace the bound local address
-                * with the address specified by setsockopt(IPV6_PKTINFO).
-                * Is it the intended behavior?
+                * If in6_selectsrc() returns a route, it should be one
+                * which points to the same ifp as outif.  Just in case
+                * it isn't, use the one from the route for consistency.
+                * Otherwise if there is no route, leave outif alone as
+                * it could still be useful to the caller.
                 */
-               *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts,
-                                             inp->in6p_moptions,
-                                             &inp->in6p_route,
-                                             &inp->in6p_laddr, &error);
-               if (*plocal_addr6 == 0) {
-                       if (error == 0)
-                               error = EADDRNOTAVAIL;
-                       return(error);
+               if (rt != NULL && rt->rt_ifp != *outif) {
+                       ifnet_reference(rt->rt_ifp);    /* for caller */
+                       if (*outif != NULL)
+                               ifnet_release(*outif);
+                       *outif = rt->rt_ifp;
                }
-               /*
-                * Don't do pcblookup call here; return interface in
-                * plocal_addr6
-                * and exit to caller, that will do the lookup.
-                */
        }
 
-       if (inp->in6p_route.ro_rt)
-               ifp = inp->in6p_route.ro_rt->rt_ifp;
+       if (addr6 == NULL) {
+               if (outif != NULL && (*outif) != NULL &&
+                   inp_restricted_send(inp, *outif)) {
+                       soevent(inp->inp_socket,
+                           (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED));
+                       error = EHOSTUNREACH;
+               }
+               if (error == 0)
+                       error = EADDRNOTAVAIL;
+               return (error);
+       }
 
-       return(0);
+       *plocal_addr6 = *addr6;
+       /*
+        * Don't do pcblookup call here; return the source address in
+        * plocal_addr6 and exit to the caller, which will do the lookup.
+        */
+       return (0);
 }
 
 /*
@@ -332,399 +516,306 @@ in6_pcbladdr(inp, nam, plocal_addr6)
  * then pick one.
  */
 int
-in6_pcbconnect(inp, nam, p)
-       register struct inpcb *inp;
-       struct sockaddr *nam;
-       struct proc *p;
+in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 {
-       struct in6_addr *addr6;
-       register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
-       int error;
+       struct in6_addr addr6;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
+       struct inpcb *pcb;
+       int error = 0;
+       struct ifnet *outif = NULL;
+       struct socket *so = inp->inp_socket;
 
        /*
         * Call inner routine, to assign local interface address.
         * in6_pcbladdr() may automatically fill in sin6_scope_id.
+        *
+        * in6_pcbladdr() might return an ifp with its reference held
+        * even in the error case, so make sure that it's released
+        * whenever it's non-NULL.
         */
-       if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
-               return(error);
-
-       if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
-                              sin6->sin6_port,
-                             IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
-                             ? addr6 : &inp->in6p_laddr,
-                             inp->inp_lport, 0, NULL) != NULL) {
-               return (EADDRINUSE);
+       if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0) {
+               if (outif != NULL && inp_restricted_send(inp, outif)) 
+                       soevent(so,
+                           (SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED));
+               goto done;
+       }
+       socket_unlock(so, 0);
+       pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
+           sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ?
+           &addr6 : &inp->in6p_laddr, inp->inp_lport, 0, NULL);
+       socket_lock(so, 0);
+       if (pcb != NULL) {
+               in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
+               error = EADDRINUSE;
+               goto done;
        }
        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                if (inp->inp_lport == 0) {
-                       error = in6_pcbbind(inp, (struct sockaddr *)0, p);
+                       error = in6_pcbbind(inp, NULL, p);
                        if (error)
-                               return (error);
+                               goto done;
                }
-               inp->in6p_laddr = *addr6;
+               inp->in6p_laddr = addr6;
+               inp->in6p_last_outifp = outif;  /* no reference needed */
+               inp->in6p_flags |= INP_IN6ADDR_ANY;
+       }
+       if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
+               /* lock inversion issue, mostly with udp multicast packets */
+               socket_unlock(so, 0);
+               lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
+               socket_lock(so, 0);
        }
        inp->in6p_faddr = sin6->sin6_addr;
        inp->inp_fport = sin6->sin6_port;
-       /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
-       inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
-       if (inp->in6p_flags & IN6P_AUTOFLOWLABEL)
-               inp->in6p_flowinfo |=
-                   (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK);
-
+       if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
+               nstat_pcb_invalidate_cache(inp);
        in_pcbrehash(inp);
-       return (0);
+       lck_rw_done(inp->inp_pcbinfo->ipi_lock);
+
+done:
+       if (outif != NULL)
+               ifnet_release(outif);
+
+       return (error);
 }
 
-#if 0
-/*
- * Return an IPv6 address, which is the most appropriate for given
- * destination and user specified options.
- * If necessary, this function lookups the routing table and return
- * an entry to the caller for later use.
- */
-struct in6_addr *
-in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp)
-       struct sockaddr_in6 *dstsock;
-       struct ip6_pktopts *opts;
-       struct ip6_moptions *mopts;
-       struct route_in6 *ro;
-       struct in6_addr *laddr;
-       int *errorp;
+void
+in6_pcbdisconnect(struct inpcb *inp)
 {
-       struct in6_addr *dst;
-       struct in6_ifaddr *ia6 = 0;
-       struct in6_pktinfo *pi = NULL;
-
-       dst = &dstsock->sin6_addr;
-       *errorp = 0;
+       struct socket *so = inp->inp_socket;
 
+       if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
+               /* lock inversion issue, mostly with udp multicast packets */
+               socket_unlock(so, 0);
+               lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
+               socket_lock(so, 0);
+       }
+       if (nstat_collect && SOCK_PROTO(so) == IPPROTO_UDP)
+               nstat_pcb_cache(inp);
+       bzero((caddr_t)&inp->in6p_faddr, sizeof (inp->in6p_faddr));
+       inp->inp_fport = 0;
+       /* clear flowinfo - RFC 6437 */
+       inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
+       in_pcbrehash(inp);
+       lck_rw_done(inp->inp_pcbinfo->ipi_lock);
        /*
-        * If the source address is explicitly specified by the caller,
-        * use it.
+        * A multipath subflow socket would have its SS_NOFDREF set by default,
+        * so check for SOF_MP_SUBFLOW socket flag before detaching the PCB;
+        * when the socket is closed for real, SOF_MP_SUBFLOW would be cleared.
         */
-       if (opts && (pi = opts->ip6po_pktinfo) &&
-           !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr))
-               return(&pi->ipi6_addr);
+       if (!(so->so_flags & SOF_MP_SUBFLOW) && (so->so_state & SS_NOFDREF))
+               in6_pcbdetach(inp);
+}
 
-       /*
-        * If the source address is not specified but the socket(if any)
-        * is already bound, use the bound address.
-        */
-       if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
-               return(laddr);
+void
+in6_pcbdetach(struct inpcb *inp)
+{
+       struct socket *so = inp->inp_socket;
 
-       /*
-        * If the caller doesn't specify the source address but
-        * the outgoing interface, use an address associated with
-        * the interface.
-        */
-       if (pi && pi->ipi6_ifindex) {
-               /* XXX boundary check is assumed to be already done. */
-               ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex],
-                                      dst);
-               if (ia6 == 0) {
-                       *errorp = EADDRNOTAVAIL;
-                       return(0);
-               }
-               return(&satosin6(&ia6->ia_addr)->sin6_addr);
+       if (so->so_pcb == NULL) {
+               /* PCB has been disposed */
+               panic("%s: inp=%p so=%p proto=%d so_pcb is null!\n", __func__,
+                   inp, so, SOCK_PROTO(so));
+               /* NOTREACHED */
+       }
+       
+#if IPSEC
+       if (inp->in6p_sp != NULL) {
+               (void) ipsec6_delete_pcbpolicy(inp);
        }
+#endif /* IPSEC */
 
        /*
-        * If the destination address is a link-local unicast address or
-        * a multicast address, and if the outgoing interface is specified
-        * by the sin6_scope_id filed, use an address associated with the
-        * interface.
-        * XXX: We're now trying to define more specific semantics of
-        *      sin6_scope_id field, so this part will be rewritten in
-        *      the near future.
+        * Let NetworkStatistics know this PCB is going away
+        * before we detach it.
         */
-       if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) &&
-           dstsock->sin6_scope_id) {
-               /*
-                * I'm not sure if boundary check for scope_id is done
-                * somewhere...
-                */
-               if (dstsock->sin6_scope_id < 0 ||
-                   if_index < dstsock->sin6_scope_id) {
-                       *errorp = ENXIO; /* XXX: better error? */
-                       return(0);
-               }
-               ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id],
-                                      dst);
-               if (ia6 == 0) {
-                       *errorp = EADDRNOTAVAIL;
-                       return(0);
-               }
-               return(&satosin6(&ia6->ia_addr)->sin6_addr);
+       if (nstat_collect &&
+           (SOCK_PROTO(so) == IPPROTO_TCP || SOCK_PROTO(so) == IPPROTO_UDP))
+               nstat_pcb_detach(inp);
+       /* mark socket state as dead */
+       if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING) {
+               panic("%s: so=%p proto=%d couldn't set to STOPUSING\n",
+                   __func__, so, SOCK_PROTO(so));
+               /* NOTREACHED */
        }
 
-       /*
-        * If the destination address is a multicast address and
-        * the outgoing interface for the address is specified
-        * by the caller, use an address associated with the interface.
-        * There is a sanity check here; if the destination has node-local
-        * scope, the outgoing interfacde should be a loopback address.
-        * Even if the outgoing interface is not specified, we also
-        * choose a loopback interface as the outgoing interface.
-        */
-       if (IN6_IS_ADDR_MULTICAST(dst)) {
-               struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL;
+       if (!(so->so_flags & SOF_PCBCLEARING)) {
+               struct ip_moptions *imo;
+               struct ip6_moptions *im6o;
 
-               if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) {
-                       ifp = &loif[0];
+               inp->inp_vflag = 0;
+               if (inp->in6p_options != NULL) {
+                       m_freem(inp->in6p_options);
+                       inp->in6p_options = NULL;
                }
-
-               if (ifp) {
-                       ia6 = in6_ifawithscope(ifp, dst);
-                       if (ia6 == 0) {
-                               *errorp = EADDRNOTAVAIL;
-                               return(0);
-                       }
-                       return(&ia6->ia_addr.sin6_addr);
+               ip6_freepcbopts(inp->in6p_outputopts);
+               ROUTE_RELEASE(&inp->in6p_route);
+               /* free IPv4 related resources in case of mapped addr */
+               if (inp->inp_options != NULL) {
+                       (void) m_free(inp->inp_options);
+                       inp->inp_options = NULL;
                }
-       }
+               im6o = inp->in6p_moptions;
+               inp->in6p_moptions = NULL;
 
-       /*
-        * If the next hop address for the packet is specified
-        * by caller, use an address associated with the route
-        * to the next hop.
-        */
-       {
-               struct sockaddr_in6 *sin6_next;
-               struct rtentry *rt;
-
-               if (opts && opts->ip6po_nexthop) {
-                       sin6_next = satosin6(opts->ip6po_nexthop);
-                       rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL);
-                       if (rt) {
-                               ia6 = in6_ifawithscope(rt->rt_ifp, dst);
-                               if (ia6 == 0)
-                                       ia6 = ifatoia6(rt->rt_ifa);
-                       }
-                       if (ia6 == 0) {
-                               *errorp = EADDRNOTAVAIL;
-                               return(0);
-                       }
-                       return(&satosin6(&ia6->ia_addr)->sin6_addr);
-               }
-       }
+               imo = inp->inp_moptions;
+               inp->inp_moptions = NULL;
 
-       /*
-        * If route is known or can be allocated now,
-        * our src addr is taken from the i/f, else punt.
-        */
-       if (ro) {
-               if (ro->ro_rt &&
-                   !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) {
-                       rtfree(ro->ro_rt);
-                       ro->ro_rt = (struct rtentry *)0;
-               }
-               if (ro->ro_rt == (struct rtentry *)0 ||
-                   ro->ro_rt->rt_ifp == (struct ifnet *)0) {
-                       struct sockaddr_in6 *dst6;
-
-                       /* No route yet, so try to acquire one */
-                       bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
-                       dst6 = (struct sockaddr_in6 *)&ro->ro_dst;
-                       dst6->sin6_family = AF_INET6;
-                       dst6->sin6_len = sizeof(struct sockaddr_in6);
-                       dst6->sin6_addr = *dst;
-                       if (IN6_IS_ADDR_MULTICAST(dst)) {
-                               ro->ro_rt = rtalloc1(&((struct route *)ro)
-                                                    ->ro_dst, 0, 0UL);
-                       } else {
-                               rtalloc((struct route *)ro);
-                       }
-               }
+               sofreelastref(so, 0);
+               inp->inp_state = INPCB_STATE_DEAD;
+               /* makes sure we're not called twice from so_close */
+               so->so_flags |= SOF_PCBCLEARING;
+               inpcb_gc_sched(inp->inp_pcbinfo, INPCB_TIMER_FAST);
 
                /*
-                * in_pcbconnect() checks out IFF_LOOPBACK to skip using
-                * the address. But we don't know why it does so.
-                * It is necessary to ensure the scope even for lo0
-                * so doesn't check out IFF_LOOPBACK.
+                * See inp_join_group() for why we need to unlock
                 */
-
-               if (ro->ro_rt) {
-                       ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst);
-                       if (ia6 == 0) /* xxx scope error ?*/
-                               ia6 = ifatoia6(ro->ro_rt->rt_ifa);
-               }
-               if (ia6 == 0) {
-                       *errorp = EHOSTUNREACH; /* no route */
-                       return(0);
+               if (im6o != NULL || imo != NULL) {
+                       socket_unlock(so, 0);
+                       if (im6o != NULL)
+                               IM6O_REMREF(im6o);
+                       if (imo != NULL)
+                               IMO_REMREF(imo);
+                       socket_lock(so, 0);
                }
-               return(&satosin6(&ia6->ia_addr)->sin6_addr);
        }
-
-       *errorp = EADDRNOTAVAIL;
-       return(0);
-}
-
-/*
- * Default hop limit selection. The precedence is as follows:
- * 1. Hoplimit valued specified via ioctl.
- * 2. (If the outgoing interface is detected) the current
- *     hop limit of the interface specified by router advertisement.
- * 3. The system default hoplimit.
-*/
-int
-in6_selecthlim(in6p, ifp)
-       struct in6pcb *in6p;
-       struct ifnet *ifp;
-{
-       if (in6p && in6p->in6p_hops >= 0)
-               return(in6p->in6p_hops);
-       else if (ifp)
-               return(nd_ifinfo[ifp->if_index].chlim);
-       else
-               return(ip6_defhlim);
-}
-#endif
-
-void
-in6_pcbdisconnect(inp)
-       struct inpcb *inp;
-{
-       bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
-       inp->inp_fport = 0;
-       /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
-       inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
-       in_pcbrehash(inp);
-       if (inp->inp_socket->so_state & SS_NOFDREF)
-               in6_pcbdetach(inp);
-}
-
-void
-in6_pcbdetach(inp)
-       struct inpcb *inp;
-{
-       struct socket *so = inp->inp_socket;
-       struct inpcbinfo *ipi = inp->inp_pcbinfo;
-
-#if IPSEC
-       if (inp->in6p_sp != NULL)
-               ipsec6_delete_pcbpolicy(inp);
-#endif /* IPSEC */
-       inp->inp_gencnt = ++ipi->ipi_gencnt;
-       in_pcbremlists(inp);
-       sotoinpcb(so) = 0;
-       sofree(so);
-
-       if (inp->in6p_options)
-               m_freem(inp->in6p_options);
-       ip6_freepcbopts(inp->in6p_outputopts);
-       ip6_freemoptions(inp->in6p_moptions);
-       if (inp->in6p_route.ro_rt)
-               rtfree(inp->in6p_route.ro_rt);
-       /* Check and free IPv4 related resources in case of mapped addr */
-       if (inp->inp_options)
-               (void)m_free(inp->inp_options);
-       ip_freemoptions(inp->inp_moptions);
-
-       inp->inp_vflag = 0;
-       zfree(ipi->ipi_zone, inp);
 }
 
 struct sockaddr *
-in6_sockaddr(port, addr_p)
-       in_port_t port;
-       struct in6_addr *addr_p;
+in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
 {
        struct sockaddr_in6 *sin6;
 
-       MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK);
-       bzero(sin6, sizeof *sin6);
+       MALLOC(sin6, struct sockaddr_in6 *, sizeof (*sin6), M_SONAME, M_WAITOK);
+       if (sin6 == NULL)
+               return (NULL);
+       bzero(sin6, sizeof (*sin6));
        sin6->sin6_family = AF_INET6;
-       sin6->sin6_len = sizeof(*sin6);
+       sin6->sin6_len = sizeof (*sin6);
        sin6->sin6_port = port;
        sin6->sin6_addr = *addr_p;
+
+       /* would be good to use sa6_recoverscope(), except for locking  */
        if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
                sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
        else
-               sin6->sin6_scope_id = 0;        /*XXX*/
+               sin6->sin6_scope_id = 0;        /* XXX */
        if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
                sin6->sin6_addr.s6_addr16[1] = 0;
 
-       return (struct sockaddr *)sin6;
+       return ((struct sockaddr *)sin6);
 }
 
-struct sockaddr *
-in6_v4mapsin6_sockaddr(port, addr_p)
-       in_port_t port;
-       struct in_addr *addr_p;
+void
+in6_sockaddr_s(in_port_t port, struct in6_addr *addr_p,
+    struct sockaddr_in6 *sin6)
 {
-       struct sockaddr_in sin;
-       struct sockaddr_in6 *sin6_p;
-
-       bzero(&sin, sizeof sin);
-       sin.sin_family = AF_INET;
-       sin.sin_len = sizeof(sin);
-       sin.sin_port = port;
-       sin.sin_addr = *addr_p;
-
-       MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME,
-               M_WAITOK);
-       in6_sin_2_v4mapsin6(&sin, sin6_p);
+       bzero(sin6, sizeof (*sin6));
+       sin6->sin6_family = AF_INET6;
+       sin6->sin6_len = sizeof (*sin6);
+       sin6->sin6_port = port;
+       sin6->sin6_addr = *addr_p;
 
-       return (struct sockaddr *)sin6_p;
+       /* would be good to use sa6_recoverscope(), except for locking  */
+       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+               sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
+       else
+               sin6->sin6_scope_id = 0;        /* XXX */
+       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+               sin6->sin6_addr.s6_addr16[1] = 0;
 }
 
 /*
- * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was
+ * The calling convention of in6_getsockaddr() and in6_getpeeraddr() was
  * modified to match the pru_sockaddr() and pru_peeraddr() entry points
  * in struct pr_usrreqs, so that protocols can just reference then directly
- * without the need for a wrapper function.  The socket must have a valid
- * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
- * except through a kernel programming error, so it is acceptable to panic
- * (or in this case trap) if the PCB is invalid.  (Actually, we don't trap
- * because there actually /is/ a programming error somewhere... XXX)
+ * without the need for a wrapper function.
  */
 int
-in6_setsockaddr(so, nam)
-       struct socket *so;
-       struct sockaddr **nam;
+in6_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
-       int s;
-       register struct inpcb *inp;
+       struct inpcb *inp;
        struct in6_addr addr;
        in_port_t port;
 
-       s = splnet();
-       inp = sotoinpcb(so);
-       if (!inp) {
-               splx(s);
-               return EINVAL;
-       }
+       if ((inp = sotoinpcb(so)) == NULL)
+               return (EINVAL);
+
        port = inp->inp_lport;
        addr = inp->in6p_laddr;
-       splx(s);
 
        *nam = in6_sockaddr(port, &addr);
-       return 0;
+       if (*nam == NULL)
+               return (ENOBUFS);
+       return (0);
 }
 
 int
-in6_setpeeraddr(so, nam)
-       struct socket *so;
-       struct sockaddr **nam;
+in6_getsockaddr_s(struct socket *so, struct sockaddr_storage *ss)
 {
-       int s;
        struct inpcb *inp;
        struct in6_addr addr;
        in_port_t port;
 
-       s = splnet();
-       inp = sotoinpcb(so);
-       if (!inp) {
-               splx(s);
-               return EINVAL;
-       }
+       VERIFY(ss != NULL);
+       bzero(ss, sizeof (*ss));
+
+       if ((inp = sotoinpcb(so)) == NULL
+#if NECP
+               || (necp_socket_should_use_flow_divert(inp))
+#endif /* NECP */
+               )
+               return (inp == NULL ? EINVAL : EPROTOTYPE);
+
+       port = inp->inp_lport;
+       addr = inp->in6p_laddr;
+
+       in6_sockaddr_s(port, &addr, SIN6(ss));
+       return (0);
+}
+
+int
+in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
+{
+       struct inpcb *inp;
+       struct in6_addr addr;
+       in_port_t port;
+
+       if ((inp = sotoinpcb(so)) == NULL)
+               return (EINVAL);
+
        port = inp->inp_fport;
        addr = inp->in6p_faddr;
-       splx(s);
 
        *nam = in6_sockaddr(port, &addr);
-       return 0;
+       if (*nam == NULL)
+               return (ENOBUFS);
+       return (0);
+}
+
+int
+in6_getpeeraddr_s(struct socket *so, struct sockaddr_storage *ss)
+{
+       struct inpcb *inp;
+       struct in6_addr addr;
+       in_port_t port;
+
+       VERIFY(ss != NULL);
+       bzero(ss, sizeof (*ss));
+
+       if ((inp = sotoinpcb(so)) == NULL
+#if NECP
+               || (necp_socket_should_use_flow_divert(inp))
+#endif /* NECP */
+               )
+               return (inp == NULL ? EINVAL : EPROTOTYPE);
+
+       port = inp->inp_fport;
+       addr = inp->in6p_faddr;
+
+       in6_sockaddr_s(port, &addr, SIN6(ss));
+       return (0);
 }
 
 int
@@ -734,16 +825,16 @@ in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
        int     error;
 
        if (inp == NULL)
-               return EINVAL;
+               return (EINVAL);
        if (inp->inp_vflag & INP_IPV4) {
-               error = in_setsockaddr(so, nam);
+               error = in_getsockaddr(so, nam);
                if (error == 0)
-                       in6_sin_2_v4mapsin6_in_sock(nam);
-       } else
-       /* scope issues will be handled in in6_setsockaddr(). */
-       error = in6_setsockaddr(so, nam);
-
-       return error;
+                       error = in6_sin_2_v4mapsin6_in_sock(nam);
+       } else {
+               /* scope issues will be handled in in6_getsockaddr(). */
+               error = in6_getsockaddr(so, nam);
+       }
+       return (error);
 }
 
 int
@@ -753,16 +844,16 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
        int     error;
 
        if (inp == NULL)
-               return EINVAL;
+               return (EINVAL);
        if (inp->inp_vflag & INP_IPV4) {
-               error = in_setpeeraddr(so, nam);
+               error = in_getpeeraddr(so, nam);
                if (error == 0)
-                       in6_sin_2_v4mapsin6_in_sock(nam);
-       } else
-       /* scope issues will be handled in in6_setpeeraddr(). */
-       error = in6_setpeeraddr(so, nam);
-
-       return error;
+                       error = in6_sin_2_v4mapsin6_in_sock(nam);
+       } else {
+               /* scope issues will be handled in in6_getpeeraddr(). */
+               error = in6_getpeeraddr(so, nam);
+       }
+       return (error);
 }
 
 /*
@@ -773,36 +864,31 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
  * cmds that are uninteresting (e.g., no error in the map).
  * Call the protocol specific routine (if any) to report
  * any errors for each matching socket.
- *
- * Must be called at splnet.
  */
 void
-in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify)
-       struct inpcbhead *head;
-       struct sockaddr *dst;
-       const struct sockaddr *src;
-       u_int fport_arg, lport_arg;
-       int cmd;
-//     struct inpcb *(*notify) __P((struct inpcb *, int));
-       void (*notify) __P((struct inpcb *, int));
+in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, u_int fport_arg,
+    const struct sockaddr *src, u_int lport_arg, int cmd, void *cmdarg,
+    void (*notify)(struct inpcb *, int))
 {
+       struct inpcbhead *head = pcbinfo->ipi_listhead;
        struct inpcb *inp, *ninp;
        struct sockaddr_in6 sa6_src, *sa6_dst;
        u_short fport = fport_arg, lport = lport_arg;
        u_int32_t flowinfo;
-       int errno, s;
+       int errno;
 
-       if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6)
+       if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
                return;
 
-       sa6_dst = (struct sockaddr_in6 *)dst;
+       sa6_dst = (struct sockaddr_in6 *)(void *)dst;
        if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
                return;
 
        /*
         * note that src can be NULL when we get notify by local fragmentation.
         */
-       sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
+       sa6_src = (src == NULL) ?
+           sa6_any : *(struct sockaddr_in6 *)(uintptr_t)(size_t)src;
        flowinfo = sa6_src.sin6_flowinfo;
 
        /*
@@ -816,19 +902,36 @@ in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify)
        if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
                fport = 0;
                lport = 0;
-               bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
+               bzero((caddr_t)&sa6_src.sin6_addr, sizeof (sa6_src.sin6_addr));
 
                if (cmd != PRC_HOSTDEAD)
                        notify = in6_rtchange;
        }
        errno = inet6ctlerrmap[cmd];
-       s = splnet();
-       for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
-               ninp = LIST_NEXT(inp, inp_list);
+       lck_rw_lock_shared(pcbinfo->ipi_lock);
+       for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
+               ninp = LIST_NEXT(inp, inp_list);
 
-               if ((inp->inp_vflag & INP_IPV6) == 0)
+               if (!(inp->inp_vflag & INP_IPV6))
                        continue;
 
+               /*
+                * If the error designates a new path MTU for a destination
+                * and the application (associated with this socket) wanted to
+                * know the value, notify. Note that we notify all
+                * disconnected sockets whose application asked for it,
+                * because some UDP applications keep their sending
+                * sockets disconnected.
+                * XXX: should we avoid notifying TCP sockets of the value?
+                */
+               if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
+                   (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+                   IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
+                   &sa6_dst->sin6_addr))) {
+                       ip6_notify_pmtu(inp, (struct sockaddr_in6 *)(void *)dst,
+                           (u_int32_t *)cmdarg);
+               }
+
                /*
                 * Detect if we should notify the error. If no source and
                 * destination ports are specifed, but non-zero flowinfo and
@@ -839,39 +942,44 @@ in6_pcbnotify(head, dst, fport_arg, src, lport_arg, cmd, notify)
                 */
                if (lport == 0 && fport == 0 && flowinfo &&
                    inp->inp_socket != NULL &&
-                   flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) &&
+                   flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) &&
                    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
                        goto do_notify;
                else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
-                                            &sa6_dst->sin6_addr) ||
-                        inp->inp_socket == 0 ||
-                        (lport && inp->inp_lport != lport) ||
-                        (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
-                         !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
-                                             &sa6_src.sin6_addr)) ||
-                        (fport && inp->inp_fport != fport))
+                   &sa6_dst->sin6_addr) || inp->inp_socket == NULL ||
+                   (lport && inp->inp_lport != lport) ||
+                   (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
+                   !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
+                   &sa6_src.sin6_addr)) || (fport && inp->inp_fport != fport))
                        continue;
 
-         do_notify:
-               if (notify)
+do_notify:
+               if (notify) {
+                       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) ==
+                           WNT_STOPUSING)
+                               continue;
+                       socket_lock(inp->inp_socket, 1);
                        (*notify)(inp, errno);
+                       (void) in_pcb_checkstate(inp, WNT_RELEASE, 1);
+                       socket_unlock(inp->inp_socket, 1);
+               }
        }
-       splx(s);
+       lck_rw_done(pcbinfo->ipi_lock);
 }
 
 /*
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *
-in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
-       struct inpcbinfo *pcbinfo;
-       struct in6_addr *laddr;
-       u_int lport_arg;
-       int wild_okay;
+in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
+    u_int lport_arg, int wild_okay)
 {
-       register struct inpcb *inp;
+       struct inpcb *inp;
        int matchwild = 3, wildcard;
        u_short lport = lport_arg;
+       struct inpcbporthead *porthash;
+       struct inpcb *match = NULL;
+       struct inpcbport *phd;
 
        if (!wild_okay) {
                struct inpcbhead *head;
@@ -879,10 +987,10 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
                 * Look for an unconnected (wildcard foreign addr) PCB that
                 * matches the local address and port we're looking for.
                 */
-               head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
-                                                     pcbinfo->hashmask)];
+               head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+                   pcbinfo->ipi_hashmask)];
                LIST_FOREACH(inp, head, inp_hash) {
-                       if ((inp->inp_vflag & INP_IPV6) == 0)
+                       if (!(inp->inp_vflag & INP_IPV6))
                                continue;
                        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
                            IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
@@ -897,94 +1005,50 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
                 * Not found.
                 */
                return (NULL);
-       } else {
-               struct inpcbporthead *porthash;
-               struct inpcbport *phd;
-               struct inpcb *match = NULL;
+       }
+       /*
+        * Best fit PCB lookup.
+        *
+        * First see if this local port is in use by looking on the
+        * port hash list.
+        */
+       porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
+           pcbinfo->ipi_porthashmask)];
+       LIST_FOREACH(phd, porthash, phd_hash) {
+               if (phd->phd_port == lport)
+                       break;
+       }
+       if (phd != NULL) {
                /*
-                * Best fit PCB lookup.
-                *
-                * First see if this local port is in use by looking on the
-                * port hash list.
+                * Port is in use by one or more PCBs. Look for best
+                * fit.
                 */
-               porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
-                   pcbinfo->porthashmask)];
-               LIST_FOREACH(phd, porthash, phd_hash) {
-                       if (phd->phd_port == lport)
-                               break;
-               }
-               if (phd != NULL) {
-                       /*
-                        * Port is in use by one or more PCBs. Look for best
-                        * fit.
-                        */
-                       LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
-                               wildcard = 0;
-                               if ((inp->inp_vflag & INP_IPV6) == 0)
+               LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
+                       wildcard = 0;
+                       if (!(inp->inp_vflag & INP_IPV6))
+                               continue;
+                       if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
+                               wildcard++;
+                       if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+                               if (IN6_IS_ADDR_UNSPECIFIED(laddr))
+                                       wildcard++;
+                               else if (!IN6_ARE_ADDR_EQUAL(
+                                   &inp->in6p_laddr, laddr))
                                        continue;
-                               if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
+                       } else {
+                               if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
                                        wildcard++;
-                               if (!IN6_IS_ADDR_UNSPECIFIED(
-                                       &inp->in6p_laddr)) {
-                                       if (IN6_IS_ADDR_UNSPECIFIED(laddr))
-                                               wildcard++;
-                                       else if (!IN6_ARE_ADDR_EQUAL(
-                                               &inp->in6p_laddr, laddr))
-                                               continue;
-                               } else {
-                                       if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
-                                               wildcard++;
-                               }
-                               if (wildcard < matchwild) {
-                                       match = inp;
-                                       matchwild = wildcard;
-                                       if (matchwild == 0) {
-                                               break;
-                                       }
-                               }
                        }
-               }
-               return (match);
-       }
-}
-
-void
-in6_pcbpurgeif0(head, ifp)
-       struct in6pcb *head;
-       struct ifnet *ifp;
-{
-       struct in6pcb *in6p;
-       struct ip6_moptions *im6o;
-       struct in6_multi_mship *imm, *nimm;
-
-       for (in6p = head; in6p != NULL; in6p = LIST_NEXT(in6p, inp_list)) {
-               im6o = in6p->in6p_moptions;
-               if ((in6p->inp_vflag & INP_IPV6) &&
-                   im6o) {
-                       /*
-                        * Unselect the outgoing interface if it is being
-                        * detached.
-                        */
-                       if (im6o->im6o_multicast_ifp == ifp)
-                               im6o->im6o_multicast_ifp = NULL;
-
-                       /*
-                        * Drop multicast group membership if we joined
-                        * through the interface being detached.
-                        * XXX controversial - is it really legal for kernel
-                        * to force this?
-                        */
-                       for (imm = im6o->im6o_memberships.lh_first;
-                            imm != NULL; imm = nimm) {
-                               nimm = imm->i6mm_chain.le_next;
-                               if (imm->i6mm_maddr->in6m_ifp == ifp) {
-                                       LIST_REMOVE(imm, i6mm_chain);
-                                       in6_delmulti(imm->i6mm_maddr);
-                                       FREE(imm, M_IPMADDR);
+                       if (wildcard < matchwild) {
+                               match = inp;
+                               matchwild = wildcard;
+                               if (matchwild == 0) {
+                                       break;
                                }
                        }
                }
        }
+       return (match);
 }
 
 /*
@@ -994,31 +1058,31 @@ in6_pcbpurgeif0(head, ifp)
  * (by a redirect), time to try a default gateway again.
  */
 void
-in6_losing(in6p)
-       struct inpcb *in6p;
+in6_losing(struct inpcb *in6p)
 {
        struct rtentry *rt;
-       struct rt_addrinfo info;
 
        if ((rt = in6p->in6p_route.ro_rt) != NULL) {
-               in6p->in6p_route.ro_rt = 0;
-               bzero((caddr_t)&info, sizeof(info));
-               info.rti_info[RTAX_DST] =
-                       (struct sockaddr *)&in6p->in6p_route.ro_dst;
-               info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-               info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-               rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
-               if (rt->rt_flags & RTF_DYNAMIC)
-                       (void)rtrequest(RTM_DELETE, rt_key(rt),
-                                       rt->rt_gateway, rt_mask(rt), rt->rt_flags,
-                                       (struct rtentry **)0);
-               else
+               RT_LOCK(rt);
+               if (rt->rt_flags & RTF_DYNAMIC) {
+                       /*
+                        * Prevent another thread from modifying rt_key,
+                        * rt_gateway via rt_setgate() after the rt_lock
+                        * is dropped by marking the route as defunct.
+                        */
+                       rt->rt_flags |= RTF_CONDEMNED;
+                       RT_UNLOCK(rt);
+                       (void) rtrequest(RTM_DELETE, rt_key(rt),
+                           rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
+               } else {
+                       RT_UNLOCK(rt);
+               }
                /*
                 * A new route can be allocated
                 * the next time output is attempted.
                 */
-                       rtfree(rt);
        }
+       ROUTE_RELEASE(&in6p->in6p_route);
 }
 
 /*
@@ -1026,86 +1090,202 @@ in6_losing(in6p)
  * and allocate a (hopefully) better one.
  */
 void
-in6_rtchange(inp, errno)
-       struct inpcb *inp;
-       int errno;
+in6_rtchange(struct inpcb *inp, int errno)
 {
-       if (inp->in6p_route.ro_rt) {
-               rtfree(inp->in6p_route.ro_rt);
-               inp->in6p_route.ro_rt = 0;
-               /*
-                * A new route can be allocated the next time
-                * output is attempted.
-                */
+#pragma unused(errno)
+       /*
+        * A new route can be allocated the next time
+        * output is attempted.
+        */
+       ROUTE_RELEASE(&inp->in6p_route);
+}
+
+/*
+ * Check if PCB exists in hash list. Also returns uid and gid of socket
+ */
+int
+in6_pcblookup_hash_exists(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard,
+    uid_t *uid, gid_t *gid, struct ifnet *ifp)
+{
+       struct inpcbhead *head;
+       struct inpcb *inp;
+       u_short fport = fport_arg, lport = lport_arg;
+       int found;
+
+       *uid = UID_MAX;
+       *gid = GID_MAX;
+
+       lck_rw_lock_shared(pcbinfo->ipi_lock);
+
+       /*
+        * First look for an exact match.
+        */
+       head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
+           lport, fport, pcbinfo->ipi_hashmask)];
+       LIST_FOREACH(inp, head, inp_hash) {
+               if (!(inp->inp_vflag & INP_IPV6))
+                       continue;
+
+               if (inp_restricted_recv(inp, ifp))
+                       continue;
+
+               if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+                   IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+                   inp->inp_fport == fport &&
+                   inp->inp_lport == lport) {
+                       if ((found = (inp->inp_socket != NULL))) {
+                               /*
+                                * Found. Check if pcb is still valid
+                                */
+                               *uid = kauth_cred_getuid(
+                                   inp->inp_socket->so_cred);
+                               *gid = kauth_cred_getgid(
+                                   inp->inp_socket->so_cred);
+                       }
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       return (found);
+               }
        }
+       if (wildcard) {
+               struct inpcb *local_wild = NULL;
+
+               head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+                   pcbinfo->ipi_hashmask)];
+               LIST_FOREACH(inp, head, inp_hash) {
+                       if (!(inp->inp_vflag & INP_IPV6))
+                               continue;
+
+                       if (inp_restricted_recv(inp, ifp))
+                               continue;
+
+                       if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+                           inp->inp_lport == lport) {
+                               if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
+                                   laddr)) {
+                                       found = (inp->inp_socket != NULL);
+                                       if (found) {
+                                               *uid = kauth_cred_getuid(
+                                                   inp->inp_socket->so_cred);
+                                               *gid = kauth_cred_getgid(
+                                                   inp->inp_socket->so_cred);
+                                       }
+                                       lck_rw_done(pcbinfo->ipi_lock);
+                                       return (found);
+                               } else if (IN6_IS_ADDR_UNSPECIFIED(
+                                   &inp->in6p_laddr)) {
+                                       local_wild = inp;
+                               }
+                       }
+               }
+               if (local_wild) {
+                       if ((found = (local_wild->inp_socket != NULL))) {
+                               *uid = kauth_cred_getuid(
+                                   local_wild->inp_socket->so_cred);
+                               *gid = kauth_cred_getgid(
+                                   local_wild->inp_socket->so_cred);
+                       }
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       return (found);
+               }
+       }
+
+       /*
+        * Not found.
+        */
+       lck_rw_done(pcbinfo->ipi_lock);
+       return (0);
 }
 
 /*
  * Lookup PCB in hash list.
  */
 struct inpcb *
-in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
-       struct inpcbinfo *pcbinfo;
-       struct in6_addr *faddr, *laddr;
-       u_int fport_arg, lport_arg;
-       int wildcard;
-       struct ifnet *ifp;
+in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
+    u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int wildcard,
+    struct ifnet *ifp)
 {
        struct inpcbhead *head;
-       register struct inpcb *inp;
+       struct inpcb *inp;
        u_short fport = fport_arg, lport = lport_arg;
-       int faith;
 
-#if defined(NFAITH) && NFAITH > 0
-       faith = faithprefix(laddr);
-#else
-       faith = 0;
-#endif
+       lck_rw_lock_shared(pcbinfo->ipi_lock);
 
        /*
         * First look for an exact match.
         */
-       head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
-                                             lport, fport,
-                                             pcbinfo->hashmask)];
+       head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
+           lport, fport, pcbinfo->ipi_hashmask)];
        LIST_FOREACH(inp, head, inp_hash) {
-               if ((inp->inp_vflag & INP_IPV6) == 0)
+               if (!(inp->inp_vflag & INP_IPV6))
+                       continue;
+
+               if (inp_restricted_recv(inp, ifp))
                        continue;
+
                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
                    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
                    inp->inp_fport == fport &&
                    inp->inp_lport == lport) {
                        /*
-                        * Found.
+                        * Found. Check if pcb is still valid
                         */
-                       return (inp);
+                       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) !=
+                           WNT_STOPUSING) {
+                               lck_rw_done(pcbinfo->ipi_lock);
+                               return (inp);
+                       } else {
+                               /* it's there but dead, say it isn't found */
+                               lck_rw_done(pcbinfo->ipi_lock);
+                               return (NULL);
+                       }
                }
        }
        if (wildcard) {
                struct inpcb *local_wild = NULL;
 
-               head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
-                                                     pcbinfo->hashmask)];
+               head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+                   pcbinfo->ipi_hashmask)];
                LIST_FOREACH(inp, head, inp_hash) {
-                       if ((inp->inp_vflag & INP_IPV6) == 0)
+                       if (!(inp->inp_vflag & INP_IPV6))
                                continue;
+
+                       if (inp_restricted_recv(inp, ifp))
+                               continue;
+
                        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
                            inp->inp_lport == lport) {
-                               if (faith && (inp->inp_flags & INP_FAITH) == 0)
-                                       continue;
                                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
-                                                      laddr))
-                                       return (inp);
-                               else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+                                   laddr)) {
+                                       if (in_pcb_checkstate(inp, WNT_ACQUIRE,
+                                           0) != WNT_STOPUSING) {
+                                               lck_rw_done(pcbinfo->ipi_lock);
+                                               return (inp);
+                                       } else {
+                                               /* dead; say it isn't found */
+                                               lck_rw_done(pcbinfo->ipi_lock);
+                                               return (NULL);
+                                       }
+                               } else if (IN6_IS_ADDR_UNSPECIFIED(
+                                   &inp->in6p_laddr)) {
                                        local_wild = inp;
+                               }
                        }
                }
-               return (local_wild);
+               if (local_wild && in_pcb_checkstate(local_wild,
+                   WNT_ACQUIRE, 0) != WNT_STOPUSING) {
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       return (local_wild);
+               } else {
+                       lck_rw_done(pcbinfo->ipi_lock);
+                       return (NULL);
+               }
        }
 
        /*
         * Not found.
         */
+       lck_rw_done(pcbinfo->ipi_lock);
        return (NULL);
 }
 
@@ -1115,15 +1295,70 @@ init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
        struct ip6_hdr *ip;
 
        ip = mtod(m, struct ip6_hdr *);
-       bzero(sin6, sizeof(*sin6));
-       sin6->sin6_len = sizeof(*sin6);
+       bzero(sin6, sizeof (*sin6));
+       sin6->sin6_len = sizeof (*sin6);
        sin6->sin6_family = AF_INET6;
        sin6->sin6_addr = ip->ip6_src;
-       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
                sin6->sin6_addr.s6_addr16[1] = 0;
-       sin6->sin6_scope_id =
-               (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-               ? m->m_pkthdr.rcvif->if_index : 0;
+               if ((m->m_pkthdr.pkt_flags & (PKTF_LOOP|PKTF_IFAINFO)) ==
+                   (PKTF_LOOP|PKTF_IFAINFO))
+                       sin6->sin6_scope_id = m->m_pkthdr.src_ifindex;
+               else if (m->m_pkthdr.rcvif != NULL)
+                       sin6->sin6_scope_id = m->m_pkthdr.rcvif->if_index;
+       }
+}
+
+/*
+ * The following routines implement this scheme:
+ *
+ * Callers of ip6_output() that intend to cache the route in the inpcb pass
+ * a local copy of the struct route to ip6_output().  Using a local copy of
+ * the cached route significantly simplifies things as IP no longer has to
+ * worry about having exclusive access to the passed in struct route, since
+ * it's defined in the caller's stack; in essence, this allows for a lock-
+ * less operation when updating the struct route at the IP level and below,
+ * whenever necessary. The scheme works as follows:
+ *
+ * Prior to dropping the socket's lock and calling ip6_output(), the caller
+ * copies the struct route from the inpcb into its stack, and adds a reference
+ * to the cached route entry, if there was any.  The socket's lock is then
+ * dropped and ip6_output() is called with a pointer to the copy of struct
+ * route defined on the stack (not to the one in the inpcb.)
+ *
+ * Upon returning from ip6_output(), the caller then acquires the socket's
+ * lock and synchronizes the cache; if there is no route cached in the inpcb,
+ * it copies the local copy of struct route (which may or may not contain any
+ * route) back into the cache; otherwise, if the inpcb has a route cached in
+ * it, the one in the local copy will be freed, if there's any.  Trashing the
+ * cached route in the inpcb can be avoided because ip6_output() is single-
+ * threaded per-PCB (i.e. multiple transmits on a PCB are always serialized
+ * by the socket/transport layer.)
+ */
+void
+in6p_route_copyout(struct inpcb *inp, struct route_in6 *dst)
+{
+       struct route_in6 *src = &inp->in6p_route;
+
+       lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
+       /* Minor sanity check */
+       if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6)
+               panic("%s: wrong or corrupted route: %p", __func__, src);
+
+       route_copyout((struct route *)dst, (struct route *)src, sizeof (*dst));
+}
+
+void
+in6p_route_copyin(struct inpcb *inp, struct route_in6 *src)
+{
+       struct route_in6 *dst = &inp->in6p_route;
+
+       lck_mtx_assert(&inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
+
+       /* Minor sanity check */
+       if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET6)
+               panic("%s: wrong or corrupted route: %p", __func__, src);
 
-       return;
+       route_copyin((struct route *)src, (struct route *)dst, sizeof (*src));
 }