git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet6/in6_pcb.c
xnu-1699.22.73.tar.gz
[apple/xnu.git] / bsd / netinet6 / in6_pcb.c
index daa61ee2fe2ea159480082f41ab2d1e0d837c9b7..2ea4d7a5d5d656b2c98c1bb8c664f505e80342c3 100644 (file)
@@ -1,7 +1,34 @@
+/*
+ * Copyright (c) 2003-2011 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -13,7 +40,7 @@
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -25,6 +52,7 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
  */
 
 /*
  *     @(#)in_pcb.c    8.2 (Berkeley) 1/4/94
  */
 
-#ifdef __NetBSD__      /*XXX*/
-#include "opt_ipsec.h"
-#endif
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
+#include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/proc.h>
+#include <sys/kauth.h>
+#include <sys/priv.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip6.h>
+#include <netinet/ip_var.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/in6_pcb.h>
 #include <net/if_types.h>
 
-#include "faith.h"
+#include <kern/kern_types.h>
+#include <kern/zalloc.h>
 
 #if IPSEC
 #include <netinet6/ipsec.h>
+#if INET6
+#include <netinet6/ipsec6.h>
+#endif
+#include <netinet6/ah.h>
+#if INET6
+#include <netinet6/ah6.h>
+#endif
 #include <netkey/key.h>
-#include <netkey/key_debug.h>
 #endif /* IPSEC */
 
-struct in6_addr zeroin6_addr;
+struct in6_addr zeroin6_addr;
+
+/*
+  in6_pcblookup_local_and_cleanup does everything
+  in6_pcblookup_local does, but it also checks for a
+  socket that is going away. Since we know that the lock
+  is held read+write when this function is called, we
+  can safely dispose of this socket, as the slow
+  timer would usually do, and return NULL. This is
+  useful for bind.
+*/
+static struct inpcb*
+in6_pcblookup_local_and_cleanup(
+       struct inpcbinfo *pcbinfo,
+       struct in6_addr *laddr,
+       u_int lport_arg,
+       int wild_okay)
+{
+       struct inpcb *inp;
+       
+       /* Perform normal lookup */
+       inp = in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay);
+       
+       /* Check if we found a match but it's waiting to be disposed */
+       if (inp && inp->inp_wantcnt == WNT_STOPUSING) {
+               struct socket *so = inp->inp_socket;
+               
+               lck_mtx_lock(&inp->inpcb_mtx);
+               
+               if (so->so_usecount == 0) {
+                       if (inp->inp_state != INPCB_STATE_DEAD)
+                               in6_pcbdetach(inp);
+                       in_pcbdispose(inp);
+                       inp = NULL;
+               }
+               else {
+                       lck_mtx_unlock(&inp->inpcb_mtx);
+               }
+       }
+       
+       return inp;
+}
 
 int
-in6_pcbbind(inp, nam, p)
-       register struct inpcb *inp;
-       struct sockaddr *nam;
-       struct proc *p;
+in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct proc *p)
 {
        struct socket *so = inp->inp_socket;
-       unsigned short *lastport;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
        struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
        u_short lport = 0;
        int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
        int error;
+       kauth_cred_t cred;
 
-       if (!in6_ifaddr) /* XXX broken! */
+       if (!in6_ifaddrs) /* XXX broken! */
                return (EADDRNOTAVAIL);
        if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
                return(EINVAL);
        if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
-               wild = INPLOOKUP_WILDCARD;
+               wild = 1;
+       socket_unlock(so, 0); /* keep reference */
+       lck_rw_lock_exclusive(pcbinfo->mtx);
        if (nam) {
-               if (nam->sa_len != sizeof(*sin6))
+               unsigned int outif = 0;
+
+               sin6 = (struct sockaddr_in6 *)nam;
+               if (nam->sa_len != sizeof(*sin6)) {
+                       lck_rw_done(pcbinfo->mtx);
+                       socket_lock(so, 0);
                        return(EINVAL);
+               }
                /*
-                * We should check the family, but old programs
-                * incorrectly fail to intialize it.
+                * family check.
                 */
-               if (nam->sa_family != AF_INET6)
+               if (nam->sa_family != AF_INET6) {
+                       lck_rw_done(pcbinfo->mtx);
+                       socket_lock(so, 0);
                        return(EAFNOSUPPORT);
-               sin6 = (struct sockaddr_in6 *)nam;
+               }
 
-               /*
-                * If the scope of the destination is link-local, embed the
-                * interface index in the address.
-                */
-               if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
-                       /* XXX boundary check is assumed to be already done. */
-                       /* XXX sin6_scope_id is weaker than advanced-api. */
-                       struct in6_pktinfo *pi;
-                       if (inp->in6p_outputopts &&
-                           (pi = inp->in6p_outputopts->ip6po_pktinfo) &&
-                           pi->ipi6_ifindex) {
-                               sin6->sin6_addr.s6_addr16[1]
-                                       = htons(pi->ipi6_ifindex);
-                       } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)
-                               && inp->in6p_moptions
-                               && inp->in6p_moptions->im6o_multicast_ifp) {
-                               sin6->sin6_addr.s6_addr16[1] =
-                                       htons(inp->in6p_moptions->im6o_multicast_ifp->if_index);
-                       } else if (sin6->sin6_scope_id) {
-                               /* boundary check */
-                               if (sin6->sin6_scope_id < 0 
-                                || if_index < sin6->sin6_scope_id) {
-                                       return ENXIO;  /* XXX EINVAL? */
-                               }
-                               sin6->sin6_addr.s6_addr16[1]
-                                       = htons(sin6->sin6_scope_id & 0xffff);/*XXX*/
-                               /* this must be cleared for ifa_ifwithaddr() */
-                               sin6->sin6_scope_id = 0;
-                       }
+               /* KAME hack: embed scopeid */
+               if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL,
+                   NULL) != 0) {
+                       lck_rw_done(pcbinfo->mtx);
+                       socket_lock(so, 0);
+                       return EINVAL;
                }
+               /* this must be cleared for ifa_ifwithaddr() */
+               sin6->sin6_scope_id = 0;
 
                lport = sin6->sin6_port;
                if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
@@ -176,140 +237,139 @@ in6_pcbbind(inp, nam, p)
                        if (so->so_options & SO_REUSEADDR)
                                reuseport = SO_REUSEADDR|SO_REUSEPORT;
                } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
-                       struct ifaddr *ia = NULL;
+                       struct ifaddr *ifa;
 
                        sin6->sin6_port = 0;            /* yech... */
-                       if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0)
+                       if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == 0) {
+                               lck_rw_done(pcbinfo->mtx);
+                               socket_lock(so, 0);
                                return(EADDRNOTAVAIL);
+                       }
 
                        /*
                         * XXX: bind to an anycast address might accidentally
                         * cause sending a packet with anycast source address.
+                        * We should allow binding to a deprecated address, since
+                        * the application dares to use it.
                         */
-                       if (ia &&
-                           ((struct in6_ifaddr *)ia)->ia6_flags &
-                           (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
-                            IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
-                               return(EADDRNOTAVAIL);
+                       if (ifa != NULL) {
+                               IFA_LOCK_SPIN(ifa);
+                               if (((struct in6_ifaddr *)ifa)->ia6_flags &
+                                   (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
+                                       IFA_UNLOCK(ifa);
+                                       IFA_REMREF(ifa);
+                                       lck_rw_done(pcbinfo->mtx);
+                                       socket_lock(so, 0);
+                                       return(EADDRNOTAVAIL);
+                               }
+                               outif = ifa->ifa_ifp->if_index;
+                               IFA_UNLOCK(ifa);
+                               IFA_REMREF(ifa);
                        }
                }
                if (lport) {
                        struct inpcb *t;
 
                        /* GROSS */
-                       if (ntohs(lport) < IPV6PORT_RESERVED && p &&
-#if 0
-                           suser(p->p_ucred, &p->p_acflag))
-#else
-                            ((so->so_state & SS_PRIV) == 0))
-#endif
-                               return(EACCES);
+                       if (ntohs(lport) < IPV6PORT_RESERVED) {
+                               cred = kauth_cred_proc_ref(p);
+                               error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
+                               kauth_cred_unref(&cred);
+                               if (error != 0) {
+                                       lck_rw_done(pcbinfo->mtx);
+                                       socket_lock(so, 0);
+                                       return(EACCES);
+                               }
+                       }
 
                        if (so->so_uid &&
                            !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
-                               t = in6_pcblookup_local(inp->inp_pcbinfo,
-                                                       &sin6->sin6_addr,
-                                                       lport,
-                                                       INPLOOKUP_WILDCARD);
+                               t = in6_pcblookup_local_and_cleanup(pcbinfo,
+                                   &sin6->sin6_addr, lport,
+                                   INPLOOKUP_WILDCARD);
                                if (t &&
                                    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
                                     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
                                     (t->inp_socket->so_options &
                                      SO_REUSEPORT) == 0) &&
-                                   so->so_uid != t->inp_socket->so_uid)
+                                    (so->so_uid != t->inp_socket->so_uid) &&
+                                    ((t->inp_socket->so_flags & SOF_REUSESHAREUID) == 0)) {
+                                       lck_rw_done(pcbinfo->mtx);
+                                       socket_lock(so, 0);
                                        return (EADDRINUSE);
+                               }
+                               if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
+                                   IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+                                       struct sockaddr_in sin;
+
+                                       in6_sin6_2_sin(&sin, sin6);
+                                       t = in_pcblookup_local_and_cleanup(pcbinfo,
+                                               sin.sin_addr, lport,
+                                               INPLOOKUP_WILDCARD);
+                                       if (t && (t->inp_socket->so_options & SO_REUSEPORT) == 0 &&
+                                           (so->so_uid !=
+                                            t->inp_socket->so_uid) &&
+                                           (ntohl(t->inp_laddr.s_addr) !=
+                                            INADDR_ANY ||
+                                            INP_SOCKAF(so) ==
+                                            INP_SOCKAF(t->inp_socket))) {
+
+                                               lck_rw_done(pcbinfo->mtx);
+                                               socket_lock(so, 0);
+                                               return (EADDRINUSE);
+                                       }
+                               }
                        }
-                       t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
+                       t = in6_pcblookup_local_and_cleanup(pcbinfo, &sin6->sin6_addr,
                                                lport, wild);
-                       if (t && (reuseport & t->inp_socket->so_options) == 0)
+                       if (t && (reuseport & t->inp_socket->so_options) == 0) {
+                               lck_rw_done(pcbinfo->mtx);
+                               socket_lock(so, 0);
                                return(EADDRINUSE);
+                       }
+                       if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
+                           IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+                               struct sockaddr_in sin;
+
+                               in6_sin6_2_sin(&sin, sin6);
+                               t = in_pcblookup_local_and_cleanup(pcbinfo, sin.sin_addr,
+                                                      lport, wild);
+                               if (t &&
+                                   (reuseport & t->inp_socket->so_options)
+                                   == 0 &&
+                                   (ntohl(t->inp_laddr.s_addr)
+                                    != INADDR_ANY ||
+                                    INP_SOCKAF(so) ==
+                                    INP_SOCKAF(t->inp_socket))) {
+                                       lck_rw_done(pcbinfo->mtx);
+                                       socket_lock(so, 0);
+                                       return (EADDRINUSE);
+                               }
+                       }
                }
                inp->in6p_laddr = sin6->sin6_addr;
+               inp->in6p_last_outif = outif;
        }
+       socket_lock(so, 0);
        if (lport == 0) {
-               ushort first, last;
-               int count;
-
-               inp->inp_flags |= INP_ANONPORT;
-
-               if (inp->inp_flags & INP_HIGHPORT) {
-                       first = ipport_hifirstauto;     /* sysctl */
-                       last  = ipport_hilastauto;
-                       lastport = &pcbinfo->lasthi;
-               } else if (inp->inp_flags & INP_LOWPORT) {
-#if 0
-                       if (p && (error = suser(p->p_ucred, &p->p_acflag)))
-                               return error;
-#else
-                        if ((so->so_state & SS_PRIV) == 0)
-                                return (EPERM);
-#endif
-                       first = ipport_lowfirstauto;    /* 1023 */
-                       last  = ipport_lowlastauto;     /* 600 */
-                       lastport = &pcbinfo->lastlow;
-               } else {
-                       first = ipport_firstauto;       /* sysctl */
-                       last  = ipport_lastauto;
-                       lastport = &pcbinfo->lastport;
-               }
-               /*
-                * Simple check to ensure all ports are not used up causing
-                * a deadlock here.
-                *
-                * We split the two cases (up and down) so that the direction
-                * is not being tested on each round of the loop.
-                */
-               if (first > last) {
-                       /*
-                        * counting down
-                        */
-                       count = first - last;
-
-                       do {
-                               if (count-- < 0) {      /* completely used? */
-                                       /*
-                                        * Undo any address bind that may have
-                                        * occurred above.
-                                        */
-                                       inp->in6p_laddr = in6addr_any;
-                                       return (EAGAIN);
-                               }
-                               --*lastport;
-                               if (*lastport > first || *lastport < last)
-                                       *lastport = first;
-                               lport = htons(*lastport);
-                       } while (in6_pcblookup_local(pcbinfo,
-                                &inp->in6p_laddr, lport, wild));
-               } else {
-                       /*
-                        * counting up
-                        */
-                       count = last - first;
-
-                       do {
-                               if (count-- < 0) {      /* completely used? */
-                                       /*
-                                        * Undo any address bind that may have
-                                        * occurred above.
-                                        */
-                                       inp->in6p_laddr = in6addr_any;
-                                       return (EAGAIN);
-                               }
-                               ++*lastport;
-                               if (*lastport < first || *lastport > last)
-                                       *lastport = first;
-                               lport = htons(*lastport);
-                       } while (in6_pcblookup_local(pcbinfo,
-                                &inp->in6p_laddr, lport, wild));
+               int e;
+               if ((e = in6_pcbsetport(&inp->in6p_laddr, inp, p, 1)) != 0) {
+                       lck_rw_done(pcbinfo->mtx);
+                       return(e);
                }
        }
-       inp->inp_lport = lport;
-       if (in_pcbinshash(inp) != 0) {
-               inp->in6p_laddr = in6addr_any;
-               inp->inp_lport = 0;
-               return (EAGAIN);
+       else {
+               inp->inp_lport = lport;
+               if (in_pcbinshash(inp, 1) != 0) {
+                       inp->in6p_laddr = in6addr_any;
+                       inp->inp_lport = 0;
+                       inp->in6p_last_outif = 0;
+                       lck_rw_done(pcbinfo->mtx);
+                       return (EAGAIN);
+               }
        }
-       inp->in6p_flowinfo = sin6 ? sin6->sin6_flowinfo : 0;    /*XXX*/
+       lck_rw_done(pcbinfo->mtx);
+       sflt_notify(so, sock_evt_bound, NULL);
        return(0);
 }
 
@@ -326,15 +386,14 @@ in6_pcbbind(inp, nam, p)
  */
 
 int
-in6_pcbladdr(inp, nam, plocal_addr6)
-       register struct inpcb *inp;
-       struct sockaddr *nam;
-       struct in6_addr **plocal_addr6;
+in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
+    struct in6_addr *plocal_addr6, unsigned int *poutif)
 {
-       register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
-       struct in6_pktinfo *pi;
-       struct ifnet *ifp = NULL;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+       struct in6_addr *addr6 = NULL;
+       struct in6_addr src_storage;
        int error = 0;
+       unsigned int ifscope;
 
        if (nam->sa_len != sizeof (*sin6))
                return (EINVAL);
@@ -343,72 +402,49 @@ in6_pcbladdr(inp, nam, plocal_addr6)
        if (sin6->sin6_port == 0)
                return (EADDRNOTAVAIL);
 
-       /*
-        * If the scope of the destination is link-local, embed the interface
-        * index in the address.
-        */
-       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) {
-               /* XXX boundary check is assumed to be already done. */
-               /* XXX sin6_scope_id is weaker than advanced-api. */
-               if (inp->in6p_outputopts &&
-                   (pi = inp->in6p_outputopts->ip6po_pktinfo) &&
-                   pi->ipi6_ifindex) {
-                       sin6->sin6_addr.s6_addr16[1] = htons(pi->ipi6_ifindex);
-                       ifp = ifindex2ifnet[pi->ipi6_ifindex];
-               }
-               else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
-                        inp->in6p_moptions &&
-                        inp->in6p_moptions->im6o_multicast_ifp) {
-                       sin6->sin6_addr.s6_addr16[1] =
-                               htons(inp->in6p_moptions->im6o_multicast_ifp->if_index);
-                       ifp = ifindex2ifnet[inp->in6p_moptions->im6o_multicast_ifp->if_index];
-               } else if (sin6->sin6_scope_id) {
-                       /* boundary check */
-                       if (sin6->sin6_scope_id < 0 
-                        || if_index < sin6->sin6_scope_id) {
-                               return ENXIO;  /* XXX EINVAL? */
-                       }
-                       sin6->sin6_addr.s6_addr16[1]
-                               = htons(sin6->sin6_scope_id & 0xffff);/*XXX*/
-                       ifp = ifindex2ifnet[sin6->sin6_scope_id];
-               }
-       }
+       /* KAME hack: embed scopeid */
+       if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL, NULL) != 0)
+               return EINVAL;
 
-       if (in6_ifaddr) {
+       if (in6_ifaddrs) {
                /*
                 * If the destination address is UNSPECIFIED addr,
                 * use the loopback addr, e.g ::1.
                 */
-#define        satosin6(sa)    ((struct sockaddr_in6 *)(sa))
-#define ifatoia6(ifa)  ((struct in6_ifaddr *)(ifa))
                if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
                        sin6->sin6_addr = in6addr_loopback;
        }
-       {
-               /*
-                * XXX: in6_selectsrc might replace the bound local address
-                * with the address specified by setsockopt(IPV6_PKTINFO).
-                * Is it the intended behavior?
-                */
-               *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts,
-                                             inp->in6p_moptions,
-                                             &inp->in6p_route,
-                                             &inp->in6p_laddr, &error);
-               if (*plocal_addr6 == 0) {
-                       if (error == 0)
-                               error = EADDRNOTAVAIL;
-                       return(error);
-               }
-               /*
-                * Don't do pcblookup call here; return interface in 
-                * plocal_addr6
-                * and exit to caller, that will do the lookup.
-                */
+
+       ifscope = (inp->inp_flags & INP_BOUND_IF) ?
+          inp->inp_boundif : IFSCOPE_NONE;
+
+       /*
+        * XXX: in6_selectsrc might replace the bound local address
+        * with the address specified by setsockopt(IPV6_PKTINFO).
+        * Is it the intended behavior?
+        */
+       addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, inp,
+           &inp->in6p_route, NULL, &src_storage, ifscope, &error);
+       if (addr6 == 0) {
+               if (error == 0)
+                       error = EADDRNOTAVAIL;
+               return(error);
        }
 
-       if (inp->in6p_route.ro_rt)
-               ifp = inp->in6p_route.ro_rt->rt_ifp;
+       if (poutif != NULL) {
+               struct rtentry *rt;
+               if ((rt = inp->in6p_route.ro_rt) != NULL)
+                       *poutif = rt->rt_ifp->if_index;
+               else
+                       *poutif = 0;
+       }
 
+       *plocal_addr6 = *addr6;
+       /*
+        * Don't do the pcblookup call here; return the selected
+        * source address in *plocal_addr6
+        * and exit to the caller, which will do the lookup.
+        */
        return(0);
 }
 
@@ -420,26 +456,32 @@ in6_pcbladdr(inp, nam, plocal_addr6)
  * then pick one.
  */
 int
-in6_pcbconnect(inp, nam, p)
-       register struct inpcb *inp;
-       struct sockaddr *nam;
-       struct proc *p;
+in6_pcbconnect(
+       struct inpcb *inp,
+       struct sockaddr *nam,
+       struct proc *p)
 {
-       struct in6_addr *addr6;
-       register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+       struct in6_addr addr6;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
+       struct inpcb *pcb;
        int error;
+       unsigned int outif = 0;
 
        /*
-        *   Call inner routine, to assign local interface address.
+        * Call inner routine, to assign local interface address.
+        * in6_pcbladdr() may automatically fill in sin6_scope_id.
         */
-       if (error = in6_pcbladdr(inp, nam, &addr6))
+       if ((error = in6_pcbladdr(inp, nam, &addr6, &outif)) != 0)
                return(error);
-
-       if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
+       socket_unlock(inp->inp_socket, 0);
+       pcb = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
                               sin6->sin6_port,
                              IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
-                             ? addr6 : &inp->in6p_laddr,
-                             inp->inp_lport, 0, NULL) != NULL) {
+                             ? &addr6 : &inp->in6p_laddr,
+                             inp->inp_lport, 0, NULL);
+       socket_lock(inp->inp_socket, 0);
+       if (pcb != NULL) {
+               in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0);
                return (EADDRINUSE);
        }
        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
@@ -448,292 +490,142 @@ in6_pcbconnect(inp, nam, p)
                        if (error)
                                return (error);
                }
-               inp->in6p_laddr = *addr6;
+               inp->in6p_laddr = addr6;
+               inp->in6p_last_outif = outif;
+       }
+       if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
+               /* lock inversion issue, mostly with UDP multicast packets */
+               socket_unlock(inp->inp_socket, 0);
+               lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
+               socket_lock(inp->inp_socket, 0);
        }
        inp->in6p_faddr = sin6->sin6_addr;
        inp->inp_fport = sin6->sin6_port;
-       /*
-        * xxx kazu flowlabel is necessary for connect?
-        * but if this line is missing, the garbage value remains.
-        */
-       inp->in6p_flowinfo = sin6->sin6_flowinfo;
+       /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
+       inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
+       if (inp->in6p_flags & IN6P_AUTOFLOWLABEL)
+               inp->in6p_flowinfo |=
+                   (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK);
 
        in_pcbrehash(inp);
+       lck_rw_done(inp->inp_pcbinfo->mtx);
        return (0);
 }
 
-#if 0
-/*
- * Return an IPv6 address, which is the most appropriate for given
- * destination and user specified options.
- * If necessary, this function lookups the routing table and return
- * an entry to the caller for later use.
- */
-struct in6_addr *
-in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp)
-       struct sockaddr_in6 *dstsock;
-       struct ip6_pktopts *opts;
-       struct ip6_moptions *mopts;
-       struct route_in6 *ro;
-       struct in6_addr *laddr;
-       int *errorp;
+void
+in6_pcbdisconnect(
+       struct inpcb *inp)
 {
-       struct in6_addr *dst;
-       struct in6_ifaddr *ia6 = 0;
-       struct in6_pktinfo *pi = NULL;
-
-       dst = &dstsock->sin6_addr;
-       *errorp = 0;
-
-       /*
-        * If the source address is explicitly specified by the caller,
-        * use it.
-        */
-       if (opts && (pi = opts->ip6po_pktinfo) &&
-           !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr))
-               return(&pi->ipi6_addr);
-
-       /*
-        * If the source address is not specified but the socket(if any)
-        * is already bound, use the bound address.
-        */
-       if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr))
-               return(laddr);
-
-       /*
-        * If the caller doesn't specify the source address but
-        * the outgoing interface, use an address associated with
-        * the interface.
-        */
-       if (pi && pi->ipi6_ifindex) {
-               /* XXX boundary check is assumed to be already done. */
-               ia6 = in6_ifawithscope(ifindex2ifnet[pi->ipi6_ifindex],
-                                      dst);
-               if (ia6 == 0) {
-                       *errorp = EADDRNOTAVAIL;
-                       return(0);
-               }
-               return(&satosin6(&ia6->ia_addr)->sin6_addr);
-       }
-
-       /*
-        * If the destination address is a link-local unicast address or
-        * a multicast address, and if the outgoing interface is specified
-        * by the sin6_scope_id filed, use an address associated with the
-        * interface.
-        * XXX: We're now trying to define more specific semantics of
-        *      sin6_scope_id field, so this part will be rewritten in
-        *      the near future.
-        */
-       if ((IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst)) &&
-           dstsock->sin6_scope_id) {
-               /*
-                * I'm not sure if boundary check for scope_id is done
-                * somewhere...
-                */
-               if (dstsock->sin6_scope_id < 0 ||
-                   if_index < dstsock->sin6_scope_id) {
-                       *errorp = ENXIO; /* XXX: better error? */
-                       return(0);
-               }
-               ia6 = in6_ifawithscope(ifindex2ifnet[dstsock->sin6_scope_id],
-                                      dst);
-               if (ia6 == 0) {
-                       *errorp = EADDRNOTAVAIL;
-                       return(0);
-               }
-               return(&satosin6(&ia6->ia_addr)->sin6_addr);
-       }
-
-       /*
-        * If the destination address is a multicast address and
-        * the outgoing interface for the address is specified
-        * by the caller, use an address associated with the interface.
-        * There is a sanity check here; if the destination has node-local
-        * scope, the outgoing interfacde should be a loopback address.
-        * Even if the outgoing interface is not specified, we also
-        * choose a loopback interface as the outgoing interface.
-        */
-       if (IN6_IS_ADDR_MULTICAST(dst)) {
-               struct ifnet *ifp = mopts ? mopts->im6o_multicast_ifp : NULL;
-#ifdef __bsdi__
-               extern struct ifnet loif;
-#endif
-
-               if (ifp == NULL && IN6_IS_ADDR_MC_NODELOCAL(dst)) {
-#ifdef __bsdi__
-                       ifp = &loif;
-#else
-                       ifp = &loif[0];
-#endif
-               }
-
-               if (ifp) {
-                       ia6 = in6_ifawithscope(ifp, dst);
-                       if (ia6 == 0) {
-                               *errorp = EADDRNOTAVAIL;
-                               return(0);
-                       }
-                       return(&ia6->ia_addr.sin6_addr);
-               }
+       if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->mtx)) {
+               /* lock inversion issue, mostly with UDP multicast packets */
+               socket_unlock(inp->inp_socket, 0);
+               lck_rw_lock_exclusive(inp->inp_pcbinfo->mtx);
+               socket_lock(inp->inp_socket, 0);
        }
+       bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
+       inp->inp_fport = 0;
+       /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
+       inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK;
+       in_pcbrehash(inp);
+       lck_rw_done(inp->inp_pcbinfo->mtx);
+       if (inp->inp_socket->so_state & SS_NOFDREF)
+               in6_pcbdetach(inp);
+}
 
-       /*
-        * If the next hop address for the packet is specified
-        * by caller, use an address associated with the route
-        * to the next hop.
-        */
-       {
-               struct sockaddr_in6 *sin6_next;
-               struct rtentry *rt;
+void
+in6_pcbdetach(
+       struct inpcb *inp)
+{
+       struct socket *so = inp->inp_socket;
+       struct inpcbinfo *ipi = inp->inp_pcbinfo;
 
-               if (opts && opts->ip6po_nexthop) {
-                       sin6_next = satosin6(opts->ip6po_nexthop);
-                       rt = nd6_lookup(&sin6_next->sin6_addr, 1, NULL);
-                       if (rt) {
-                               ia6 = in6_ifawithscope(rt->rt_ifp, dst);
-                               if (ia6 == 0)
-                                       ia6 = ifatoia6(rt->rt_ifa);
-                       }
-                       if (ia6 == 0) {
-                               *errorp = EADDRNOTAVAIL;
-                               return(0);
-                       }
-                       return(&satosin6(&ia6->ia_addr)->sin6_addr);
-               }
+#if IPSEC
+       if (inp->in6p_sp != NULL) {
+               ipsec6_delete_pcbpolicy(inp);
        }
+#endif /* IPSEC */
 
-       /*
-        * If route is known or can be allocated now,
-        * our src addr is taken from the i/f, else punt.
-        */
-       if (ro) {
-               if (ro->ro_rt &&
-                   !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst)) {
-                       RTFREE(ro->ro_rt);
-                       ro->ro_rt = (struct rtentry *)0;
-               }
-               if (ro->ro_rt == (struct rtentry *)0 ||
-                   ro->ro_rt->rt_ifp == (struct ifnet *)0) {
-                       /* No route yet, so try to acquire one */
-                       bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
-                       ro->ro_dst.sin6_family = AF_INET6;
-                       ro->ro_dst.sin6_len = sizeof(struct sockaddr_in6);
-                       ro->ro_dst.sin6_addr = *dst;
-                       if (IN6_IS_ADDR_MULTICAST(dst)) {
-#if defined(__FreeBSD__) || defined (__APPLE__)
-                               ro->ro_rt = rtalloc1(&((struct route *)ro)
-                                                    ->ro_dst, 0, 0UL);
-#endif /*__FreeBSD__*/
-#if defined(__bsdi__) || defined(__NetBSD__)
-                               ro->ro_rt = rtalloc1(&((struct route *)ro)
-                                                    ->ro_dst, 0);
-#endif /*__bsdi__*/
-                       } else {
-                               rtalloc((struct route *)ro);
-                       }
-               }
-
-               /*
-                * in_pcbconnect() checks out IFF_LOOPBACK to skip using
-                * the address. But we don't know why it does so.
-                * It is necessary to ensure the scope even for lo0
-                * so doesn't check out IFF_LOOPBACK.
-                */
+       if (in_pcb_checkstate(inp, WNT_STOPUSING, 1) != WNT_STOPUSING)
+               printf("in6_pcbdetach so=%p can't be marked dead ok\n", so);
 
-               if (ro->ro_rt) {
-                       ia6 = in6_ifawithscope(ro->ro_rt->rt_ifa->ifa_ifp, dst);
-                       if (ia6 == 0) /* xxx scope error ?*/
-                               ia6 = ifatoia6(ro->ro_rt->rt_ifa);
-               }
-#if 0
-               /*
-                * xxx The followings are necessary? (kazu)
-                * I don't think so.
-                * It's for SO_DONTROUTE option in IPv4.(jinmei)
-                */
-               if (ia6 == 0) {
-                       struct sockaddr_in6 sin6 = {sizeof(sin6), AF_INET6, 0};
+       inp->inp_state = INPCB_STATE_DEAD;
 
-                       sin6->sin6_addr = *dst;
+       if ((so->so_flags & SOF_PCBCLEARING) == 0) {
+               struct ip_moptions *imo;
+               struct ip6_moptions *im6o;
 
-                       ia6 = ifatoia6(ifa_ifwithdstaddr(sin6tosa(&sin6)));
-                       if (ia6 == 0)
-                               ia6 = ifatoia6(ifa_ifwithnet(sin6tosa(&sin6)));
-                       if (ia6 == 0) {
-                               *errorp = EHOSTUNREACH; /* no route */
-                               return(0);
-                       }
-                       return(&satosin6(&ia6->ia_addr)->sin6_addr);
-               }
-#endif /* 0 */
-               if (ia6 == 0) {
-                       *errorp = EHOSTUNREACH; /* no route */
-                       return(0);
+               inp->inp_vflag = 0;
+               so->so_flags |= SOF_PCBCLEARING;
+               inp->inp_gencnt = ++ipi->ipi_gencnt;
+               if (inp->in6p_options)
+                       m_freem(inp->in6p_options);
+               ip6_freepcbopts(inp->in6p_outputopts);
+               if (inp->in6p_route.ro_rt) {
+                       rtfree(inp->in6p_route.ro_rt);
+                       inp->in6p_route.ro_rt = NULL;
                }
-               return(&satosin6(&ia6->ia_addr)->sin6_addr);
+               /* Check and free IPv4 related resources in case of mapped addr */
+               if (inp->inp_options)
+                       (void)m_free(inp->inp_options);
+
+               im6o = inp->in6p_moptions;
+               inp->in6p_moptions = NULL;
+               if (im6o != NULL)
+                       IM6O_REMREF(im6o);
+
+               imo = inp->inp_moptions;
+               inp->inp_moptions = NULL;
+               if (imo != NULL)
+                       IMO_REMREF(imo);
        }
-
-       *errorp = EADDRNOTAVAIL;
-       return(0);
 }
 
-/*
- * Default hop limit selection. The precedence is as follows:
- * 1. Hoplimit valued specified via ioctl.
- * 2. (If the outgoing interface is detected) the current
- *     hop limit of the interface specified by router advertisement.
- * 3. The system default hoplimit.
-*/
-int
-in6_selecthlim(in6p, ifp)
-       struct in6pcb *in6p;
-       struct ifnet *ifp;
+struct sockaddr *
+in6_sockaddr(
+       in_port_t port,
+       struct in6_addr *addr_p)
 {
-       if (in6p && in6p->in6p_hops >= 0)
-               return(in6p->in6p_hops);
-       else if (ifp)
-               return(nd_ifinfo[ifp->if_index].chlim);
+       struct sockaddr_in6 *sin6;
+
+       MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK);
+       if (sin6 == NULL)
+               return NULL;
+       bzero(sin6, sizeof *sin6);
+       sin6->sin6_family = AF_INET6;
+       sin6->sin6_len = sizeof(*sin6);
+       sin6->sin6_port = port;
+       sin6->sin6_addr = *addr_p;
+       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+               sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
        else
-               return(ip6_defhlim);
-}
-#endif
+               sin6->sin6_scope_id = 0;        /*XXX*/
+       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
+               sin6->sin6_addr.s6_addr16[1] = 0;
 
-void
-in6_pcbdisconnect(inp)
-       struct inpcb *inp;
-{
-       bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr));
-       inp->inp_fport = 0;
-       in_pcbrehash(inp);
-       if (inp->inp_socket->so_state & SS_NOFDREF)
-               in6_pcbdetach(inp);
+       return (struct sockaddr *)sin6;
 }
 
-void
-in6_pcbdetach(inp)
-       struct inpcb *inp;
+struct sockaddr *
+in6_v4mapsin6_sockaddr(
+       in_port_t port,
+       struct in_addr *addr_p)
 {
-       struct socket *so = inp->inp_socket;
-       struct inpcbinfo *ipi = inp->inp_pcbinfo;
-
-#if IPSEC
-       ipsec6_delete_pcbpolicy(inp);
-#endif /* IPSEC */
-       inp->inp_gencnt = ++ipi->ipi_gencnt;
-       in_pcbremlists(inp);
-       sotoinpcb(so) = 0;
-       sofree(so);
-
-       if (inp->in6p_inputopts) /* Free all received options. */
-               m_freem(inp->in6p_inputopts->head); /* this is safe */
-       ip6_freepcbopts(inp->in6p_outputopts);
-       ip6_freemoptions(inp->in6p_moptions);
-
-       if (inp->in6p_route.ro_rt)
-               rtfree(inp->in6p_route.ro_rt);
-       inp->inp_vflag = 0;
-       zfree(ipi->ipi_zone, inp);
+       struct sockaddr_in sin;
+       struct sockaddr_in6 *sin6_p;
+
+       bzero(&sin, sizeof sin);
+       sin.sin_family = AF_INET;
+       sin.sin_len = sizeof(sin);
+       sin.sin_port = port;
+       sin.sin_addr = *addr_p;
+
+       MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME,
+               M_WAITOK);
+       if (sin6_p == NULL)
+               return NULL;
+       in6_sin_2_v4mapsin6(&sin, sin6_p);
+
+       return (struct sockaddr *)sin6_p;
 }
 
 /*
@@ -747,78 +639,46 @@ in6_pcbdetach(inp)
  * because there actually /is/ a programming error somewhere... XXX)
  */
 int
-in6_setsockaddr(so, nam)
-       struct socket *so;
-       struct sockaddr **nam;
+in6_setsockaddr(
+       struct socket *so,
+       struct sockaddr **nam)
 {
-       int s;
-       register struct inpcb *inp;
-       register struct sockaddr_in6 *sin6;
-
-       /*
-        * Do the malloc first in case it blocks.
-        */
-       MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6, M_SONAME, M_WAITOK);
-       bzero(sin6, sizeof *sin6);
-       sin6->sin6_family = AF_INET6;
-       sin6->sin6_len = sizeof(*sin6);
+       struct inpcb *inp;
+       struct in6_addr addr;
+       in_port_t port;
 
-       s = splnet();
        inp = sotoinpcb(so);
        if (!inp) {
-               splx(s);
-               _FREE(sin6, M_SONAME);
                return EINVAL;
        }
-       sin6->sin6_port = inp->inp_lport;
-       sin6->sin6_addr = inp->in6p_laddr;
-       splx(s);
-       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-               sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
-       else
-               sin6->sin6_scope_id = 0;        /*XXX*/
-       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-               sin6->sin6_addr.s6_addr16[1] = 0;
+       port = inp->inp_lport;
+       addr = inp->in6p_laddr;
 
-       *nam = (struct sockaddr *)sin6;
+       *nam = in6_sockaddr(port, &addr);
+       if (*nam == NULL)
+               return ENOBUFS;
        return 0;
 }
 
 int
-in6_setpeeraddr(so, nam)
-       struct socket *so;
-       struct sockaddr **nam;
+in6_setpeeraddr(
+       struct socket *so,
+       struct sockaddr **nam)
 {
-       int s;
        struct inpcb *inp;
-       register struct sockaddr_in6 *sin6;
-
-       /*
-        * Do the malloc first in case it blocks.
-        */
-       MALLOC(sin6, struct sockaddr_in6 *, sizeof(*sin6), M_SONAME, M_WAITOK);
-       bzero((caddr_t)sin6, sizeof (*sin6));
-       sin6->sin6_family = AF_INET6;
-       sin6->sin6_len = sizeof(struct sockaddr_in6);
+       struct in6_addr addr;
+       in_port_t port;
 
-       s = splnet();
        inp = sotoinpcb(so);
        if (!inp) {
-               splx(s);
-               _FREE(sin6, M_SONAME);
                return EINVAL;
        }
-       sin6->sin6_port = inp->inp_fport;
-       sin6->sin6_addr = inp->in6p_faddr;
-       splx(s);
-       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-               sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
-       else
-               sin6->sin6_scope_id = 0;        /*XXX*/
-       if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr))
-               sin6->sin6_addr.s6_addr16[1] = 0;
+       port = inp->inp_fport;
+       addr = inp->in6p_faddr;
 
-       *nam = (struct sockaddr *)sin6;
+       *nam = in6_sockaddr(port, &addr);
+       if (*nam == NULL)
+               return ENOBUFS;
        return 0;
 }
 
@@ -833,10 +693,11 @@ in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
        if (inp->inp_vflag & INP_IPV4) {
                error = in_setsockaddr(so, nam);
                if (error == 0)
-                       in6_sin_2_v4mapsin6_in_sock(nam);
-       } else
-       error = in6_setsockaddr(so, nam);
-
+                       error = in6_sin_2_v4mapsin6_in_sock(nam);
+       } else {
+               /* scope issues will be handled in in6_setsockaddr(). */
+               error = in6_setsockaddr(so, nam);
+       }
        return error;
 }
 
@@ -851,10 +712,11 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
        if (inp->inp_vflag & INP_IPV4) {
                error = in_setpeeraddr(so, nam);
                if (error == 0)
-                       in6_sin_2_v4mapsin6_in_sock(nam);
-       } else
+                       error = in6_sin_2_v4mapsin6_in_sock(nam);
+       } else {
+               /* scope issues will be handled in in6_setpeeraddr(). */
                error = in6_setpeeraddr(so, nam);
-
+       }
        return error;
 }
 
@@ -866,32 +728,37 @@ in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
  * cmds that are uninteresting (e.g., no error in the map).
  * Call the protocol specific routine (if any) to report
  * any errors for each matching socket.
- *
- * Must be called at splnet.
  */
 void
-in6_pcbnotify(head, dst, fport_arg, laddr6, lport_arg, cmd, notify)
-       struct inpcbhead *head;
+in6_pcbnotify(pcbinfo, dst, fport_arg, src, lport_arg, cmd, cmdarg, notify)
+       struct inpcbinfo *pcbinfo;
        struct sockaddr *dst;
+       const struct sockaddr *src;
        u_int fport_arg, lport_arg;
-       struct in6_addr *laddr6;
        int cmd;
-       void (*notify) __P((struct inpcb *, int));
+       void *cmdarg;
+       void (*notify)(struct inpcb *, int);
 {
        struct inpcb *inp, *ninp;
-       struct in6_addr faddr6;
+       struct sockaddr_in6 sa6_src, *sa6_dst;
        u_short fport = fport_arg, lport = lport_arg;
-       int errno, s;
-       void (*notify2) __P((struct inpcb *, int));
-
-       notify2 = NULL;
+       u_int32_t flowinfo;
+       int errno;
+       struct inpcbhead *head = pcbinfo->listhead;
 
        if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET6)
                return;
-       faddr6 = ((struct sockaddr_in6 *)dst)->sin6_addr;
-       if (IN6_IS_ADDR_UNSPECIFIED(&faddr6))
+
+       sa6_dst = (struct sockaddr_in6 *)dst;
+       if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
                return;
 
+       /*
+        * Note that src can be NULL when we are notified by local fragmentation.
+        */
+       sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
+       flowinfo = sa6_src.sin6_flowinfo;
+
        /*
         * Redirects go to all references to the destination,
         * and use in6_rtchange to invalidate the route cache.
@@ -903,68 +770,83 @@ in6_pcbnotify(head, dst, fport_arg, laddr6, lport_arg, cmd, notify)
        if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
                fport = 0;
                lport = 0;
-               bzero((caddr_t)laddr6, sizeof(*laddr6));
-
-               /*
-                * Keep the old notify function to store a soft error
-                * in each PCB.
-                */
-               if (cmd == PRC_HOSTDEAD && notify != in6_rtchange)
-                       notify2 = notify;
+               bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
 
-               notify = in6_rtchange;
+               if (cmd != PRC_HOSTDEAD)
+                       notify = in6_rtchange;
        }
        errno = inet6ctlerrmap[cmd];
-       s = splnet();
-       for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
-               ninp = LIST_NEXT(inp, inp_list);
+       lck_rw_lock_shared(pcbinfo->mtx);
+       for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
+               ninp = LIST_NEXT(inp, inp_list);
 
-               if ((inp->inp_vflag & INP_IPV6) == 0)
+               if ((inp->inp_vflag & INP_IPV6) == 0)
                        continue;
 
-               if (notify == in6_rtchange) {
-                       /*
-                        * Since a non-connected PCB might have a cached route,
-                        * we always call in6_rtchange without matching
-                        * the PCB to the src/dst pair.
-                        *
-                        * XXX: we assume in6_rtchange does not free the PCB.
-                        */
-                       if (IN6_ARE_ADDR_EQUAL(&inp->in6p_route.ro_dst.sin6_addr,
-                                              &faddr6))
-                               in6_rtchange(inp, errno);
-
-                       if (notify2 == NULL)
-                               continue;
-
-                       notify = notify2;
+               /*
+                * If the error designates a new path MTU for a destination
+                * and the application (associated with this socket) wanted to
+                * know the value, notify. Note that we notify for all
+                * disconnected sockets if the corresponding application
+                * asked for it. This is because some UDP applications keep
+                * their sending sockets disconnected.
+                * XXX: should we avoid notifying TCP sockets of the value?
+                */
+               if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 &&
+                   (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+                    IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) {
+                       ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
+                                       (u_int32_t *)cmdarg);
                }
 
-               if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &faddr6) ||
-                  inp->inp_socket == 0 ||
-                  (lport && inp->inp_lport != lport) ||
-                  (!IN6_IS_ADDR_UNSPECIFIED(laddr6) &&
-                   !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr6)) ||
-                  (fport && inp->inp_fport != fport))
+               /*
+                * Detect if we should notify the error. If no source and
+                * destination ports are specified, but non-zero flowinfo and
+                * local address match, notify the error. This is the case
+                * when the error is delivered with an encrypted buffer
+                * by ESP. Otherwise, just compare addresses and ports
+                * as usual.
+                */
+               if (lport == 0 && fport == 0 && flowinfo &&
+                   inp->inp_socket != NULL &&
+                   flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) &&
+                   IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
+                       goto do_notify;
+               else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
+                                            &sa6_dst->sin6_addr) ||
+                        inp->inp_socket == 0 ||
+                        (lport && inp->inp_lport != lport) ||
+                        (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
+                         !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
+                                             &sa6_src.sin6_addr)) ||
+                        (fport && inp->inp_fport != fport)) 
                        continue;
+                    
 
-               if (notify)
+         do_notify:
+               if (notify) {
+                       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING)
+                               continue;
+                       socket_lock(inp->inp_socket, 1);
                        (*notify)(inp, errno);
+                       (void)in_pcb_checkstate(inp, WNT_RELEASE, 1);
+                       socket_unlock(inp->inp_socket, 1);
+               }
        }
-       splx(s);
+       lck_rw_done(pcbinfo->mtx);
 }
 
 /*
  * Lookup a PCB based on the local address and port.
  */
 struct inpcb *
-in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
-       struct inpcbinfo *pcbinfo;
-       struct in6_addr *laddr;
-       u_int lport_arg;
-       int wild_okay;
+in6_pcblookup_local(
+       struct inpcbinfo *pcbinfo,
+       struct in6_addr *laddr,
+       u_int lport_arg,
+       int wild_okay)
 {
-       register struct inpcb *inp;
+       struct inpcb *inp;
        int matchwild = 3, wildcard;
        u_short lport = lport_arg;
 
@@ -1050,30 +932,39 @@ in6_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
  * (by a redirect), time to try a default gateway again.
  */
 void
-in6_losing(in6p)
-       struct inpcb *in6p;
+in6_losing(
+       struct inpcb *in6p)
 {
        struct rtentry *rt;
        struct rt_addrinfo info;
 
        if ((rt = in6p->in6p_route.ro_rt) != NULL) {
-               in6p->in6p_route.ro_rt = 0;
+               in6p->in6p_route.ro_rt = NULL;
+               RT_LOCK(rt);
                bzero((caddr_t)&info, sizeof(info));
                info.rti_info[RTAX_DST] =
                        (struct sockaddr *)&in6p->in6p_route.ro_dst;
                info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
                info.rti_info[RTAX_NETMASK] = rt_mask(rt);
                rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
-               if (rt->rt_flags & RTF_DYNAMIC)
-                       (void)rtrequest(RTM_DELETE, rt_key(rt),
-                                       rt->rt_gateway, rt_mask(rt), rt->rt_flags,
-                                       (struct rtentry **)0);
-               else
+               if (rt->rt_flags & RTF_DYNAMIC) {
+                       /*
+                        * Prevent another thread from modifying rt_key,
+                        * rt_gateway via rt_setgate() after the rt_lock
+                        * is dropped by marking the route as defunct.
+                        */
+                       rt->rt_flags |= RTF_CONDEMNED;
+                       RT_UNLOCK(rt);
+                       (void) rtrequest(RTM_DELETE, rt_key(rt),
+                           rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
+               } else {
+                       RT_UNLOCK(rt);
+               }
                /*
                 * A new route can be allocated
                 * the next time output is attempted.
                 */
-                       rtfree(rt);
+               rtfree(rt);
        }
 }
 
@@ -1082,9 +973,9 @@ in6_losing(in6p)
  * and allocate a (hopefully) better one.
  */
 void
-in6_rtchange(inp, errno)
-       struct inpcb *inp;
-       int errno;
+in6_rtchange(
+       struct inpcb *inp,
+       __unused int errno)
 {
        if (inp->in6p_route.ro_rt) {
                rtfree(inp->in6p_route.ro_rt);
@@ -1096,20 +987,129 @@ in6_rtchange(inp, errno)
        }
 }
 
+/*
+ * Check if PCB exists in hash list. Also returns uid and gid of socket.
+ */
+int
+in6_pcblookup_hash_exists(
+       struct inpcbinfo *pcbinfo,
+       struct in6_addr *faddr,
+       u_int fport_arg,
+       struct in6_addr *laddr,
+       u_int lport_arg,
+       int wildcard,
+       uid_t *uid,
+       gid_t *gid,
+       __unused struct ifnet *ifp)
+{
+       struct inpcbhead *head;
+       struct inpcb *inp;
+       u_short fport = fport_arg, lport = lport_arg;
+       int faith;
+       int found;
+
+#if defined(NFAITH) && NFAITH > 0
+       faith = faithprefix(laddr);
+#else
+       faith = 0;
+#endif
+
+       *uid = UID_MAX;
+       *gid = GID_MAX;
+
+       lck_rw_lock_shared(pcbinfo->mtx);
+
+       /*
+        * First look for an exact match.
+        */
+       head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
+                                             lport, fport,
+                                             pcbinfo->hashmask)];
+       LIST_FOREACH(inp, head, inp_hash) {
+               if ((inp->inp_vflag & INP_IPV6) == 0)
+                       continue;
+               if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+                   IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+                   inp->inp_fport == fport &&
+                   inp->inp_lport == lport) {
+                       if ((found = (inp->inp_socket != NULL))) {
+                               /*
+                                * Found. Check if pcb is still valid
+                                */
+                               *uid = inp->inp_socket->so_uid;
+                               *gid = inp->inp_socket->so_gid;
+                       }
+                       lck_rw_done(pcbinfo->mtx);
+                       return (found);
+               }
+       }
+       if (wildcard) {
+               struct inpcb *local_wild = NULL;
+
+               head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
+                                                     pcbinfo->hashmask)];
+               LIST_FOREACH(inp, head, inp_hash) {
+                       if ((inp->inp_vflag & INP_IPV6) == 0)
+                               continue;
+                       if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
+                           inp->inp_lport == lport) {
+                               if (faith && (inp->inp_flags & INP_FAITH) == 0)
+                                       continue;
+                               if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
+                                                      laddr)) {
+                                       if ((found = (inp->inp_socket != NULL))) {
+                                               *uid = inp->inp_socket->so_uid;
+                                               *gid = inp->inp_socket->so_gid;
+                                       }
+                                       lck_rw_done(pcbinfo->mtx);
+                                       return (found);
+                               }
+                               else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
+                                       local_wild = inp;
+                       }
+               }
+               if (local_wild) {
+                       if ((found = (local_wild->inp_socket != NULL))) {
+                               *uid = local_wild->inp_socket->so_uid;
+                               *gid = local_wild->inp_socket->so_gid;
+                       }
+                       lck_rw_done(pcbinfo->mtx);
+                       return (found);
+               }
+       }
+
+       /*
+        * Not found.
+        */
+       lck_rw_done(pcbinfo->mtx);
+       return (0);
+}
+
 /*
  * Lookup PCB in hash list.
  */
 struct inpcb *
-in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
-       struct inpcbinfo *pcbinfo;
-       struct in6_addr *faddr, *laddr;
-       u_int fport_arg, lport_arg;
-       int wildcard;
-       struct ifnet *ifp;
+in6_pcblookup_hash(
+       struct inpcbinfo *pcbinfo,
+       struct in6_addr *faddr,
+       u_int fport_arg,
+       struct in6_addr *laddr,
+       u_int lport_arg,
+       int wildcard,
+       __unused struct ifnet *ifp)
 {
        struct inpcbhead *head;
-       register struct inpcb *inp;
+       struct inpcb *inp;
        u_short fport = fport_arg, lport = lport_arg;
+       int faith;
+
+#if defined(NFAITH) && NFAITH > 0
+       faith = faithprefix(laddr);
+#else
+       faith = 0;
+#endif
+
+       lck_rw_lock_shared(pcbinfo->mtx);
 
        /*
         * First look for an exact match.
@@ -1117,7 +1117,7 @@ in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
        head = &pcbinfo->hashbase[INP_PCBHASH(faddr->s6_addr32[3] /* XXX */,
                                              lport, fport,
                                              pcbinfo->hashmask)];
-       for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
+       LIST_FOREACH(inp, head, inp_hash) {
                if ((inp->inp_vflag & INP_IPV6) == 0)
                        continue;
                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
@@ -1125,9 +1125,16 @@ in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
                    inp->inp_fport == fport &&
                    inp->inp_lport == lport) {
                        /*
-                        * Found.
-                        */
-                       return (inp);
+                       * Found. Check that the pcb is still valid before returning it.
+                       */
+                       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
+                               lck_rw_done(pcbinfo->mtx);
+                               return (inp);
+                       }
+                       else {  /* it's there but dead, say it isn't found */
+                               lck_rw_done(pcbinfo->mtx);      
+                               return(NULL);
+                       }
                }
        }
        if (wildcard) {
@@ -1135,30 +1142,42 @@ in6_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp)
 
                head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0,
                                                      pcbinfo->hashmask)];
-               for (inp = head->lh_first; inp != NULL;
-                    inp = inp->inp_hash.le_next) {
+               LIST_FOREACH(inp, head, inp_hash) {
                        if ((inp->inp_vflag & INP_IPV6) == 0)
                                continue;
                        if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
                            inp->inp_lport == lport) {
-#if defined(NFAITH) && NFAITH > 0
-                               if (ifp && ifp->if_type == IFT_FAITH &&
-                                   (inp->inp_flags & INP_FAITH) == 0)
+                               if (faith && (inp->inp_flags & INP_FAITH) == 0)
                                        continue;
-#endif
                                if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
-                                                      laddr))
-                                       return (inp);
+                                                      laddr)) {
+                                       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
+                                               lck_rw_done(pcbinfo->mtx);
+                                               return (inp);
+                                       }
+                                       else {  /* it's there but dead, say it isn't found */
+                                               lck_rw_done(pcbinfo->mtx);      
+                                               return(NULL);
+                                       }
+                               }
                                else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
                                        local_wild = inp;
                        }
                }
-               return (local_wild);
+               if (local_wild && in_pcb_checkstate(local_wild, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
+                       lck_rw_done(pcbinfo->mtx);
+                       return (local_wild);
+               }
+               else {
+                       lck_rw_done(pcbinfo->mtx);
+                       return (NULL);
+               }
        }
 
        /*
         * Not found.
         */
+       lck_rw_done(pcbinfo->mtx);
        return (NULL);
 }