]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/uipc_socket.c
xnu-2050.48.11.tar.gz
[apple/xnu.git] / bsd / kern / uipc_socket.c
index 57dff6de9dd12be195a4de850448027ab0b83818..af4b4fbe13f304cc1c7b78ec439a4f3a76e5ae4c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 1998-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <sys/ev.h>
 #include <sys/kdebug.h>
 #include <sys/un.h>
+#include <sys/user.h>
+#include <sys/priv.h>
 #include <net/route.h>
+#include <net/ntstat.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
 #include <kern/zalloc.h>
 #include <kern/locks.h>
 #include <machine/limits.h>
 #include <libkern/OSAtomic.h>
 #include <pexpert/pexpert.h>
+#include <kern/assert.h>
+#include <kern/task.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/mcache.h>
 
 #if CONFIG_MACF
 #include <security/mac.h>
 #include <security/mac_framework.h>
 #endif /* MAC */
 
-/* how a timeval looks to a 64-bit process */
-struct timeval64 {
-       int64_t         tv_sec;
-       int32_t         tv_usec;
-};
 
 int                    so_cache_hw = 0;
 int                    so_cache_timeouts = 0;
 int                    so_cache_max_freed = 0;
 int                    cached_sock_count = 0;
+__private_extern__ int max_cached_sock_count = MAX_CACHED_SOCKETS;
 struct socket          *socket_cache_head = 0;
 struct socket          *socket_cache_tail = 0;
-u_long                 so_cache_time = 0;
+u_int32_t                      so_cache_time = 0;
 int                    so_cache_init_done = 0;
 struct zone            *so_cache_zone;
 
@@ -133,7 +138,8 @@ static void filt_sordetach(struct knote *kn);
 static int     filt_soread(struct knote *kn, long hint);
 static void    filt_sowdetach(struct knote *kn);
 static int     filt_sowrite(struct knote *kn, long hint);
-static int     filt_solisten(struct knote *kn, long hint);
+static void    filt_sockdetach(struct knote *kn);
+static int     filt_sockev(struct knote *kn, long hint);
 
 static int
 sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p);
@@ -141,12 +147,21 @@ sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p);
 static int
 sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p);
 
-static struct filterops solisten_filtops =
-       { 1, NULL, filt_sordetach, filt_solisten };
-static struct filterops soread_filtops =
-       { 1, NULL, filt_sordetach, filt_soread };
-static struct filterops sowrite_filtops =
-       { 1, NULL, filt_sowdetach, filt_sowrite };
+static struct filterops soread_filtops = {
+        .f_isfd = 1,
+        .f_detach = filt_sordetach,
+        .f_event = filt_soread,
+};
+static struct filterops sowrite_filtops = {
+        .f_isfd = 1,
+        .f_detach = filt_sowdetach,
+        .f_event = filt_sowrite,
+};
+static struct filterops sock_filtops = {
+       .f_isfd = 1,
+       .f_detach = filt_sockdetach,
+       .f_event = filt_sockev,
+};
 
 #define        EVEN_MORE_LOCKING_DEBUG 0
 int socket_debug = 0;
@@ -170,15 +185,15 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
 SYSCTL_DECL(_kern_ipc);
 
 int somaxconn = SOMAXCONN;
-SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, "");
+SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW | CTLFLAG_LOCKED, &somaxconn, 0, "");
 
 /* Should we get a maximum also ??? */
 static int sosendmaxchain = 65536;
 static int sosendminchain = 16384;
 static int sorecvmincopy  = 16384;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendminchain,
     0, "");
-SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW | CTLFLAG_LOCKED, &sorecvmincopy,
     0, "");
 
 /*
@@ -186,7 +201,7 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy,
  * the socket is marked with SOF_MULTIPAGES; see below.
  */
 int sosendjcl = 1;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW, &sosendjcl, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW | CTLFLAG_LOCKED, &sosendjcl, 0, "");
 
 /*
  * Set this to ignore SOF_MULTIPAGES and use jumbo clusters for large
@@ -200,9 +215,17 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl, CTLFLAG_RW, &sosendjcl, 0, "");
  * capable.  Set this to 1 only for testing/debugging purposes.
  */
 int sosendjcl_ignore_capab = 0;
-SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW,
+SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW | CTLFLAG_LOCKED,
     &sosendjcl_ignore_capab, 0, "");
 
+int sodefunctlog = 0;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sodefunctlog, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &sodefunctlog, 0, "");
+
+int sothrottlelog = 0;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sothrottlelog, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &sothrottlelog, 0, "");
+
 /*
  * Socket operation routines.
  * These routines are called by the routines in
@@ -214,6 +237,7 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, sosendjcl_ignore_capab, CTLFLAG_RW,
 /* sys_generic.c */
 extern void postevent(struct socket *, struct sockbuf *, int);
 extern void evsofree(struct socket *);
+extern int tcp_notsent_lowat_check(struct socket *so);
 
 /* TODO: these should be in header file */
 extern int get_inpcb_str_size(void);
@@ -233,7 +257,16 @@ static void cached_sock_free(struct socket *);
 static void so_cache_timer(void *);
 
 void soclose_wait_locked(struct socket *so);
+int so_isdstlocal(struct socket *so);
 
+/*
+ * SOTCDB_NO_DSCP is set by default, to prevent the networking stack from
+ * setting the DSCP code on the packet based on the service class; see
+ * <rdar://problem/11277343> for details.
+ */
+__private_extern__ u_int32_t sotcdb = SOTCDB_NO_DSCP;
+SYSCTL_INT(_kern_ipc, OID_AUTO, sotcdb, CTLFLAG_RW | CTLFLAG_LOCKED,
+    &sotcdb, 0, "");
 
 void
 socketinit(void)
@@ -272,6 +305,8 @@ socketinit(void)
            get_inpcb_str_size() + 4 + get_tcp_str_size());
 
        so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone");
+       zone_change(so_cache_zone, Z_CALLERACCT, FALSE);
+       zone_change(so_cache_zone, Z_NOENCRYPT, TRUE);
 #if TEMPDEBUG
        printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size);
 #endif
@@ -280,13 +315,19 @@ socketinit(void)
        so_cache_zone_element_size = str_size;
 
        sflt_init();
+
+       _CASSERT(_SO_TC_MAX == SO_TC_STATS_MAX);
+
+       socket_tclass_init();
+
+       socket_flowadv_init();
 }
 
 static void
 cached_sock_alloc(struct socket **so, int waitok)
 {
        caddr_t temp;
-       register u_long offset;
+       register uintptr_t offset;
 
        lck_mtx_lock(so_cache_mtx);
 
@@ -333,20 +374,18 @@ cached_sock_alloc(struct socket **so, int waitok)
                 * Define offsets for extra structures into our single block of
                 * memory. Align extra structures on longword boundaries.
                 */
-               offset = (u_long) *so;
+
+               offset = (uintptr_t) *so;
                offset += sizeof (struct socket);
-               if (offset & 0x3) {
-                       offset += 4;
-                       offset &= 0xfffffffc;
-               }
+
+               offset = ALIGN(offset);
+
                (*so)->so_saved_pcb = (caddr_t)offset;
                offset += get_inpcb_str_size();
-               if (offset & 0x3) {
-                       offset += 4;
-                       offset &= 0xfffffffc;
-               }
 
-               ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb =
+               offset = ALIGN(offset);
+
+               ((struct inpcb *)(void *)(*so)->so_saved_pcb)->inp_saved_ppcb =
                    (caddr_t)offset;
 #if TEMPDEBUG
                kprintf("Allocating cached socket - %p, pcb=%p tcpcb=%p\n",
@@ -364,7 +403,7 @@ cached_sock_free(struct socket *so)
 
        lck_mtx_lock(so_cache_mtx);
 
-       if (++cached_sock_count > MAX_CACHED_SOCKETS) {
+       if (++cached_sock_count > max_cached_sock_count) {
                --cached_sock_count;
                lck_mtx_unlock(so_cache_mtx);
 #if TEMPDEBUG
@@ -396,6 +435,24 @@ cached_sock_free(struct socket *so)
 #endif
 }
 
+static void
+so_update_last_owner_locked(
+       struct socket   *so,
+       proc_t                  self)
+{
+       if (so->last_pid != 0)
+       {
+               if (self == NULL)
+                       self = current_proc();
+               
+               if (self)
+               {
+                       so->last_upid = proc_uniqueid(self);
+                       so->last_pid = proc_pid(self);
+               }
+       }
+}
+
 static void
 so_cache_timer(__unused void *dummy)
 {
@@ -486,6 +543,7 @@ socreate(int dom, struct socket **aso, int type, int proto)
        register struct protosw *prp;
        register struct socket *so;
        register int error = 0;
+
 #if TCPDEBUG
        extern int tcpconsdebug;
 #endif
@@ -507,19 +565,20 @@ socreate(int dom, struct socket **aso, int type, int proto)
        }
        if (prp->pr_type != type)
                return (EPROTOTYPE);
-       so = soalloc(p != 0, dom, type);
+       so = soalloc(1, dom, type);
        if (so == 0)
                return (ENOBUFS);
 
        TAILQ_INIT(&so->so_incomp);
        TAILQ_INIT(&so->so_comp);
        so->so_type = type;
+       so->last_upid = proc_uniqueid(p);
+       so->last_pid = proc_pid(p);
+
+       so->so_cred = kauth_cred_proc_ref(p);
+       if (!suser(kauth_cred_get(), NULL))
+               so->so_state = SS_PRIV;
 
-       if (p != 0) {
-               so->so_uid = kauth_cred_getuid(kauth_cred_get());
-               if (!suser(kauth_cred_get(), NULL))
-                       so->so_state = SS_PRIV;
-       }
        so->so_proto = prp;
 #ifdef __APPLE__
        so->so_rcv.sb_flags |= SB_RECV; /* XXX */
@@ -562,6 +621,27 @@ socreate(int dom, struct socket **aso, int type, int proto)
                so->so_options |= SO_DEBUG;
 #endif
 #endif
+       so_set_default_traffic_class(so);
+       /*
+        * If this is a background thread/task, mark the socket as such.
+        */
+       if (proc_get_self_isbackground() != 0) {
+               socket_set_traffic_mgt_flags(so, TRAFFIC_MGT_SO_BACKGROUND);
+               so->so_background_thread = current_thread();
+       }
+
+       switch (dom) {
+       /*
+        * Don't mark Unix domain or system sockets as eligible for defunct by default.
+       */
+       case PF_LOCAL:
+       case PF_SYSTEM:
+               so->so_flags |= SOF_NODEFUNCT;
+               break;
+       default:
+               break;
+       }
+
        *aso = so;
        return (0);
 }
@@ -593,40 +673,25 @@ sobind(struct socket *so, struct sockaddr *nam)
 {
        struct proc *p = current_proc();
        int error = 0;
-       struct socket_filter_entry *filter;
-       int filtered = 0;
 
        socket_lock(so, 1);
+       VERIFY(so->so_usecount > 1);    
+       so_update_last_owner_locked(so, p);
 
        /*
-        * If this is a bind request on a previously-accepted socket
-        * that has been marked as inactive, reject it now before
-        * we go any further.
+        * If this is a bind request on a socket that has been marked
+        * as inactive, reject it now before we go any further.
         */
        if (so->so_flags & SOF_DEFUNCT) {
                error = EINVAL;
+               SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+                   __func__, proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+                   error));
                goto out;
        }
 
        /* Socket filter */
-       error = 0;
-       for (filter = so->so_filt; filter && (error == 0);
-           filter = filter->sfe_next_onsocket) {
-               if (filter->sfe_filter->sf_filter.sf_bind) {
-                       if (filtered == 0) {
-                               filtered = 1;
-                               sflt_use(so);
-                               socket_unlock(so, 0);
-                       }
-                       error = filter->sfe_filter->sf_filter.
-                           sf_bind(filter->sfe_cookie, so, nam);
-               }
-       }
-       if (filtered != 0) {
-               socket_lock(so, 0);
-               sflt_unuse(so);
-       }
-       /* End socket filter */
+       error = sflt_bind(so, nam);
 
        if (error == 0)
                error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p);
@@ -642,6 +707,11 @@ out:
 void
 sodealloc(struct socket *so)
 {
+       kauth_cred_unref(&so->so_cred);
+
+       /* Remove any filters */
+       sflt_termsock(so);
+
        so->so_gencnt = ++so_gencnt;
 
 #if CONFIG_MACF_SOCKET
@@ -681,10 +751,9 @@ solisten(struct socket *so, int backlog)
 {
        struct proc *p = current_proc();
        int error = 0;
-       struct socket_filter_entry *filter;
-       int filtered = 0;
 
        socket_lock(so, 1);
+       
        if (so->so_proto == NULL) {
                error = EINVAL;
                goto out;
@@ -696,13 +765,18 @@ solisten(struct socket *so, int backlog)
 
        /*
         * If the listen request is made on a socket that is not fully
-        * disconnected, or on a previously-accepted socket that has
-        * been marked as inactive, reject the request now.
+        * disconnected, or on a socket that has been marked as inactive,
+        * reject the request now.
         */
        if ((so->so_state &
            (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING)) ||
            (so->so_flags & SOF_DEFUNCT)) {
                error = EINVAL;
+               if (so->so_flags & SOF_DEFUNCT) {
+                       SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+                           __func__, proc_pid(p), so, INP_SOCKAF(so),
+                           INP_SOCKTYPE(so), error));
+               }
                goto out;
        }
 
@@ -711,23 +785,7 @@ solisten(struct socket *so, int backlog)
                goto out;
        }
 
-       error = 0;
-       for (filter = so->so_filt; filter && (error == 0);
-           filter = filter->sfe_next_onsocket) {
-               if (filter->sfe_filter->sf_filter.sf_listen) {
-                       if (filtered == 0) {
-                               filtered = 1;
-                               sflt_use(so);
-                               socket_unlock(so, 0);
-                       }
-                       error = filter->sfe_filter->sf_filter.
-                           sf_listen(filter->sfe_cookie, so);
-               }
-       }
-       if (filtered != 0) {
-               socket_lock(so, 0);
-               sflt_unuse(so);
-       }
+       error = sflt_listen(so);
 
        if (error == 0) {
                error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p);
@@ -771,9 +829,6 @@ sofreelastref(struct socket *so, int dealloc)
 
        /* Assume socket is locked */
 
-       /* Remove any filters - may be called more than once */
-       sflt_termsock(so);
-
        if ((!(so->so_flags & SOF_PCBCLEARING)) ||
            ((so->so_state & SS_NOFDREF) == 0)) {
 #ifdef __APPLE__
@@ -841,10 +896,10 @@ soclose_wait_locked(struct socket *so)
         * Double check here and return if there's no outstanding upcall;
         * otherwise proceed further only if SOF_UPCALLCLOSEWAIT is set.
         */
-       if (!(so->so_flags & SOF_UPCALLINUSE) ||
-           !(so->so_flags & SOF_UPCALLCLOSEWAIT))
+       if (!so->so_upcallusecount || !(so->so_flags & SOF_UPCALLCLOSEWAIT))
                return;
-
+       so->so_rcv.sb_flags &= ~SB_UPCALL;
+       so->so_snd.sb_flags &= ~SB_UPCALL;
        so->so_flags |= SOF_CLOSEWAIT;
        (void) msleep((caddr_t)&so->so_upcall, mutex_held, (PZERO - 1),
            "soclose_wait_locked", NULL);
@@ -980,6 +1035,15 @@ drop:
        if (so->so_usecount == 0)
                panic("soclose: usecount is zero so=%p\n", so);
        if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) {
+               /*
+                * Let NetworkStatistics know this PCB is going away
+                * before we detach it.
+                */
+               if (nstat_collect &&
+                   (so->so_proto->pr_domain->dom_family == AF_INET ||
+                   so->so_proto->pr_domain->dom_family == AF_INET6))
+                       nstat_pcb_detach(so->so_pcb);
+
                int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so);
                if (error == 0)
                        error = error2;
@@ -990,6 +1054,9 @@ discard:
        if (so->so_pcb && so->so_state & SS_NOFDREF)
                panic("soclose: NOFDREF");
        so->so_state |= SS_NOFDREF;
+       
+       if ((so->so_flags & SOF_KNOTE) != 0)
+               KNOTE(&so->so_klist, SO_FILT_HINT_LOCKED);
 #ifdef __APPLE__
        so->so_proto->pr_domain->dom_refs--;
        evsofree(so);
@@ -1005,7 +1072,7 @@ soclose(struct socket *so)
        int error = 0;
        socket_lock(so, 1);
 
-       if (so->so_flags & SOF_UPCALLINUSE)
+       if (so->so_upcallusecount)
                soclose_wait_locked(so);
 
        if (so->so_retaincnt == 0) {
@@ -1082,48 +1149,28 @@ int
 soacceptfilter(struct socket *so)
 {
        struct sockaddr *local = NULL, *remote = NULL;
-       struct socket_filter_entry *filter;
-       int error = 0, filtered = 0;
+       int error = 0;
        struct socket *head = so->so_head;
 
        /*
-        * There's no need to hold the lock; this socket
+        * Hold the lock even if this socket
         * has not been made visible to the filter(s).
+        * For sockets with global locks, this protects against the
+        * head or peer going away.
         */
-       if ((sock_getaddr(so, &remote, 1) != 0) ||
-           sock_getaddr(so, &local, 0) != 0) {
+       socket_lock(so, 1);
+       if (sogetaddr_locked(so, &remote, 1) != 0 ||
+           sogetaddr_locked(so, &local, 0) != 0) {
                so->so_state &= ~(SS_NOFDREF | SS_COMP);
                so->so_head = NULL;
+               socket_unlock(so, 1);
                soclose(so);
                /* Out of resources; try it again next time */
                error = ECONNABORTED;
                goto done;
        }
 
-       /*
-        * At this point, we have a reference on the listening socket
-        * so we know it won't be going away.  Do the same for the newly
-        * accepted socket while we invoke the accept callback routine.
-        */
-       socket_lock(so, 1);
-       for (filter = so->so_filt; filter != NULL && error == 0;
-           filter = filter->sfe_next_onsocket) {
-               if (filter->sfe_filter->sf_filter.sf_accept != NULL) {
-                       if (!filtered) {
-                               filtered = 1;
-                               sflt_use(so);
-                               socket_unlock(so, 0);
-                       }
-                       error = filter->sfe_filter->sf_filter.
-                           sf_accept(filter->sfe_cookie,
-                           head, so, local, remote);
-               }
-       }
-
-       if (filtered) {
-               socket_lock(so, 0);
-               sflt_unuse(so);
-       }
+       error = sflt_accept(head, so, local, remote);
 
        /*
         * If we get EJUSTRETURN from one of the filters, mark this socket
@@ -1132,10 +1179,8 @@ soacceptfilter(struct socket *so)
         */
        if (error == EJUSTRETURN) {
                error = 0;
-               so->so_flags |= SOF_DEFUNCT;
-               /* Prevent data from being appended to the socket buffers */
-               so->so_snd.sb_flags |= SB_DROP;
-               so->so_rcv.sb_flags |= SB_DROP;
+               (void) sosetdefunct(current_proc(), so,
+                   SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL, FALSE);
        }
 
        if (error != 0) {
@@ -1181,15 +1226,21 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
 
        if (dolock)
                socket_lock(so, 1);
-
+       
        /*
         * If this is a listening socket or if this is a previously-accepted
         * socket that has been marked as inactive, reject the connect request.
         */
        if ((so->so_options & SO_ACCEPTCONN) || (so->so_flags & SOF_DEFUNCT)) {
+               error = EOPNOTSUPP;
+               if (so->so_flags & SOF_DEFUNCT) {
+                       SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n",
+                           __func__, proc_pid(p), so, INP_SOCKAF(so),
+                           INP_SOCKTYPE(so), error));
+               }
                if (dolock)
                        socket_unlock(so, 1);
-               return (EOPNOTSUPP);
+               return (error);
        }
 
        if ((so->so_restrictions & SO_RESTRICT_DENYOUT) != 0) {
@@ -1213,36 +1264,14 @@ soconnectlock(struct socket *so, struct sockaddr *nam, int dolock)
                 * Run connect filter before calling protocol:
                 *  - non-blocking connect returns before completion;
                 */
-               struct socket_filter_entry *filter;
-               int filtered = 0;
-
-               error = 0;
-               for (filter = so->so_filt; filter && (error == 0);
-                   filter = filter->sfe_next_onsocket) {
-                       if (filter->sfe_filter->sf_filter.sf_connect_out) {
-                               if (filtered == 0) {
-                                       filtered = 1;
-                                       sflt_use(so);
-                                       socket_unlock(so, 0);
-                               }
-                               error = filter->sfe_filter->sf_filter.
-                                   sf_connect_out(filter->sfe_cookie, so, nam);
-                       }
-               }
-               if (filtered != 0) {
-                       socket_lock(so, 0);
-                       sflt_unuse(so);
-               }
+               error = sflt_connectout(so, nam);
 
                if (error) {
                        if (error == EJUSTRETURN)
                                error = 0;
-                       if (dolock)
-                               socket_unlock(so, 1);
-                       return (error);
+               } else {
+                       error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
                }
-
-               error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p);
        }
        if (dolock)
                socket_unlock(so, 1);
@@ -1330,11 +1359,11 @@ sodisconnect(struct socket *so)
  *     [so_error]:???
  */
 static int
-sosendcheck(struct socket *so, struct sockaddr *addr, long resid, long clen,
-    long atomic, int flags, int *sblocked)
+sosendcheck(struct socket *so, struct sockaddr *addr, int32_t resid, int32_t clen,
+    int32_t atomic, int flags, int *sblocked)
 {
-       int error = 0;
-       long space;
+       int     error = 0;
+       int32_t space;
        int     assumelock = 0;
 
 restart:
@@ -1352,6 +1381,8 @@ restart:
                } else {
                        error = sblock(&so->so_snd, SBLOCKWAIT(flags));
                        if (error) {
+                               if (so->so_flags & SOF_DEFUNCT)
+                                       goto defunct;
                                return (error);
                        }
                        *sblocked = 1;
@@ -1359,12 +1390,17 @@ restart:
        }
 
        /*
-        * If a send attempt is made on a previously-accepted socket
-        * that has been marked as inactive (disconnected), reject
-        * the request.
+        * If a send attempt is made on a socket that has been marked
+        * as inactive (disconnected), reject the request.
         */
-       if (so->so_flags & SOF_DEFUNCT)
-               return (ENOTCONN);
+       if (so->so_flags & SOF_DEFUNCT) {
+defunct:
+               error = EPIPE;
+               SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
+                   proc_selfpid(), so, INP_SOCKAF(so), INP_SOCKTYPE(so),
+                   error));
+               return (error);
+       }
 
        if (so->so_state & SS_CANTSENDMORE)
                return (EPIPE);
@@ -1391,15 +1427,19 @@ restart:
        if ((atomic && resid > so->so_snd.sb_hiwat) ||
            clen > so->so_snd.sb_hiwat)
                return (EMSGSIZE);
-       if (space < resid + clen &&
-           (atomic || space < (long)so->so_snd.sb_lowat || space < clen)) {
+       if ((space < resid + clen &&
+           (atomic || space < (int32_t)so->so_snd.sb_lowat || space < clen)) ||
+           (so->so_type == SOCK_STREAM && so_wait_for_if_feedback(so))) {
                if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) ||
                    assumelock) {
                        return (EWOULDBLOCK);
                }
                sbunlock(&so->so_snd, 1);
+               *sblocked = 0;
                error = sbwait(&so->so_snd);
                if (error) {
+                       if (so->so_flags & SOF_DEFUNCT)
+                               goto defunct;
                        return (error);
                }
                goto restart;
@@ -1474,7 +1514,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 {
        struct mbuf **mp;
        register struct mbuf *m, *freelist = NULL;
-       register long space, len, resid;
+       register int32_t space, len, resid;
        int clen = 0, error, dontroute, mlen, sendflags;
        int atomic = sosendallatonce(so) || top;
        int sblocked = 0;
@@ -1490,6 +1530,8 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
            so->so_snd.sb_cc, so->so_snd.sb_lowat, so->so_snd.sb_hiwat);
 
        socket_lock(so, 1);
+       so_update_last_owner_locked(so, p);
+       
        if (so->so_type != SOCK_STREAM && (flags & MSG_OOB) != 0) {
                error = EOPNOTSUPP;
                socket_unlock(so, 1);
@@ -1515,8 +1557,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
        dontroute =
            (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
            (so->so_proto->pr_flags & PR_ATOMIC);
-       if (p)
-               OSIncrementAtomic(&p->p_stats->p_ru.ru_msgsnd); 
+       OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
        if (control)
                clen = control->m_len;
 
@@ -1531,10 +1572,6 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                    1024 : 0);
 
                do {
-                       struct socket_filter_entry *filter;
-                       int filtered;
-                       boolean_t recursive;
-
                        if (uio == NULL) {
                                /*
                                 * Data is prepackaged in "top".
@@ -1547,7 +1584,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                int bytes_to_copy;
                                boolean_t jumbocl;
 
-                               bytes_to_copy = min(resid, space);
+                               bytes_to_copy = imin(resid, space);
 
                                if (sosendminchain > 0) {
                                        chainlength = 0;
@@ -1587,7 +1624,8 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                         * haven't yet consumed.
                                         */
                                        if (freelist == NULL &&
-                                           bytes_to_copy > NBPG && jumbocl) {
+                                           bytes_to_copy > MBIGCLBYTES &&
+                                           jumbocl) {
                                                num_needed =
                                                    bytes_to_copy / M16KCLBYTES;
 
@@ -1610,10 +1648,10 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                        if (freelist == NULL &&
                                            bytes_to_copy > MCLBYTES) {
                                                num_needed =
-                                                   bytes_to_copy / NBPG;
+                                                   bytes_to_copy / MBIGCLBYTES;
 
                                                if ((bytes_to_copy -
-                                                   (num_needed * NBPG)) >=
+                                                   (num_needed * MBIGCLBYTES)) >=
                                                    MINCLSIZE)
                                                        num_needed++;
 
@@ -1621,7 +1659,7 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                                    m_getpackets_internal(
                                                    (unsigned int *)&num_needed,
                                                    hdrs_needed, M_WAIT, 0,
-                                                   NBPG);
+                                                   MBIGCLBYTES);
                                                /*
                                                 * Fall back to cluster size
                                                 * if allocation failed
@@ -1684,16 +1722,15 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                                                    MHLEN - m_leadingspace(m);
                                        else
                                                mlen = MLEN;
-                                       len = min(mlen, bytes_to_copy);
+                                       len = imin(mlen, bytes_to_copy);
 
                                        chainlength += len;
 
                                        space -= len;
 
                                        error = uiomove(mtod(m, caddr_t),
-                                           (int)len, uio);
+                                           len, uio);
 
-                                       // LP64todo - fix this!
                                        resid = uio_resid(uio);
 
                                        m->m_len = len;
@@ -1760,65 +1797,24 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
                        /*
                         * Socket filter processing
                         */
-                       recursive = (so->so_send_filt_thread != NULL);
-                       filtered = 0;
-                       error = 0;
-                       for (filter = so->so_filt; filter && (error == 0);
-                           filter = filter->sfe_next_onsocket) {
-                               if (filter->sfe_filter->sf_filter.sf_data_out) {
-                                       int so_flags = 0;
-                                       if (filtered == 0) {
-                                               filtered = 1;
-                                               so->so_send_filt_thread =
-                                                   current_thread();
-                                               sflt_use(so);
-                                               socket_unlock(so, 0);
-                                               so_flags =
-                                                   (sendflags & MSG_OOB) ?
-                                                   sock_data_filt_flag_oob : 0;
-                                       }
-                                       error = filter->sfe_filter->sf_filter.
-                                           sf_data_out(filter->sfe_cookie, so,
-                                           addr, &top, &control, so_flags);
+                       error = sflt_data_out(so, addr, &top, &control,
+                                               (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0);
+                       if (error) {
+                               if (error == EJUSTRETURN) {
+                                       error = 0;
+                                       clen = 0;
+                                       control = 0;
+                                       top = 0;
                                }
-                       }
-
-                       if (filtered) {
-                               /*
-                                * At this point, we've run at least one
-                                * filter.  The socket is unlocked as is
-                                * the socket buffer.  Clear the recorded
-                                * filter thread only when we are outside
-                                * of a filter's context.  This allows for
-                                * a filter to issue multiple inject calls
-                                * from its sf_data_out callback routine.
-                                */
-                               socket_lock(so, 0);
-                               sflt_unuse(so);
-                               if (!recursive)
-                                       so->so_send_filt_thread = 0;
-                               if (error) {
-                                       if (error == EJUSTRETURN) {
-                                               error = 0;
-                                               clen = 0;
-                                               control = 0;
-                                               top = 0;
-                                       }
 
-                                       goto release;
-                               }
+                               goto release;
                        }
                        /*
                         * End Socket filter processing
                         */
 
-                       if (error == EJUSTRETURN) {
-                               /* A socket filter handled this data */
-                               error = 0;
-                       } else {
-                               error = (*so->so_proto->pr_usrreqs->pru_send)
-                                   (so, sendflags, top, addr, control, p);
-                       }
+                       error = (*so->so_proto->pr_usrreqs->pru_send)
+                               (so, sendflags, top, addr, control, p);
 #ifdef __APPLE__
                        if (flags & MSG_SEND)
                                so->so_temp = NULL;
@@ -1900,7 +1896,6 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
        struct protosw *pr = so->so_proto;
        struct mbuf *nextrecord;
        int moff, type = 0;
-               // LP64todo - fix this!
        int orig_resid = uio_resid(uio);
        struct mbuf *free_list;
        int delayed_copy_len;
@@ -1913,6 +1908,7 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
            so->so_rcv.sb_cc, so->so_rcv.sb_lowat, so->so_rcv.sb_hiwat);
 
        socket_lock(so, 1);
+       so_update_last_owner_locked(so, p);
 
 #ifdef MORE_LOCKING_DEBUG
        if (so->so_usecount == 1)
@@ -1936,14 +1932,18 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
        if (so->so_flags & SOF_DEFUNCT) {
                struct sockbuf *sb = &so->so_rcv;
 
+               error = ENOTCONN;
+               SODEFUNCTLOG(("%s[%d]: defunct so %p [%d,%d] (%d)\n", __func__,
+                   proc_pid(p), so, INP_SOCKAF(so), INP_SOCKTYPE(so), error));
                /*
                 * This socket should have been disconnected and flushed
-                * prior to being returned from accept; there should be
-                * no data on its receive list, so panic otherwise.
+                * prior to being returned from sodefunct(); there should
+                * be no data on its receive list, so panic otherwise.
                 */
-               sb_empty_assert(sb, __func__);
+               if (so->so_state & SS_DEFUNCT)
+                       sb_empty_assert(sb, __func__);
                socket_unlock(so, 1);
-               return (ENOTCONN);
+               return (error);
        }
 
        /*
@@ -1967,9 +1967,8 @@ soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
                        goto bad;
                socket_unlock(so, 0);
                do {
-               // LP64todo - fix this!
                        error = uiomove(mtod(m, caddr_t),
-                           (int)min(uio_resid(uio), m->m_len), uio);
+                           imin(uio_resid(uio), m->m_len), uio);
                        m = m_free(m);
                } while (uio_resid(uio) && error == 0 && m);
                socket_lock(so, 0);
@@ -2062,9 +2061,7 @@ restart:
                 * end up with false positives during select() or poll()
                 * which could put the application in a bad state.
                 */
-               if (m == NULL && so->so_rcv.sb_cc != 0)
-                       panic("soreceive corrupted so_rcv: m %p cc %lu",
-                           m, so->so_rcv.sb_cc);
+               SB_MB_CHECK(&so->so_rcv);
 
                if (so->so_error) {
                        if (m)
@@ -2122,19 +2119,7 @@ restart:
                goto restart;
        }
 dontblock:
-#ifndef __APPLE__
-       if (uio->uio_procp)
-               uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
-#else  /* __APPLE__ */
-       /*
-        * 2207985
-        * This should be uio->uio-procp; however, some callers of this
-        * function use auto variables with stack garbage, and fail to
-        * fill out the uio structure properly.
-        */
-       if (p)
-               OSIncrementAtomic(&p->p_stats->p_ru.ru_msgrcv);
-#endif /* __APPLE__ */
+       OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
        SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
        SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
        nextrecord = m->m_nextpkt;
@@ -2188,6 +2173,14 @@ dontblock:
                                goto restart;
                        }
                        socket_lock(so, 0);
+                       /*
+                        * If the socket has been defunct'd, drop it.
+                        */
+                       if (so->so_flags & SOF_DEFUNCT) {
+                               m_freem(m);
+                               error = ENOTCONN;
+                               goto release;
+                       }
                        /*
                         * Re-adjust the socket receive list and re-enqueue
                         * the record in front of any packets which may have
@@ -2244,6 +2237,7 @@ dontblock:
                struct mbuf *cm = NULL, *cmn;
                struct mbuf **cme = &cm;
                struct sockbuf *sb_rcv = &so->so_rcv;
+               struct mbuf **msgpcm = NULL;
 
                /*
                 * Externalizing the control messages would require us to
@@ -2256,7 +2250,23 @@ dontblock:
                do {
                        if (flags & MSG_PEEK) {
                                if (controlp != NULL) {
+                                       if (*controlp == NULL) {
+                                               msgpcm = controlp;
+                                       }
                                        *controlp = m_copy(m, 0, m->m_len);
+
+                                       /*
+                                        * If we failed to allocate an mbuf,
+                                        * release any previously allocated
+                                        * mbufs for control data and return
+                                        * an error.  Keep the mbufs in the
+                                        * socket because MSG_PEEK is set.
+                                        */
+                                       if (*controlp == NULL) {
+                                               m_freem(*msgpcm);
+                                               error = ENOBUFS;
+                                               goto release;
+                                       }
                                        controlp = &(*controlp)->m_next;
                                }
                                m = m->m_next;
@@ -2324,11 +2334,16 @@ dontblock:
                        }
                        cm = cmn;
                }
-               orig_resid = 0;
-               if (sb_rcv->sb_mb != NULL)
+               /* 
+                * Update the value of nextrecord in case we received new
+                * records when the socket was unlocked above for 
+                * externalizing SCM_RIGHTS.
+                */
+               if (m != NULL)
                        nextrecord = sb_rcv->sb_mb->m_nextpkt;
                else
-                       nextrecord = NULL;
+                       nextrecord = sb_rcv->sb_mb;
+               orig_resid = 0;
        }
 
        if (m != NULL) {
@@ -2353,7 +2368,6 @@ dontblock:
                        flags |= MSG_OOB;
        } else {
                if (!(flags & MSG_PEEK)) {
-                       so->so_rcv.sb_mb = nextrecord;
                        SB_EMPTY_FIXUP(&so->so_rcv);
                }
        }
@@ -2387,7 +2401,6 @@ dontblock:
                        flags |= MSG_OOB;
                }
                so->so_state &= ~SS_RCVATMARK;
-               // LP64todo - fix this!
                len = uio_resid(uio) - delayed_copy_len;
                if (so->so_oobmark && len > so->so_oobmark - offset)
                        len = so->so_oobmark - offset;
@@ -2491,8 +2504,25 @@ dontblock:
                        if (flags & MSG_PEEK) {
                                moff += len;
                        } else {
-                               if (mp)
-                                       *mp = m_copym(m, 0, len, M_WAIT);
+                               if (mp != NULL) {
+                                       int copy_flag;
+
+                                       if (flags & MSG_DONTWAIT)
+                                               copy_flag = M_DONTWAIT;
+                                       else
+                                               copy_flag = M_WAIT;
+                                       *mp = m_copym(m, 0, len, copy_flag);
+                                       if (*mp == NULL) {
+                                               /*
+                                                * Failed to allocate an mbuf.
+                                                * Adjust uio_resid back up; it was
+                                                * adjusted down by len bytes that
+                                                * we did not copy over.
+                                                */
+                                               uio_setresid(uio, (uio_resid(uio) + len));
+                                               break;
+                                       }
+                               }
                                m->m_data += len;
                                m->m_len -= len;
                                so->so_rcv.sb_cc -= len;
@@ -2584,6 +2614,7 @@ dontblock:
                        if (m) {
                                nextrecord = m->m_nextpkt;
                        }
+                       SB_MB_CHECK(&so->so_rcv);
                }
        }
 #ifdef MORE_LOCKING_DEBUG
@@ -2631,6 +2662,7 @@ dontblock:
                        } else if (nextrecord->m_nextpkt == NULL) {
                                so->so_rcv.sb_lastrecord = nextrecord;
                        }
+                       SB_MB_CHECK(&so->so_rcv);
                }
                SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
                SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
@@ -2825,18 +2857,7 @@ sorflush(struct socket *so)
        if (asb.sb_flags & SB_UNIX)
                sb->sb_flags |= SB_UNIX;
        if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) {
-               boolean_t unp = (pr->pr_domain->dom_dispose == unp_dispose);
-               /*
-                * Currently AF_UNIX domain uses a global domain mutex;
-                * unp_dispose() may end up calling soclose() on another
-                * AF_UNIX socket and therefore the lock must not be held
-                * across the call.
-                */
-               if (unp)
-                       socket_unlock(so, 0);
                (*pr->pr_domain->dom_dispose)(asb.sb_mb);
-               if (unp)
-                       socket_lock(so, 0);
        }
        sbrelease(&asb);
 }
@@ -2868,7 +2889,7 @@ sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
        if (valsize > len)
                sopt->sopt_valsize = valsize = len;
 
-       if (sopt->sopt_p != 0)
+       if (sopt->sopt_p != kernproc)
                return (copyin(sopt->sopt_val, buf, valsize));
 
        bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize);
@@ -2886,17 +2907,21 @@ static int
 sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p)
 {
        int                     error;
-               
+
        if (proc_is64bit(sopt->sopt_p)) {
-               struct timeval64        tv64;
+               struct user64_timeval   tv64;
 
                if (sopt->sopt_valsize < sizeof(tv64)) {
                        return (EINVAL);
                }
                sopt->sopt_valsize = sizeof(tv64);
-               error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
-               if (error != 0) {
-                       return (error);
+               if (sopt->sopt_p != kernproc) {
+                       error = copyin(sopt->sopt_val, &tv64, sizeof(tv64));
+                       if (error != 0)
+                               return (error);
+               } else {
+                       bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv64,
+                               sizeof(tv64));
                }
                if (tv64.tv_sec < 0 || tv64.tv_sec > LONG_MAX 
                    || tv64.tv_usec < 0 || tv64.tv_usec >= 1000000) {
@@ -2905,23 +2930,29 @@ sooptcopyin_timeval(struct sockopt *sopt, struct timeval * tv_p)
                tv_p->tv_sec = tv64.tv_sec;
                tv_p->tv_usec = tv64.tv_usec;
        } else {
-               if (sopt->sopt_valsize < sizeof(*tv_p)) {
+               struct user32_timeval   tv32;
+
+               if (sopt->sopt_valsize < sizeof(tv32)) {
                        return (EINVAL);
                }
-               sopt->sopt_valsize = sizeof(*tv_p);
-               if (sopt->sopt_p != 0) {
-                       error = copyin(sopt->sopt_val, tv_p, sizeof(*tv_p));
+               sopt->sopt_valsize = sizeof(tv32);
+               if (sopt->sopt_p != kernproc) {
+                       error = copyin(sopt->sopt_val, &tv32, sizeof(tv32));
                        if (error != 0) {
                                return (error);
                        }
                } else {
-                       bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), tv_p,
-                             sizeof(*tv_p));
+                       bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), &tv32,
+                             sizeof(tv32));
                }
-               if (tv_p->tv_sec < 0 || tv_p->tv_sec > LONG_MAX 
-                   || tv_p->tv_usec < 0 || tv_p->tv_usec >= 1000000) {
+#ifndef __LP64__ // K64todo "comparison is always false due to limited range of data type"
+               if (tv32.tv_sec < 0 || tv32.tv_sec > LONG_MAX 
+                   || tv32.tv_usec < 0 || tv32.tv_usec >= 1000000) {
                        return (EDOM);
                }
+#endif
+               tv_p->tv_sec = tv32.tv_sec;
+               tv_p->tv_usec = tv32.tv_usec;
        }
        return (0);
 }
@@ -2952,15 +2983,15 @@ sosetopt(struct socket *so, struct sockopt *sopt)
        int     error, optval;
        struct  linger l;
        struct  timeval tv;
-       struct socket_filter_entry *filter;
-       int filtered = 0;
 #if CONFIG_MACF_SOCKET
        struct mac extmac;
 #endif /* MAC_SOCKET */
 
        socket_lock(so, 1);
+       
        if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE))
-           == (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
+           == (SS_CANTRCVMORE | SS_CANTSENDMORE) && 
+           (so->so_flags & SOF_NPX_SETOPTSHUT) == 0) {
                /* the socket has been shutdown, no more sockopt's */
                error = EINVAL;
                goto bad;
@@ -2970,29 +3001,11 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                sopt->sopt_dir = SOPT_SET;
        }
 
-       error = 0;
-       for (filter = so->so_filt; filter && (error == 0);
-           filter = filter->sfe_next_onsocket) {
-               if (filter->sfe_filter->sf_filter.sf_setoption) {
-                       if (filtered == 0) {
-                               filtered = 1;
-                               sflt_use(so);
-                               socket_unlock(so, 0);
-                       }
-                       error = filter->sfe_filter->sf_filter.
-                           sf_setoption(filter->sfe_cookie, so, sopt);
-               }
-       }
-
-       if (filtered != 0) {
-               socket_lock(so, 0);
-               sflt_unuse(so);
-
-               if (error) {
-                       if (error == EJUSTRETURN)
-                               error = 0;
-                       goto bad;
-               }
+       error = sflt_setsockopt(so, sopt);
+       if (error) {
+               if (error == EJUSTRETURN)
+                       error = 0;
+               goto bad;
        }
 
        error = 0;
@@ -3028,6 +3041,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                case SO_REUSEPORT:
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
+               case SO_TIMESTAMP_MONOTONIC:
 #ifdef __APPLE__
                case SO_DONTTRUNC:
                case SO_WANTMORE:
@@ -3064,17 +3078,18 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                        switch (sopt->sopt_name) {
                        case SO_SNDBUF:
                        case SO_RCVBUF:
-                               if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
-                                   &so->so_snd : &so->so_rcv,
-                                   (u_long) optval) == 0) {
+                       {
+                               struct sockbuf *sb = (sopt->sopt_name == SO_SNDBUF) ?
+                                       &so->so_snd : &so->so_rcv;
+                               if (sbreserve(sb, (u_int32_t) optval) == 0) {
                                        error = ENOBUFS;
                                        goto bad;
                                }
-                               if (sopt->sopt_name == SO_SNDBUF)
-                                       so->so_snd.sb_flags |= SB_USRSIZE;
-                               else
-                                       so->so_rcv.sb_flags |= SB_USRSIZE;
+                               sb->sb_flags |= SB_USRSIZE;
+                               sb->sb_flags &= ~SB_AUTOSIZE;
+                               sb->sb_idealsize = (u_int32_t)optval;
                                break;
+                       }
 
                        /*
                         * Make sure the low-water is never greater than
@@ -3118,8 +3133,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                        if (error)
                                goto bad;
 
-                       error = sflt_attach_private(so, NULL,
-                           nke.nke_handle, 1);
+                       error = sflt_attach_internal(so, nke.nke_handle);
                        break;
                }
 
@@ -3212,6 +3226,144 @@ sosetopt(struct socket *so, struct sockopt *sopt)
                        break;
 #endif
 
+               case SO_RANDOMPORT:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error)
+                               goto bad;
+                       if (optval)
+                               so->so_flags |= SOF_BINDRANDOMPORT;
+                       else
+                               so->so_flags &= ~SOF_BINDRANDOMPORT;
+                       break;
+
+               case SO_NP_EXTENSIONS: {
+                       struct so_np_extensions sonpx;
+
+                       error = sooptcopyin(sopt, &sonpx, sizeof(sonpx), sizeof(sonpx));
+                       if (error)
+                               goto bad;
+                       if (sonpx.npx_mask & ~SONPX_MASK_VALID) {
+                               error = EINVAL;
+                               goto bad;
+                       }
+                       /*
+                        * Only one bit defined for now
+                        */
+                       if ((sonpx.npx_mask & SONPX_SETOPTSHUT)) {
+                               if ((sonpx.npx_flags & SONPX_SETOPTSHUT))
+                                       so->so_flags |= SOF_NPX_SETOPTSHUT;
+                               else
+                                       so->so_flags &= ~SOF_NPX_SETOPTSHUT;
+                       }
+                       break;
+               }
+
+               case SO_TRAFFIC_CLASS: {
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                               sizeof (optval));
+                       if (error)
+                               goto bad;
+                       error = so_set_traffic_class(so, optval);
+                       if (error)
+                               goto bad;
+                       break;
+               }
+
+               case SO_RECV_TRAFFIC_CLASS: {
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                               sizeof (optval));
+                       if (error)
+                               goto bad;
+                       if (optval == 0)
+                               so->so_flags &= ~SOF_RECV_TRAFFIC_CLASS;
+                       else
+                               so->so_flags |= SOF_RECV_TRAFFIC_CLASS;
+                       break;
+               }
+
+               case SO_TRAFFIC_CLASS_DBG: {
+                       struct so_tcdbg so_tcdbg;
+
+                       error = sooptcopyin(sopt, &so_tcdbg,
+                           sizeof (struct so_tcdbg), sizeof (struct so_tcdbg));
+                       if (error)
+                               goto bad;
+                       error = so_set_tcdbg(so, &so_tcdbg);
+                       if (error)
+                               goto bad;
+                       break;
+               }
+
+               case SO_PRIVILEGED_TRAFFIC_CLASS:
+                       error = priv_check_cred(kauth_cred_get(),
+                           PRIV_NET_PRIVILEGED_TRAFFIC_CLASS, 0);
+                       if (error)
+                               goto bad;
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                               sizeof (optval));
+                       if (error)
+                               goto bad;
+                       if (optval == 0)
+                               so->so_flags &= ~SOF_PRIVILEGED_TRAFFIC_CLASS;
+                       else
+                               so->so_flags |= SOF_PRIVILEGED_TRAFFIC_CLASS;
+                       break;
+
+               case SO_DEFUNCTOK:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error != 0 || (so->so_flags & SOF_DEFUNCT)) {
+                               if (error == 0)
+                                       error = EBADF;
+                               goto bad;
+                       }
+                       /*
+                        * Any process can set SO_DEFUNCTOK (clear
+                        * SOF_NODEFUNCT), but only root can clear
+                        * SO_DEFUNCTOK (set SOF_NODEFUNCT).
+                        */
+                       if (optval == 0 &&
+                           kauth_cred_issuser(kauth_cred_get()) == 0) {
+                               error = EPERM;
+                               goto bad;
+                       }
+                       if (optval)
+                               so->so_flags &= ~SOF_NODEFUNCT;
+                       else
+                               so->so_flags |= SOF_NODEFUNCT;
+
+                       SODEFUNCTLOG(("%s[%d]: so %p [%d,%d] is now marked as "
+                           "%seligible for defunct\n", __func__,
+                           proc_selfpid(), so, INP_SOCKAF(so),
+                           INP_SOCKTYPE(so),
+                           (so->so_flags & SOF_NODEFUNCT) ? "not " : ""));
+                       break;
+
+               case SO_ISDEFUNCT:
+                       /* This option is not settable */
+                       error = EINVAL;
+                       break;
+
+               case SO_OPPORTUNISTIC:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error == 0)
+                               error = so_set_opportunistic(so, optval);
+                       break;
+
+               case SO_FLUSH:
+                       /* This option is handled by lower layer(s) */
+                       error = 0;
+                       break;
+
+               case SO_RECV_ANYIF:
+                       error = sooptcopyin(sopt, &optval, sizeof (optval),
+                           sizeof (optval));
+                       if (error == 0)
+                               error = so_set_recv_anyif(so, optval);
+                       break;
+
                default:
                        error = ENOPROTOOPT;
                        break;
@@ -3246,7 +3398,7 @@ sooptcopyout(struct sockopt *sopt, void *buf, size_t len)
        valsize = min(len, sopt->sopt_valsize);
        sopt->sopt_valsize = valsize;
        if (sopt->sopt_val != USER_ADDR_NULL) {
-               if (sopt->sopt_p != 0)
+               if (sopt->sopt_p != kernproc)
                        error = copyout(buf, sopt->sopt_val, valsize);
                else
                        bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
@@ -3259,24 +3411,27 @@ sooptcopyout_timeval(struct sockopt *sopt, const struct timeval * tv_p)
 {
        int                     error;
        size_t                  len;
-       struct timeval64        tv64;
+       struct user64_timeval   tv64;
+       struct user32_timeval   tv32;
        const void *            val;
        size_t                  valsize;
-               
+
        error = 0;
        if (proc_is64bit(sopt->sopt_p)) {
-               len = sizeof(struct timeval64);
+               len = sizeof(tv64);
                tv64.tv_sec = tv_p->tv_sec;
                tv64.tv_usec = tv_p->tv_usec;
                val = &tv64;
        } else {
-               len = sizeof(struct timeval);
-               val = tv_p;
+               len = sizeof(tv32);
+               tv32.tv_sec = tv_p->tv_sec;
+               tv32.tv_usec = tv_p->tv_usec;
+               val = &tv32;
        }
        valsize = min(len, sopt->sopt_valsize);
        sopt->sopt_valsize = valsize;
        if (sopt->sopt_val != USER_ADDR_NULL) {
-               if (sopt->sopt_p != 0)
+               if (sopt->sopt_p != kernproc)
                        error = copyout(val, sopt->sopt_val, valsize);
                else
                        bcopy(val, CAST_DOWN(caddr_t, sopt->sopt_val), valsize);
@@ -3297,8 +3452,6 @@ sogetopt(struct socket *so, struct sockopt *sopt)
        int     error, optval;
        struct  linger l;
        struct  timeval tv;
-       struct  socket_filter_entry *filter;
-       int     filtered = 0;
 #if CONFIG_MACF_SOCKET
        struct mac extmac;
 #endif /* MAC_SOCKET */
@@ -3309,31 +3462,14 @@ sogetopt(struct socket *so, struct sockopt *sopt)
 
        socket_lock(so, 1);
 
-       error = 0;
-       for (filter = so->so_filt; filter && (error == 0);
-           filter = filter->sfe_next_onsocket) {
-               if (filter->sfe_filter->sf_filter.sf_getoption) {
-                       if (filtered == 0) {
-                               filtered = 1;
-                               sflt_use(so);
-                               socket_unlock(so, 0);
-                       }
-                       error = filter->sfe_filter->sf_filter.
-                           sf_getoption(filter->sfe_cookie, so, sopt);
-               }
-       }
-       if (filtered != 0) {
-               socket_lock(so, 0);
-               sflt_unuse(so);
-
-               if (error) {
-                       if (error == EJUSTRETURN)
-                               error = 0;
-                       socket_unlock(so, 1);
-                       return (error);
-               }
+       error = sflt_getsockopt(so, sopt);
+       if (error) {
+               if (error == EJUSTRETURN)
+                       error = 0;
+               socket_unlock(so, 1);
+               return (error);
        }
-
+       
        error = 0;
        if (sopt->sopt_level != SOL_SOCKET) {
                if (so->so_proto && so->so_proto->pr_ctloutput) {
@@ -3363,6 +3499,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
                case SO_BROADCAST:
                case SO_OOBINLINE:
                case SO_TIMESTAMP:
+               case SO_TIMESTAMP_MONOTONIC:
 #ifdef __APPLE__
                case SO_DONTTRUNC:
                case SO_WANTMORE:
@@ -3485,6 +3622,60 @@ integer:
                        optval = (so->so_flags & SOF_UPCALLCLOSEWAIT);
                        goto integer;
 #endif
+               case SO_RANDOMPORT:
+                       optval = (so->so_flags & SOF_BINDRANDOMPORT);
+                       goto integer;
+
+               case SO_NP_EXTENSIONS: {
+                       struct so_np_extensions sonpx;
+
+                       sonpx.npx_flags = (so->so_flags & SOF_NPX_SETOPTSHUT) ? SONPX_SETOPTSHUT : 0;
+                       sonpx.npx_mask = SONPX_MASK_VALID;
+
+                       error = sooptcopyout(sopt, &sonpx, sizeof(struct so_np_extensions));
+                       break;  
+               }
+
+               case SO_TRAFFIC_CLASS:
+                       optval = so->so_traffic_class;
+                       goto integer;
+
+               case SO_RECV_TRAFFIC_CLASS:
+                       optval = (so->so_flags & SOF_RECV_TRAFFIC_CLASS);
+                       goto integer;
+
+               case SO_TRAFFIC_CLASS_STATS:
+                       error = sooptcopyout(sopt, &so->so_tc_stats, sizeof(so->so_tc_stats));
+                       break;
+
+               case SO_TRAFFIC_CLASS_DBG: 
+                       error = sogetopt_tcdbg(so, sopt);
+                       break;
+
+               case SO_PRIVILEGED_TRAFFIC_CLASS:
+                       optval = (so->so_flags & SOF_PRIVILEGED_TRAFFIC_CLASS);
+                       goto integer;
+
+               case SO_DEFUNCTOK:
+                       optval = !(so->so_flags & SOF_NODEFUNCT);
+                       goto integer;
+
+               case SO_ISDEFUNCT:
+                       optval = (so->so_flags & SOF_DEFUNCT);
+                       goto integer;
+
+               case SO_OPPORTUNISTIC:
+                       optval = so_get_opportunistic(so);
+                       goto integer;
+
+               case SO_FLUSH:
+                       /* This option is not gettable */
+                       error = EINVAL;
+                       break;
+
+               case SO_RECV_ANYIF:
+                       optval = so_get_recv_anyif(so);
+                       goto integer;
 
                default:
                        error = ENOPROTOOPT;
@@ -3494,22 +3685,26 @@ integer:
                return (error);
        }
 }
-
-/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
+/* The size limit in our soopt_getm differs from the one on FreeBSD.
+ * We limit the size of options to MCLBYTES. This will have to change
+ * if we ever define options that need more space than MCLBYTES.
+ */
 int
 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
        struct mbuf *m, *m_prev;
        int sopt_size = sopt->sopt_valsize;
+       int how;
 
-       if (sopt_size > MAX_SOOPTGETM_SIZE)
+       if (sopt_size <= 0 || sopt_size > MCLBYTES)
                return (EMSGSIZE);
 
-       MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
+       how = sopt->sopt_p != kernproc ? M_WAIT : M_DONTWAIT;
+       MGET(m, how, MT_DATA);
        if (m == 0)
                return (ENOBUFS);
        if (sopt_size > MLEN) {
-               MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
+               MCLGET(m, how);
                if ((m->m_flags & M_EXT) == 0) {
                        m_free(m);
                        return (ENOBUFS);
@@ -3522,16 +3717,17 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
        *mp = m;
        m_prev = m;
 
-       while (sopt_size) {
-               MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA);
+       while (sopt_size > 0) {
+               MGET(m, how, MT_DATA);
                if (m == 0) {
                        m_freem(*mp);
                        return (ENOBUFS);
                }
                if (sopt_size > MLEN) {
-                       MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT);
+                       MCLGET(m, how);
                        if ((m->m_flags & M_EXT) == 0) {
                                m_freem(*mp);
+                               m_freem(m);
                                return (ENOBUFS);
                        }
                        m->m_len = min(MCLBYTES, sopt_size);
@@ -3545,7 +3741,7 @@ soopt_getm(struct sockopt *sopt, struct mbuf **mp)
        return (0);
 }
 
-/* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */
+/* copyin sopt data into mbuf chain */
 int
 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
@@ -3554,7 +3750,7 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
        if (sopt->sopt_val == USER_ADDR_NULL)
                return (0);
        while (m != NULL && sopt->sopt_valsize >= m->m_len) {
-               if (sopt->sopt_p != NULL) {
+               if (sopt->sopt_p != kernproc) {
                        int error;
 
                        error = copyin(sopt->sopt_val, mtod(m, char *),
@@ -3576,7 +3772,7 @@ soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
        return (0);
 }
 
-/* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */
+/* copyout mbuf chain data into soopt */
 int
 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
@@ -3586,7 +3782,7 @@ soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
        if (sopt->sopt_val == USER_ADDR_NULL)
                return (0);
        while (m != NULL && sopt->sopt_valsize >= m->m_len) {
-               if (sopt->sopt_p != NULL) {
+               if (sopt->sopt_p != kernproc) {
                        int error;
 
                        error = copyout(mtod(m, char *), sopt->sopt_val,
@@ -3673,7 +3869,7 @@ soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
     __unused struct proc *p)
 {
        struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
-       struct sockbuf *sb;
+       struct klist *skl;
 
        socket_lock(so, 1);
 
@@ -3686,23 +3882,38 @@ soo_kqfilter(__unused struct fileproc *fp, struct knote *kn,
 
        switch (kn->kn_filter) {
        case EVFILT_READ:
-               if (so->so_options & SO_ACCEPTCONN)
-                       kn->kn_fop = &solisten_filtops;
-               else
-                       kn->kn_fop = &soread_filtops;
-               sb = &so->so_rcv;
+               kn->kn_fop = &soread_filtops;
+               skl = &so->so_rcv.sb_sel.si_note;
                break;
        case EVFILT_WRITE:
                kn->kn_fop = &sowrite_filtops;
-               sb = &so->so_snd;
+               skl = &so->so_snd.sb_sel.si_note;
+               break;
+       case EVFILT_SOCK:
+               kn->kn_fop = &sock_filtops;
+               skl = &so->so_klist;
                break;
        default:
                socket_unlock(so, 1);
                return (1);
        }
 
-       if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn))
-               sb->sb_flags |= SB_KNOTE;
+       if (KNOTE_ATTACH(skl, kn)) {
+               switch(kn->kn_filter) {
+               case EVFILT_READ:
+                       so->so_rcv.sb_flags |= SB_KNOTE;
+                       break;
+               case EVFILT_WRITE:
+                       so->so_snd.sb_flags |= SB_KNOTE;
+                       break;
+               case EVFILT_SOCK:
+                       so->so_flags |= SOF_KNOTE;
+                       break;
+               default:
+                       socket_unlock(so, 1);
+                       return (1);
+               }
+       }
        socket_unlock(so, 1);
        return (0);
 }
@@ -3728,6 +3939,25 @@ filt_soread(struct knote *kn, long hint)
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_lock(so, 1);
 
+       if (so->so_options & SO_ACCEPTCONN) {
+               int isempty;
+
+               /* Radar 6615193 handle the listen case dynamically
+                * for kqueue read filter. This allows to call listen() after registering
+                * the kqueue EVFILT_READ.
+                */
+
+               kn->kn_data = so->so_qlen;
+               isempty = ! TAILQ_EMPTY(&so->so_comp);
+
+               if ((hint & SO_FILT_HINT_LOCKED) == 0)
+                       socket_unlock(so, 1);
+
+               return (isempty);
+       }
+
+       /* socket isn't a listener */
+
        kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
 
        if (so->so_oobmark) {
@@ -3769,12 +3999,19 @@ filt_soread(struct knote *kn, long hint)
                return (1);
        }
 
+       int64_t lowwat = so->so_rcv.sb_lowat;
+       if (kn->kn_sfflags & NOTE_LOWAT)
+       {
+               if (kn->kn_sdata > so->so_rcv.sb_hiwat)
+                       lowwat = so->so_rcv.sb_hiwat;
+               else if (kn->kn_sdata > lowwat)
+                       lowwat = kn->kn_sdata;
+       }
+       
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
-
-       return ((kn->kn_flags & EV_OOBAND) ||
-           kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
-           kn->kn_sdata : so->so_rcv.sb_lowat));
+       
+       return ((kn->kn_flags & EV_OOBAND) || kn->kn_data >= lowwat);
 }
 
 static void
@@ -3789,11 +4026,25 @@ filt_sowdetach(struct knote *kn)
        socket_unlock(so, 1);
 }
 
+/*
+ * Returns 1 when the socket is AF_INET or AF_INET6, is in the
+ * SS_ISCONNECTED state, and INP_WAIT_FOR_IF_FEEDBACK() holds for its
+ * inpcb; returns 0 in every other case.
+ */
+int
+so_wait_for_if_feedback(struct socket *so)
+{
+       int family = so->so_proto->pr_domain->dom_family;
+       struct inpcb *inp;
+
+       /* Only Internet-domain sockets carry an inpcb to consult. */
+       if (family != AF_INET && family != AF_INET6)
+               return (0);
+       if ((so->so_state & SS_ISCONNECTED) == 0)
+               return (0);
+
+       inp = sotoinpcb(so);
+       return (INP_WAIT_FOR_IF_FEEDBACK(inp) ? 1 : 0);
+}
+
 /*ARGSUSED*/
 static int
 filt_sowrite(struct knote *kn, long hint)
 {
        struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+       int ret = 0;
 
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_lock(so, 1);
@@ -3802,51 +4053,165 @@ filt_sowrite(struct knote *kn, long hint)
        if (so->so_state & SS_CANTSENDMORE) {
                kn->kn_flags |= EV_EOF;
                kn->kn_fflags = so->so_error;
-               if ((hint & SO_FILT_HINT_LOCKED) == 0)
-                       socket_unlock(so, 1);
-               return (1);
+               ret = 1;
+               goto out;
        }
        if (so->so_error) {     /* temporary udp error */
-               if ((hint & SO_FILT_HINT_LOCKED) == 0)
-                       socket_unlock(so, 1);
-               return (1);
+               ret = 1;
+               goto out;
        }
        if (((so->so_state & SS_ISCONNECTED) == 0) &&
            (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
-               if ((hint & SO_FILT_HINT_LOCKED) == 0)
-                       socket_unlock(so, 1);
-               return (0);
+               ret = 0;
+               goto out;
+       }
+       int64_t lowwat = so->so_snd.sb_lowat;
+       if (kn->kn_sfflags & NOTE_LOWAT)
+       {
+               if (kn->kn_sdata > so->so_snd.sb_hiwat)
+                       lowwat = so->so_snd.sb_hiwat;
+               else if (kn->kn_sdata > lowwat)
+                       lowwat = kn->kn_sdata;
        }
+       if (kn->kn_data >= lowwat) {
+               if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
+                       ret = tcp_notsent_lowat_check(so);
+               } else {
+                       ret = 1;
+               }
+       }
+       if (so_wait_for_if_feedback(so))
+               ret = 0;
+out:
        if ((hint & SO_FILT_HINT_LOCKED) == 0)
                socket_unlock(so, 1);
-       if (kn->kn_sfflags & NOTE_LOWAT)
-               return (kn->kn_data >= kn->kn_sdata);
-       return (kn->kn_data >= so->so_snd.sb_lowat);
+       return(ret);
+}
+
+/*
+ * Detach a knote from the socket's so_klist.  Runs under the socket
+ * lock; SOF_KNOTE is cleared only when the socket had it set and
+ * KNOTE_DETACH() returns non-zero.
+ */
+static void
+filt_sockdetach(struct knote *kn)
+{
+       struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
+
+       socket_lock(so, 1);
+       if ((so->so_flags & SOF_KNOTE) != 0 &&
+           KNOTE_DETACH(&so->so_klist, kn))
+               so->so_flags &= ~SOF_KNOTE;
+       socket_unlock(so, 1);
 }
 
-/*ARGSUSED*/
+/*
+ * Knote event routine for socket-level events (presumably installed via
+ * sock_filtops for EVFILT_SOCK -- confirm against the filtops table).
+ *
+ * Takes the socket lock itself unless the hint carries
+ * SO_FILT_HINT_LOCKED.  Event bits are copied into kn_fflags only when
+ * the knote asked for them in kn_sfflags.  Returns 1 when the socket has
+ * a pending so_error or when any kn_fflags bit is set, 0 otherwise.
+ */
 static int
-filt_solisten(struct knote *kn, long hint)
+filt_sockev(struct knote *kn, long hint)
 {
+       int ret = 0, locked = 0;
        struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;
-       int isempty;
 
-       if ((hint & SO_FILT_HINT_LOCKED) == 0)
+       if ((hint & SO_FILT_HINT_LOCKED) == 0) {
                socket_lock(so, 1);
-       kn->kn_data = so->so_qlen;
-       isempty = ! TAILQ_EMPTY(&so->so_comp);
-       if ((hint & SO_FILT_HINT_LOCKED) == 0)
+               locked = 1;
+       }
+
+       /*
+        * One-shot events delivered through the hint.  Hints that match
+        * none of the cases fall out of the switch without any effect.
+        */
+       switch (hint & SO_FILT_HINT_EV) {
+       case SO_FILT_HINT_CONNRESET:
+               if (kn->kn_sfflags & NOTE_CONNRESET)
+                       kn->kn_fflags |= NOTE_CONNRESET;
+               break;
+       case SO_FILT_HINT_TIMEOUT:
+               if (kn->kn_sfflags & NOTE_TIMEOUT)
+                       kn->kn_fflags |= NOTE_TIMEOUT;
+               break;
+       case SO_FILT_HINT_NOSRCADDR:
+               if (kn->kn_sfflags & NOTE_NOSRCADDR)
+                       kn->kn_fflags |= NOTE_NOSRCADDR;
+               break;
+       case SO_FILT_HINT_IFDENIED:
+               if ((kn->kn_sfflags & NOTE_IFDENIED))
+                       kn->kn_fflags |= NOTE_IFDENIED;
+               break;
+       case SO_FILT_HINT_KEEPALIVE:
+               if (kn->kn_sfflags & NOTE_KEEPALIVE)
+                       kn->kn_fflags |= NOTE_KEEPALIVE;
+       }
+
+       /* Level-triggered conditions derived from the current socket state. */
+       if ((kn->kn_sfflags & NOTE_READCLOSED) &&
+               (so->so_state & SS_CANTRCVMORE))
+               kn->kn_fflags |= NOTE_READCLOSED;
+
+       if ((kn->kn_sfflags & NOTE_WRITECLOSED) &&
+               (so->so_state & SS_CANTSENDMORE))
+               kn->kn_fflags |= NOTE_WRITECLOSED;
+
+       /*
+        * Suspend and resume are mutually exclusive: each branch clears
+        * both bits before setting its own.
+        */
+       if ((kn->kn_sfflags & NOTE_SUSPEND) &&
+           ((hint & SO_FILT_HINT_SUSPEND) ||
+           (so->so_flags & SOF_SUSPENDED))) {
+               kn->kn_fflags &=
+                       ~(NOTE_SUSPEND | NOTE_RESUME);
+               kn->kn_fflags |= NOTE_SUSPEND;
+       }
+
+       if ((kn->kn_sfflags & NOTE_RESUME) &&
+           ((hint & SO_FILT_HINT_RESUME) ||
+           (so->so_flags & SOF_SUSPENDED) == 0)) {
+               kn->kn_fflags &=
+                       ~(NOTE_SUSPEND | NOTE_RESUME);
+               kn->kn_fflags |= NOTE_RESUME;
+       }
+
+       /* A pending error always fires the knote and is reported in kn_data. */
+       if (so->so_error != 0) {
+               ret = 1;
+               kn->kn_data = so->so_error;
+               kn->kn_flags |= EV_EOF;
+       } else {
+               /*
+                * NOTE(review): kn_data is accessed through a u_int32_t
+                * pointer here; if kn_data is wider than 32 bits only part
+                * of it is read and updated -- confirm this is intended.
+                */
+               get_sockev_state(so, (u_int32_t *)&(kn->kn_data));
+       }
+
+       if (kn->kn_fflags != 0)
+               ret = 1;
+
+       if (locked)
                socket_unlock(so, 1);
-       return (isempty);
+
+       return(ret);
 }
 
+/*
+ * Fold the socket's connection state into the event-state word at
+ * *statep.  SOCKEV_CONNECTED is set or cleared to mirror
+ * SS_ISCONNECTED.  SOCKEV_DISCONNECTED is set when SS_ISDISCONNECTED
+ * is set and is never cleared here.  All other bits are preserved.
+ */
+void
+get_sockev_state(struct socket *so, u_int32_t *statep)
+{
+       u_int32_t sockev = *statep & ~(SOCKEV_CONNECTED);
+
+       if (so->so_state & SS_ISCONNECTED)
+               sockev |= SOCKEV_CONNECTED;
+       if (so->so_state & SS_ISDISCONNECTED)
+               sockev |= SOCKEV_DISCONNECTED;
+
+       *statep = sockev;
+}
+
+#define SO_LOCK_HISTORY_STR_LEN (2 * SO_LCKDBG_MAX * (2 + (2 * sizeof(void *)) + 1) + 1)
+
+/*
+ * Render the socket's recorded lock and unlock return addresses as a
+ * sequence of "lock:unlock " hexadecimal pairs, walking the
+ * SO_LCKDBG_MAX history slots from offset SO_LCKDBG_MAX - 1 down to 0
+ * relative to the next-slot indices.
+ *
+ * The result lives in a single static buffer that is zeroed and
+ * rewritten on every call, so the returned pointer is only valid until
+ * the next call and the routine is not reentrant.
+ */
+__private_extern__ const char *
+solockhistory_nr(struct socket *so)
+{
+       static char lock_history_str[SO_LOCK_HISTORY_STR_LEN];
+       size_t used = 0;
+       int slot = SO_LCKDBG_MAX;
+
+       bzero(lock_history_str, sizeof(lock_history_str));
+       while (slot-- > 0) {
+               uintptr_t locker = (uintptr_t)
+                   so->lock_lr[(so->next_lock_lr + slot) % SO_LCKDBG_MAX];
+               uintptr_t unlocker = (uintptr_t)
+                   so->unlock_lr[(so->next_unlock_lr + slot) % SO_LCKDBG_MAX];
+
+               used += snprintf(lock_history_str + used,
+                   SO_LOCK_HISTORY_STR_LEN - used, "%lx:%lx ",
+                   locker, unlocker);
+       }
+       return lock_history_str;
+}
 
 int
 socket_lock(struct socket *so, int refcount)
 {
-       int error = 0, lr_saved;
+       int error = 0;
+       void *lr_saved;
 
-       lr_saved = (unsigned int) __builtin_return_address(0);
+       lr_saved = __builtin_return_address(0);
 
        if (so->so_proto->pr_lock) {
                error = (*so->so_proto->pr_lock)(so, refcount, lr_saved);
@@ -3858,7 +4223,7 @@ socket_lock(struct socket *so, int refcount)
                lck_mtx_lock(so->so_proto->pr_domain->dom_mtx);
                if (refcount)
                        so->so_usecount++;
-               so->lock_lr[so->next_lock_lr] = (u_int32_t)lr_saved;
+               so->lock_lr[so->next_lock_lr] = lr_saved;
                so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX;
        }
 
@@ -3868,10 +4233,11 @@ socket_lock(struct socket *so, int refcount)
 int
 socket_unlock(struct socket *so, int refcount)
 {
-       int error = 0, lr_saved;
+       int error = 0;
+       void *lr_saved;
        lck_mtx_t *mutex_held;
 
-       lr_saved = (unsigned int) __builtin_return_address(0);
+       lr_saved = __builtin_return_address(0);
 
        if (so->so_proto == NULL)
                panic("socket_unlock null so_proto so=%p\n", so);
@@ -3883,13 +4249,16 @@ socket_unlock(struct socket *so, int refcount)
 #ifdef MORE_LOCKING_DEBUG
                lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
 #endif
-               so->unlock_lr[so->next_unlock_lr] = (u_int32_t)lr_saved;
+               so->unlock_lr[so->next_unlock_lr] = lr_saved;
                so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX;
 
                if (refcount) {
                        if (so->so_usecount <= 0)
-                               panic("socket_unlock: bad refcount so=%p "
-                                   "value=%d\n", so, so->so_usecount);
+                               panic("socket_unlock: bad refcount=%d so=%p (%d, %d, %d) lrh=%s",
+                                   so->so_usecount, so, so->so_proto->pr_domain->dom_family,
+                                   so->so_type, so->so_proto->pr_protocol, 
+                                   solockhistory_nr(so));
+                       
                        so->so_usecount--;
                        if (so->so_usecount == 0) {
                                sofreelastref(so, 1);
@@ -3943,3 +4312,157 @@ somultipages(struct socket *so, boolean_t set)
        else
                so->so_flags &= ~SOF_MULTIPAGES;
 }
+
+/*
+ * For an AF_INET socket, return inaddr_local() of the inpcb's foreign
+ * address; for an AF_INET6 socket, return in6addr_local() of it.
+ * Sockets of any other family yield 0.
+ */
+int
+so_isdstlocal(struct socket *so)
+{
+       struct inpcb *inp = (struct inpcb *)so->so_pcb;
+
+       switch (so->so_proto->pr_domain->dom_family) {
+       case AF_INET:
+               return inaddr_local(inp->inp_faddr);
+       case AF_INET6:
+               return in6addr_local(&inp->in6p_faddr);
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Mark a socket as defunct: set SOF_DEFUNCT and set SB_DROP on both
+ * socket buffers.
+ *
+ * If the socket is already SOF_DEFUNCT, nothing is changed (the call
+ * panics if SB_DROP is not set on both buffers).  If the socket is
+ * flagged SOF_NODEFUNCT, the request fails with EOPNOTSUPP when
+ * `noforce' is set; otherwise the flag is cleared and the socket is
+ * marked defunct anyway.  `p' and `level' are only used for logging.
+ *
+ * Returns 0 on success or EOPNOTSUPP as described above.
+ */
+int
+sosetdefunct(struct proc *p, struct socket *so, int level, boolean_t noforce)
+{
+       int err = 0;
+       int defunct = (so->so_flags & SOF_DEFUNCT);
+
+       if (defunct) {
+               /* An already-defunct socket must have both buffers dropping */
+               if (!(so->so_snd.sb_flags & so->so_rcv.sb_flags & SB_DROP))
+                       panic("%s: SB_DROP not set", __func__);
+       } else {
+               if (so->so_flags & SOF_NODEFUNCT) {
+                       if (noforce) {
+                               err = EOPNOTSUPP;
+                               SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p "
+                                   "[%d,%d] is not eligible for defunct (%d)\n",
+                                   __func__, proc_selfpid(), proc_pid(p), level, so,
+                                   INP_SOCKAF(so), INP_SOCKTYPE(so), err));
+                               return (err);
+                       }
+                       /* Forced: override the socket's opt-out */
+                       so->so_flags &= ~SOF_NODEFUNCT;
+                       SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] "
+                           "defunct by force\n", __func__, proc_selfpid(), proc_pid(p),
+                           level, so, INP_SOCKAF(so), INP_SOCKTYPE(so)));
+               }
+
+               so->so_flags |= SOF_DEFUNCT;
+               /* Prevent further data from being appended to the socket buffers */
+               so->so_snd.sb_flags |= SB_DROP;
+               so->so_rcv.sb_flags |= SB_DROP;
+       }
+
+       SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] %s "
+           "defunct\n", __func__, proc_selfpid(), proc_pid(p), level, so,
+           INP_SOCKAF(so), INP_SOCKTYPE(so),
+           defunct ? "is already" : "marked as"));
+
+       return (err);
+}
+
+/*
+ * Carry out the defunct operation on a socket that has already been
+ * marked SOF_DEFUNCT (the call panics otherwise).
+ *
+ * Unless the socket is already in the SS_DEFUNCT state, this wakes up
+ * waiters on both socket buffers, releases any held SB_LOCK, shuts
+ * down both directions, disconnects, sets so_error to EBADF if no
+ * error is pending, releases buffered data and finally sets
+ * SS_DEFUNCT.  `p' and `level' are only used for logging.
+ *
+ * The *lock/*locked helper variants called here suggest the caller is
+ * expected to hold the socket lock -- confirm against callers.
+ *
+ * Always returns 0.
+ */
+int
+sodefunct(struct proc *p, struct socket *so, int level)
+{
+       struct sockbuf *rcv, *snd;
+
+       if (!(so->so_flags & SOF_DEFUNCT))
+               panic("%s improperly called", __func__);
+
+       if (so->so_state & SS_DEFUNCT)
+               goto done;
+
+       rcv = &so->so_rcv;
+       snd = &so->so_snd;
+
+       SODEFUNCTLOG(("%s[%d]: (target pid %d level %d) so %p [%d,%d] is now "
+           "defunct [rcv_si 0x%x, snd_si 0x%x, rcv_fl 0x%x, snd_fl 0x%x]\n",
+           __func__, proc_selfpid(), proc_pid(p), level, so,
+           INP_SOCKAF(so), INP_SOCKTYPE(so),
+           (uint32_t)rcv->sb_sel.si_flags, (uint32_t)snd->sb_sel.si_flags,
+           (uint16_t)rcv->sb_flags, (uint16_t)snd->sb_flags));
+
+       /*
+        * Unwedge threads blocked on sbwait() and sb_lock().
+        */
+       sbwakeup(rcv);
+       sbwakeup(snd);
+
+       if (rcv->sb_flags & SB_LOCK)
+               sbunlock(rcv, 1);
+       if (snd->sb_flags & SB_LOCK)
+               sbunlock(snd, 1);
+
+       /*
+        * Flush the buffers and disconnect.  We explicitly call shutdown
+        * on both data directions to ensure that SS_CANT{RCV,SEND}MORE
+        * states are set for the socket.  This would also flush out data
+        * hanging off the receive list of this socket.
+        */
+       (void) soshutdownlock(so, SHUT_RD);
+       (void) soshutdownlock(so, SHUT_WR);
+       (void) sodisconnectlocked(so);
+
+       /*
+        * Explicitly handle connectionless-protocol disconnection
+        * and release any remaining data in the socket buffers.
+        * SS_ISDISCONNECTED is a so_state bit, so it must be tested
+        * against so_state rather than so_flags.
+        */
+       if (!(so->so_state & SS_ISDISCONNECTED))
+               (void) soisdisconnected(so);
+
+       if (so->so_error == 0)
+               so->so_error = EBADF;
+
+       if (rcv->sb_cc != 0)
+               sbrelease(rcv);
+       if (snd->sb_cc != 0)
+               sbrelease(snd);
+
+       so->so_state |= SS_DEFUNCT;
+
+done:
+       return (0);
+}
+
+/*
+ * Set (optval != 0) or clear (optval == 0) INP_RECV_ANYIF on the
+ * socket's inpcb.  Applies to AF_INET sockets and, when INET6 is
+ * configured, AF_INET6 sockets; any other family gets
+ * EPROTONOSUPPORT.  Returns 0 on success.
+ */
+__private_extern__ int
+so_set_recv_anyif(struct socket *so, int optval)
+{
+       int inet_family = (INP_SOCKAF(so) == AF_INET);
+
+#if INET6
+       if (!inet_family)
+               inet_family = (INP_SOCKAF(so) == AF_INET6);
+#endif /* INET6 */
+
+       if (!inet_family)
+               return (EPROTONOSUPPORT);
+
+       if (optval)
+               sotoinpcb(so)->inp_flags |= INP_RECV_ANYIF;
+       else
+               sotoinpcb(so)->inp_flags &= ~INP_RECV_ANYIF;
+
+       return (0);
+}
+
+/*
+ * Return 1 if INP_RECV_ANYIF is set on the socket's inpcb, 0 if it is
+ * clear.  Only AF_INET sockets and, when INET6 is configured,
+ * AF_INET6 sockets are examined; any other family reports 0.
+ */
+__private_extern__ int
+so_get_recv_anyif(struct socket *so)
+{
+       int inet_family = (INP_SOCKAF(so) == AF_INET);
+
+#if INET6
+       if (!inet_family)
+               inet_family = (INP_SOCKAF(so) == AF_INET6);
+#endif /* INET6 */
+
+       if (!inet_family)
+               return (0);
+
+       return ((sotoinpcb(so)->inp_flags & INP_RECV_ANYIF) ? 1 : 0);
+}