]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/nfs/nfs_syscalls.c
xnu-3247.1.106.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
index d6de219ba6f386330ca32ef6ddfa9221fce5d34f..12daa55889dcfeb91ed97bdbad8088320de2b23f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2010 Apple Inc.  All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc.  All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -131,7 +131,10 @@ extern int nfsrv_wg_delay;
 extern int nfsrv_wg_delay_v3;
 
 static int nfsrv_require_resv_port = 0;
-static int nfsrv_deadsock_timer_on = 0;
+static time_t  nfsrv_idlesock_timer_on = 0;
+static int nfsrv_sock_tcp_cnt = 0;
+#define NFSD_MIN_IDLE_TIMEOUT 30
+static int nfsrv_sock_idle_timeout = 3600; /* One hour */
 
 int    nfssvc_export(user_addr_t argp);
 int    nfssvc_nfsd(void);
@@ -166,6 +169,11 @@ SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLA
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
 #endif /* NFSCLIENT */
 
 #if NFSSERVER
@@ -184,11 +192,81 @@ SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOC
 #endif
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
 SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
+#ifdef NFS_UC_Q_DEBUG
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
+#endif
 #endif /* NFSSERVER */
 
 
 #if NFSCLIENT
 
+static int
+mapname2id(struct nfs_testmapid *map)
+{
+       int error;
+
+       error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
+       if (error)
+               return (error);
+
+       if (map->ntm_grpflag)
+               error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
+       else
+               error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
+
+       return (error);
+}
+
+static int
+mapid2name(struct nfs_testmapid *map)
+{
+       int error;
+       int len = sizeof(map->ntm_name);
+       
+       if (map->ntm_grpflag)
+               error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
+       else
+               error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
+
+       if (error)
+               return (error);
+       
+       error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
+
+       return (error);
+       
+}
+
+
+static int
+nfsclnt_testidmap(proc_t p, user_addr_t argp)
+{
+       struct nfs_testmapid mapid;
+       int error, coerror;
+               
+        /* Let root make this call. */
+       error = proc_suser(p);
+        if (error)
+                return (error);
+
+       error = copyin(argp, &mapid, sizeof(mapid));
+       if (error)
+               return (error);
+       if (mapid.ntm_name2id)
+               error = mapname2id(&mapid);
+       else
+               error = mapid2name(&mapid);
+
+       coerror = copyout(&mapid, argp, sizeof(mapid));
+
+       return (error ? error : coerror);
+}
+
 int
 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
 {
@@ -204,12 +282,16 @@ nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
        case NFSCLNT_LOCKDNOTIFY:
                error = nfslockdnotify(p, uap->argp);
                break;
+       case NFSCLNT_TESTIDMAP:
+               error = nfsclnt_testidmap(p, uap->argp);
+               break;
        default:
                error = EINVAL;
        }
        return (error);
 }
 
+
 /*
  * Asynchronous I/O threads for client NFS.
  * They do read-ahead and write-behind operations on the block I/O cache.
@@ -326,6 +408,11 @@ nfsiod_continue(int error)
 
 worktodo:
        while ((nmp = niod->niod_nmp)) {
+               if (nmp == NULL){
+                       niod->niod_nmp = NULL;
+                       break;
+               }
+
                /* 
                 * Service this mount's async I/O queue.
                 *
@@ -339,6 +426,13 @@ worktodo:
                /* grab the current contents of the queue */
                TAILQ_INIT(&iodq);
                TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
+               /* Mark each iod request as being managed by an iod */
+               TAILQ_FOREACH(req, &iodq, r_achain) {
+                       lck_mtx_lock(&req->r_mtx);
+                       assert(!(req->r_flags & R_IOD));
+                       req->r_flags |= R_IOD;
+                       lck_mtx_unlock(&req->r_mtx);
+               }
                lck_mtx_unlock(nfsiod_mutex);
 
                /* process the queue */
@@ -352,8 +446,11 @@ worktodo:
                lck_mtx_lock(nfsiod_mutex);
                morework = !TAILQ_EMPTY(&nmp->nm_iodq);
                if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
-                       /* we're going to stop working on this mount */
-                       if (morework) /* mount still needs more work so queue it up */
+                       /* 
+                        * we're going to stop working on this mount but if the 
+                        * mount still needs more work so queue it up
+                        */
+                       if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST)
                                TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
                        nmp->nm_niod = NULL;
                        niod->niod_nmp = NULL;
@@ -364,6 +461,7 @@ worktodo:
        if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
                niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
                TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
+               niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
        }
        if (niod->niod_nmp)
                goto worktodo;
@@ -487,7 +585,7 @@ out:
        return (error);
 }
 
-extern struct fileops vnops;
+extern const struct fileops vnops;
 
 /*
  * syscall for the rpc.lockd to use to translate a NFS file handle into
@@ -597,7 +695,6 @@ fhopen( proc_t p,
        fp = nfp;
 
        fp->f_fglob->fg_flag = fmode & FMASK;
-       fp->f_fglob->fg_type = DTYPE_VNODE;
        fp->f_fglob->fg_ops = &vnops;
        fp->f_fglob->fg_data = (caddr_t)vp;
 
@@ -613,7 +710,7 @@ fhopen( proc_t p,
                type = F_FLOCK;
                if ((fmode & FNONBLOCK) == 0)
                        type |= F_WAIT;
-               if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
+               if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
                        struct vfs_context context = *vfs_context_current();
                        /* Modify local copy (to not damage thread copy) */
                        context.vc_ucred = fp->f_fglob->fg_cred;
@@ -724,6 +821,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 {
        struct nfsrv_sock *slp;
        int error = 0, sodomain, sotype, soprotocol, on = 1;
+       int first;
        struct timeval timeo;
 
        /* make sure mbuf constants are set up */
@@ -743,8 +841,12 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        }
 
        /* Set protocol options and reserve some space (for UDP). */
-       if (sotype == SOCK_STREAM)
+       if (sotype == SOCK_STREAM) {
+               error = nfsrv_check_exports_allow_address(mynam);
+               if (error)
+                       return (error);
                sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+       }
        if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
                sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
        if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
@@ -808,21 +910,72 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
        }
 
        /* add the socket to the list */
+       first = TAILQ_EMPTY(&nfsrv_socklist);
        TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+       if (soprotocol == IPPROTO_TCP) {
+               nfsrv_sock_tcp_cnt++;
+               if (nfsrv_sock_idle_timeout < 0)
+                       nfsrv_sock_idle_timeout = 0;
+               if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
+                       nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
+               /*
+                * Possibly start or stop the idle timer. We only start the idle timer when
+                * we have more than 2 * nfsd_thread_max connections. If the idle timer is
+                * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or
+                * the number of connections.
+                */
+               if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
+                       if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                               if (nfsrv_idlesock_timer_on) {
+                                       thread_call_cancel(nfsrv_idlesock_timer_call);
+                                       nfsrv_idlesock_timer_on = 0;
+                               }
+                       } else {
+                               struct nfsrv_sock *old_slp;
+                               struct timeval now;
+                               time_t time_to_wait = nfsrv_sock_idle_timeout;
+                               /*
+                                * Get the oldest tcp socket and calculate the
+                                * earliest time for the next idle timer to fire
+                                * based on the possibly updated nfsrv_sock_idle_timeout
+                                */
+                               TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
+                                       if (old_slp->ns_sotype == SOCK_STREAM) {
+                                               microuptime(&now);
+                                               time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
+                                               if (time_to_wait < 1)
+                                                       time_to_wait = 1;
+                                               break;
+                                       }
+                               }
+                               /*
+                                * If we have a timer scheduled, but if its going to fire too late,
+                                * turn it off.
+                                */
+                               if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
+                                       thread_call_cancel(nfsrv_idlesock_timer_call);
+                                       nfsrv_idlesock_timer_on = 0;
+                               }
+                               /* Schedule the idle thread if it isn't already */
+                               if (!nfsrv_idlesock_timer_on) {
+                                       nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+                                       nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+                               }
+                       }
+               }
+       }
 
        sock_retain(so); /* grab a retain count on the socket */
        slp->ns_so = so;
        slp->ns_sotype = sotype;
        slp->ns_nam = mynam;
 
-       /* set up the socket upcall */
-       sock_setupcall(so, nfsrv_rcv, slp);
-       /* just playin' it safe */
-       sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+       /* set up the socket up-call */
+       nfsrv_uc_addsock(slp, first);
 
        /* mark that the socket is not in the nfsrv_sockwg list */
        slp->ns_wgq.tqe_next = SLPNOLIST;
-       
+
        slp->ns_flag = SLP_VALID | SLP_NEEDQ;
 
        nfsrv_wakenfsd(slp);
@@ -878,6 +1031,7 @@ nfssvc_nfsd(void)
        u_quad_t cur_usec;
        struct timeval now;
        struct vfs_context context;
+       struct timespec to;
 
 #ifndef nolint
        cacherep = RC_DOIT;
@@ -891,11 +1045,16 @@ nfssvc_nfsd(void)
        lck_mtx_lock(nfsd_mutex);
        if (nfsd_thread_count++ == 0)
                nfsrv_initcache();              /* Init the server request cache */
+       
        TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
        lck_mtx_unlock(nfsd_mutex);
 
        context.vc_thread = current_thread();
 
+       /* Set time out so that nfsd threads can wake up a see if they are still needed. */
+       to.tv_sec = 5;
+       to.tv_nsec = 0;
+
        /*
         * Loop getting rpc requests until SIGKILL.
         */
@@ -923,12 +1082,14 @@ nfssvc_nfsd(void)
                                }
                                nfsd->nfsd_flag |= NFSD_WAITING;
                                TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
-                               error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
+                               error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
                                if (error) {
                                        if (nfsd->nfsd_flag & NFSD_WAITING) {
                                                TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
                                                nfsd->nfsd_flag &= ~NFSD_WAITING;
                                        }
+                                       if (error == EWOULDBLOCK)
+                                               continue;
                                        goto done;
                                }
                        }
@@ -962,6 +1123,11 @@ nfssvc_nfsd(void)
                        if (!nfsd->nfsd_slp && slp) {
                                /* we found a socket to work on, grab a reference */
                                slp->ns_sref++;
+                               microuptime(&now);
+                               slp->ns_timestamp = now.tv_sec;
+                               /* We keep the socket list in least recently used order for reaping idle sockets */
+                               TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+                               TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
                                nfsd->nfsd_slp = slp;
                                opcnt = 0;
                                /* and put it at the back of the work queue */
@@ -1083,7 +1249,7 @@ nfssvc_nfsd(void)
 
                        }
                        if (error) {
-                               OSAddAtomic(1, &nfsstats.srv_errs);
+                               OSAddAtomic64(1, &nfsstats.srv_errs);
                                nfsrv_updatecache(nd, FALSE, mrep);
                                if (nd->nd_nam2) {
                                        mbuf_freem(nd->nd_nam2);
@@ -1091,7 +1257,7 @@ nfssvc_nfsd(void)
                                }
                                break;
                        }
-                       OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
+                       OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
                        nfsrv_updatecache(nd, TRUE, mrep);
                        /* FALLTHRU */
 
@@ -1281,16 +1447,13 @@ nfsrv_zapsock(struct nfsrv_sock *slp)
        if (so == NULL)
                return;
 
+       sock_setupcall(so, NULL, NULL);
+       sock_shutdown(so, SHUT_RDWR);
+
        /*
-        * Attempt to deter future upcalls, but leave the
-        * upcall info in place to avoid a race with the
-        * networking code.
+        * Remove from the up-call queue
         */
-       socket_lock(so, 1);
-       so->so_rcv.sb_flags &= ~SB_UPCALL;
-       socket_unlock(so, 1);
-
-       sock_shutdown(so, SHUT_RDWR);
+       nfsrv_uc_dequeue(slp);
 }
 
 /*
@@ -1341,12 +1504,9 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
  * Derefence a server socket structure. If it has no more references and
  * is no longer valid, you can throw it away.
  */
-void
-nfsrv_slpderef(struct nfsrv_sock *slp)
+static void
+nfsrv_slpderef_locked(struct nfsrv_sock *slp)
 {
-       struct timeval now;
-
-       lck_mtx_lock(nfsd_mutex);
        lck_rw_lock_exclusive(&slp->ns_rwlock);
        slp->ns_sref--;
 
@@ -1360,7 +1520,6 @@ nfsrv_slpderef(struct nfsrv_sock *slp)
                        slp->ns_flag &= ~SLP_QUEUED;
                }
                lck_rw_done(&slp->ns_rwlock);
-               lck_mtx_unlock(nfsd_mutex);
                return;
        }
 
@@ -1373,66 +1532,88 @@ nfsrv_slpderef(struct nfsrv_sock *slp)
                        TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
                slp->ns_flag &= ~SLP_QUEUED;
        }
+       lck_rw_done(&slp->ns_rwlock);
 
-       /*
-        * Queue the socket up for deletion
-        * and start the timer to delete it
-        * after it has been in limbo for
-        * a while.
-        */
-       microuptime(&now);
-       slp->ns_timestamp = now.tv_sec;
        TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
-       TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
-       if (!nfsrv_deadsock_timer_on) {
-               nfsrv_deadsock_timer_on = 1;
-               nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-                               NFSRV_DEADSOCKDELAY * 1000);
-       }
+       if (slp->ns_sotype == SOCK_STREAM)
+               nfsrv_sock_tcp_cnt--;
 
-       lck_rw_done(&slp->ns_rwlock);
        /* now remove from the write gather socket list */ 
        if (slp->ns_wgq.tqe_next != SLPNOLIST) {
                TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
                slp->ns_wgq.tqe_next = SLPNOLIST;
        }
+       nfsrv_slpfree(slp);
+}
+
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)
+{
+       lck_mtx_lock(nfsd_mutex);
+       nfsrv_slpderef_locked(slp);
        lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
- * Check periodically for dead sockets pending delete.
- * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
- * seconds then we assume it's safe to free.
+ * Check periodically for idle sockest if needed and
+ * zap them.
  */
 void
-nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
 {
-       struct nfsrv_sock *slp;
+       struct nfsrv_sock *slp, *tslp;
        struct timeval now;
-       time_t time_to_wait;
+       time_t time_to_wait = nfsrv_sock_idle_timeout;
 
        microuptime(&now);
        lck_mtx_lock(nfsd_mutex);
 
-       while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
-               if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
-                       break;
-               TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
-               nfsrv_slpfree(slp);
-       }
-       if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
-               nfsrv_deadsock_timer_on = 0;
+       /* Turn off the timer if we're suppose to and get out */
+       if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
+           nfsrv_sock_idle_timeout = 0;
+       if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+               nfsrv_idlesock_timer_on = 0;
                lck_mtx_unlock(nfsd_mutex);
                return;
        }
-       time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
-       if (time_to_wait < 1)
-               time_to_wait = 1;
 
-       lck_mtx_unlock(nfsd_mutex);
+       TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
+               lck_rw_lock_exclusive(&slp->ns_rwlock);
+               /* Skip udp and referenced sockets */
+               if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+                       lck_rw_done(&slp->ns_rwlock);
+                       continue;
+               }
+               /*
+                * If this is the first non-referenced socket that hasn't idle out,
+                * use its time stamp to calculate the earlist time in the future
+                * to start the next invocation of the timer. Since the nfsrv_socklist
+                * is sorted oldest access to newest. Once we find the first one,
+                * we're done and break out of the loop.
+                */
+               if (((slp->ns_timestamp + nfsrv_sock_idle_timeout)  >  now.tv_sec) ||
+                       nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                       time_to_wait -= now.tv_sec - slp->ns_timestamp;
+                       if (time_to_wait < 1)
+                               time_to_wait = 1;
+                       lck_rw_done(&slp->ns_rwlock);
+                       break;
+               }
+               /*
+                * Bump the ref count. nfsrv_slpderef below will destroy
+                * the socket, since nfsrv_zapsock has closed it.
+                */
+               slp->ns_sref++;
+               nfsrv_zapsock(slp);
+               lck_rw_done(&slp->ns_rwlock);
+               nfsrv_slpderef_locked(slp);
+       }
 
-       nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-               time_to_wait * 1000);
+       /* Start ourself back up */
+       nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+       /* Remember when the next timer will fire for nfssvc_addsock. */
+       nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+       lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
@@ -1451,33 +1632,14 @@ nfsrv_cleanup(void)
        microuptime(&now);
        for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
                nslp = TAILQ_NEXT(slp, ns_chain);
-               if (slp->ns_flag & SLP_VALID) {
-                       lck_rw_lock_exclusive(&slp->ns_rwlock);
+               lck_rw_lock_exclusive(&slp->ns_rwlock);
+               slp->ns_sref++;
+               if (slp->ns_flag & SLP_VALID)
                        nfsrv_zapsock(slp);
-                       lck_rw_done(&slp->ns_rwlock);
-               }
-               if (slp->ns_flag & SLP_QUEUED) {
-                       if (slp->ns_flag & SLP_WAITQ)
-                               TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
-                       else
-                               TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
-                       slp->ns_flag &= ~SLP_QUEUED;
-               }
-               if (slp->ns_wgq.tqe_next != SLPNOLIST) {
-                       TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
-                       slp->ns_wgq.tqe_next = SLPNOLIST;
-               }
-               /* queue the socket up for deletion */
-               slp->ns_timestamp = now.tv_sec;
-               TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
-               TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
-               if (!nfsrv_deadsock_timer_on) {
-                       nfsrv_deadsock_timer_on = 1;
-                       nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-                               NFSRV_DEADSOCKDELAY * 1000);
-               }
+               lck_rw_done(&slp->ns_rwlock);
+               nfsrv_slpderef_locked(slp);
        }
-
+#
 #if CONFIG_FSE
        /*
         * Flush pending file write fsevents
@@ -1506,6 +1668,8 @@ nfsrv_cleanup(void)
        lck_mtx_unlock(nfsrv_fmod_mutex);
 #endif
 
+       nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
+       
        nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
 
        nfsrv_cleancache();     /* And clear out server cache */