]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/nfs/nfs_syscalls.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_syscalls.c
index ec39c5f2d568a9d4d10c369e1c01357247ab4085..d4dead82571dde57f5932914879ab7fe3ea043b6 100644 (file)
@@ -1,31 +1,29 @@
 /*
- * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2014 Apple Inc.  All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This file contains Original Code and/or Modifications of Original Code 
- * as defined in and that are subject to the Apple Public Source License 
- * Version 2.0 (the 'License'). You may not use this file except in 
- * compliance with the License.  The rights granted to you under the 
- * License may not be used to create, or enable the creation or 
- * redistribution of, unlawful or unlicensed copies of an Apple operating 
- * system, or to circumvent, violate, or enable the circumvention or 
- * violation of, any terms of an Apple operating system software license 
- * agreement.
- *
- * Please obtain a copy of the License at 
- * http://www.opensource.apple.com/apsl/ and read it before using this 
- * file.
- *
- * The Original Code and all software distributed under the License are 
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
  * limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
 /*
  *     @(#)nfs_syscalls.c      8.5 (Berkeley) 3/30/95
  * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
  */
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections.  This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
 
 #include <sys/param.h>
 #include <sys/systm.h>
-/* XXX CSM 11/25/97 FreeBSD's generated syscall prototypes */
-#ifdef notyet
-#include <sys/sysproto.h>
-#endif
 #include <sys/kernel.h>
 #include <sys/file_internal.h>
 #include <sys/filedesc.h>
 #include <sys/user.h>
 #include <sys/sysproto.h>
 #include <sys/kpi_socket.h>
+#include <sys/fsevents.h>
 #include <libkern/OSAtomic.h>
+#include <kern/thread_call.h>
+#include <kern/task.h>
 
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
 
 #include <netinet/in.h>
 #include <netinet/tcp.h>
-#if ISO
-#include <netiso/iso.h>
-#endif
 #include <nfs/xdr_subs.h>
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsm_subs.h>
 #include <nfs/nfsrvcache.h>
+#include <nfs/nfs_gss.h>
 #include <nfs/nfsmount.h>
 #include <nfs/nfsnode.h>
-#include <nfs/nfsrtt.h>
 #include <nfs/nfs_lock.h>
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
+
+kern_return_t  thread_terminate(thread_t); /* XXX */
+
+#if NFSSERVER
+
+extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
+                                           struct nfsrv_sock *slp,
+                                           vfs_context_t ctx,
+                                           mbuf_t *mrepp);
+extern int nfsrv_wg_delay;
+extern int nfsrv_wg_delay_v3;
+
+static int nfsrv_require_resv_port = 0;
+static time_t  nfsrv_idlesock_timer_on = 0;
+static int nfsrv_sock_tcp_cnt = 0;
+#define NFSD_MIN_IDLE_TIMEOUT 30
+static int nfsrv_sock_idle_timeout = 3600; /* One hour */
 
-extern void unix_syscall_return(int);
-
-/* Global defs. */
-extern int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
-                                           struct nfssvc_sock *slp,
-                                           proc_t procp,
-                                           mbuf_t *mreqp);
-extern int nfs_numasync;
-extern int nfs_ioddelwri;
-extern int nfsrtton;
-extern struct nfsstats nfsstats;
-extern int nfsrvw_procrastinate;
-extern int nfsrvw_procrastinate_v3;
-
-struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
-static int nuidhash_max = NFS_MAXUIDHASH;
-
-static void    nfsrv_zapsock(struct nfssvc_sock *slp);
-static int     nfssvc_iod(proc_t);
-static int     nfskerb_clientd(struct nfsmount *, struct nfsd_cargs *, int, user_addr_t, proc_t);
-
-static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
-
-#ifndef NFS_NOSERVER
-int nfsd_waiting = 0;
-static struct nfsdrt nfsdrt;
-int nfs_numnfsd = 0;
-static void    nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep);
-static int     nfssvc_addsock(socket_t, mbuf_t, proc_t);
-static int     nfssvc_nfsd(struct nfsd_srvargs *,user_addr_t, proc_t);
-static int     nfssvc_export(user_addr_t, proc_t);
-
-static int nfs_privport = 0;
-/* XXX CSM 11/25/97 Upgrade sysctl.h someday */
-#ifdef notyet
-SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, "");
-SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, "");
-SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, "");
+int    nfssvc_export(user_addr_t argp);
+int    nfssvc_nfsd(void);
+int    nfssvc_addsock(socket_t, mbuf_t);
+void   nfsrv_zapsock(struct nfsrv_sock *);
+void   nfsrv_slpderef(struct nfsrv_sock *);
+void   nfsrv_slpfree(struct nfsrv_sock *);
+
+#endif /* NFSSERVER */
+
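+/*
+ * sysctl nodes and variables for NFS, rooted at the vfs.generic.nfs node:
+ * client tunables live under "client", server tunables under "server".
+ */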
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");
+
+#if NFSCLIENT
+SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
+SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_domain, sizeof(nfs4_domain), "");
+#endif /* NFSCLIENT */
+
+#if NFSSERVER
+SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
+#if CONFIG_FSE
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
 #endif
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
+#ifdef NFS_UC_Q_DEBUG
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
+#endif
+#endif /* NFSSERVER */
+
+
+#if NFSCLIENT
+
+/*
+ * Resolve the name in map->ntm_name to a GUID (stored in map->ntm_guid)
+ * and then to a numeric id (stored in map->ntm_id).  map->ntm_grpflag
+ * selects group (gid) versus user (uid) translation.
+ *
+ * Returns 0 on success or the error from the failing translation step.
+ */
+static int
+mapname2id(struct nfs_testmapid *map)
+{
+       int rc = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
+
+       if (rc != 0)
+               return (rc);
+
+       /* second step: GUID -> gid or uid, depending on the group flag */
+       return (map->ntm_grpflag
+           ? kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id)
+           : kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id));
+}
+
+/*
+ * Resolve the numeric id in map->ntm_id to a GUID (stored in map->ntm_guid)
+ * and then to a name written into map->ntm_name.  map->ntm_grpflag selects
+ * group (gid) versus user (uid) translation.
+ *
+ * Returns 0 on success or the error from the failing translation step.
+ */
+static int
+mapid2name(struct nfs_testmapid *map)
+{
+       int namelen = sizeof(map->ntm_name);
+       int rc;
+
+       /* first step: gid or uid -> GUID */
+       rc = map->ntm_grpflag
+           ? kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid)
+           : kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
+       if (rc != 0)
+               return (rc);
+
+       /* second step: GUID -> name, bounded by the size of the name buffer */
+       return (nfs4_guid2id(&map->ntm_guid, map->ntm_name, &namelen, map->ntm_grpflag));
+}
+
+
+/*
+ * NFSCLNT_TESTIDMAP handler: exercise the NFSv4 id mapping code.
+ *
+ * Copies a struct nfs_testmapid in from user address argp, performs either
+ * a name->id or an id->name translation (selected by ntm_name2id), and
+ * copies the structure back out to argp whether or not the translation
+ * succeeded.
+ *
+ * Returns the proc_suser()/copyin() error if one occurs, otherwise the
+ * translation error if any, otherwise the copyout() result.
+ */
+static int
+nfsclnt_testidmap(proc_t p, user_addr_t argp)
+{
+       struct nfs_testmapid mapid;
+       int error, coerror;
+
+       /* Let root make this call. */
+       error = proc_suser(p);
+       if (error)
+               return (error);
+
+       error = copyin(argp, &mapid, sizeof(mapid));
+       if (error)
+               return (error);
+
+       /*
+        * The name buffer comes straight from user space and is handed to
+        * string-based mapping code; force termination so a buffer without
+        * a NUL cannot cause a read past the end of mapid.
+        */
+       mapid.ntm_name[sizeof(mapid.ntm_name) - 1] = '\0';
+
+       if (mapid.ntm_name2id)
+               error = mapname2id(&mapid);
+       else
+               error = mapid2name(&mapid);
+
+       /* always hand the (possibly partially filled) result back */
+       coerror = copyout(&mapid, argp, sizeof(mapid));
+
+       return (error ? error : coerror);
+}
+
+/*
+ * NFS client pseudo system call.
+ *
+ * Dispatches on uap->flag:
+ *   NFSCLNT_LOCKDANS    - copy in a struct lockd_ans and pass it to nfslockdans()
+ *   NFSCLNT_LOCKDNOTIFY - pass uap->argp to nfslockdnotify()
+ *   NFSCLNT_TESTIDMAP   - pass uap->argp to nfsclnt_testidmap()
+ * Any other flag value yields EINVAL.
+ */
+int
+nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
+{
+       struct lockd_ans la;
+       int flag = uap->flag;
+
+       if (flag == NFSCLNT_LOCKDANS) {
+               int error = copyin(uap->argp, &la, sizeof(la));
+
+               return (error ? error : nfslockdans(p, &la));
+       }
+       if (flag == NFSCLNT_LOCKDNOTIFY)
+               return (nfslockdnotify(p, uap->argp));
+       if (flag == NFSCLNT_TESTIDMAP)
+               return (nfsclnt_testidmap(p, uap->argp));
+
+       return (EINVAL);
+}
+
+
+/*
+ * Asynchronous I/O threads for client NFS.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ *
+ * Up to nfsiod_thread_max threads are launched on demand, and each exits
+ * when unused for a while.  There are as many nfsiod structs as there are
+ * nfsiod threads; however there's no strict tie between a thread and a struct.
+ * Each thread puts an nfsiod on the free list and sleeps on it.  When it wakes
+ * up, it removes the next struct nfsiod from the queue and services it.  Then
+ * it will put the struct at the head of free list and sleep on it.
+ * Async requests will pull the next struct nfsiod from the head of the free list,
+ * put it on the work queue, and wake whatever thread is waiting on that struct.
+ */
+
+/*
+ * nfsiod thread exit routine
+ *
+ * Must be called with nfsiod_mutex held so that the
+ * decision to terminate is atomic with the termination.
+ */
+void
+nfsiod_terminate(struct nfsiod *niod)
+{
+       /* drop our slot in the thread count, then release the mutex */
+       nfsiod_thread_count--;
+       lck_mtx_unlock(nfsiod_mutex);
+
+       /* release the nfsiod struct handed to us (if the caller found one) */
+       if (niod == NULL) {
+               printf("nfsiod: terminating without niod\n");
+       } else {
+               FREE(niod, M_TEMP);
+       }
+
+       thread_terminate(current_thread());
+       /*NOTREACHED*/
+}
+
+/*
+ * nfsiod thread startup routine
+ *
+ * Allocates this thread's struct nfsiod, puts it on the free list, wakes
+ * whoever is sleeping on this thread (nfsiod_start() sleeps on the thread
+ * pointer while waiting for startup to complete), and then sleeps on the
+ * nfsiod with nfsiod_continue as the continuation.
+ */
+void
+nfsiod_thread(void)
+{
+       struct nfsiod *niod;
+       int error;
+
+       MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
+       if (!niod) {
+               lck_mtx_lock(nfsiod_mutex);
+               nfsiod_thread_count--; /* give back the slot counted for this thread */
+               wakeup(current_thread()); /* let the waiting starter proceed */
+               lck_mtx_unlock(nfsiod_mutex);
+               thread_terminate(current_thread());
+               /*NOTREACHED*/
+       }
+       bzero(niod, sizeof(*niod));
+       lck_mtx_lock(nfsiod_mutex);
+       TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
+       wakeup(current_thread()); /* startup done: wake the thread that started us */
+       error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
+       /*
+        * shouldn't return... so we have an error
+        * (PDROP was passed to msleep0, and the mutex is re-taken below
+        * before touching the free list)
+        */
+       /* remove an old nfsiod struct and terminate */
+       lck_mtx_lock(nfsiod_mutex);
+       if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
+               TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
+       nfsiod_terminate(niod); /* called with nfsiod_mutex held, as it requires */
+       /*NOTREACHED*/
+}
+
+/*
+ * Start up another nfsiod thread.
+ * (unless we're already maxed out and there are nfsiods running)
+ *
+ * Returns 0 once the new thread has signalled that its startup is complete,
+ * or EBUSY if the thread limit has been reached or the thread could not be
+ * created.
+ */
+int
+nfsiod_start(void)
+{
+       thread_t thd = THREAD_NULL;
+
+       lck_mtx_lock(nfsiod_mutex);
+       if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
+               lck_mtx_unlock(nfsiod_mutex);
+               return (EBUSY);
+       }
+       /* reserve a slot for the new thread before creating it */
+       nfsiod_thread_count++;
+       if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
+               /*
+                * No thread was created, so nothing will ever give this slot
+                * back: undo the reservation to keep the count accurate.
+                */
+               nfsiod_thread_count--;
+               lck_mtx_unlock(nfsiod_mutex);
+               return (EBUSY);
+       }
+       /* wait for the thread to complete startup (PDROP releases the mutex) */
+       msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
+       thread_deallocate(thd);
+       return (0);
+}
+
+/*
+ * Continuation for Asynchronous I/O threads for NFS client.
+ *
+ * Grab an nfsiod struct to work on, do some work, then drop it.
+ *
+ * Takes the first nfsiod off the work queue and services the async request
+ * queue of the mount assigned to it, moving on to other waiting mounts until
+ * none are left.  The thread then either parks itself on the free list
+ * (sleeping with this function as the continuation again) or terminates.
+ *
+ * The "error" argument is not examined.  Every path ends in
+ * nfsiod_terminate() or in a sleep that is not expected to return here,
+ * so the final return statement is not expected to be reached.
+ */
+int
+nfsiod_continue(int error)
+{
+       struct nfsiod *niod;
+       struct nfsmount *nmp;
+       struct nfsreq *req, *treq;
+       struct nfs_reqqhead iodq;
+       int morework;
+
+       lck_mtx_lock(nfsiod_mutex);
+       niod = TAILQ_FIRST(&nfsiodwork);
+       if (!niod) {
+               /* there's no work queued up */
+               /* remove an old nfsiod struct and terminate */
+               if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
+                       TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
+               nfsiod_terminate(niod);
+               /*NOTREACHED*/
+       }
+       TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
+
+worktodo:
+       /* the loop condition guarantees nmp is non-NULL inside the body */
+       while ((nmp = niod->niod_nmp)) {
+               /*
+                * Service this mount's async I/O queue.
+                *
+                * In order to ensure some level of fairness between mounts,
+                * we grab all the work up front before processing it so any
+                * new work that arrives will be serviced on a subsequent
+                * iteration - and we have a chance to see if other work needs
+                * to be done (e.g. the delayed write queue needs to be pushed
+                * or other mounts are waiting for an nfsiod).
+                */
+               /* grab the current contents of the queue */
+               TAILQ_INIT(&iodq);
+               TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
+               /* Mark each iod request as being managed by an iod */
+               TAILQ_FOREACH(req, &iodq, r_achain) {
+                       lck_mtx_lock(&req->r_mtx);
+                       assert(!(req->r_flags & R_IOD));
+                       req->r_flags |= R_IOD;
+                       lck_mtx_unlock(&req->r_mtx);
+               }
+               lck_mtx_unlock(nfsiod_mutex);
+
+               /* process the queue (callbacks run without nfsiod_mutex held) */
+               TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
+                       TAILQ_REMOVE(&iodq, req, r_achain);
+                       req->r_achain.tqe_next = NFSREQNOLIST;
+                       req->r_callback.rcb_func(req);
+               }
+
+               /* now check if there's more/other work to be done */
+               lck_mtx_lock(nfsiod_mutex);
+               morework = !TAILQ_EMPTY(&nmp->nm_iodq);
+               if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
+                       /*
+                        * We're going to stop working on this mount, but if
+                        * the mount still needs more work, queue it up so
+                        * that it gets serviced again later.
+                        */
+                       if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST)
+                               TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
+                       nmp->nm_niod = NULL;
+                       niod->niod_nmp = NULL;
+               }
+       }
+
+       /* loop if there's still a mount to work on */
+       if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
+               niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
+               TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
+               niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
+       }
+       if (niod->niod_nmp)
+               goto worktodo;
+
+       /* queue ourselves back up - if there aren't too many threads running */
+       if (nfsiod_thread_count <= NFSIOD_MAX) {
+               TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
+               error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
+               /* shouldn't return... so we have an error */
+               /* remove an old nfsiod struct and terminate */
+               lck_mtx_lock(nfsiod_mutex);
+               if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
+                       TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
+       }
+       nfsiod_terminate(niod);
+       /*NOTREACHED*/
+       return (0);
+}
+
+#endif /* NFSCLIENT */
+
+
+#if NFSSERVER
 
 /*
  * NFS server system calls
@@ -169,17 +499,13 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
 {
        vnode_t vp;
        struct nfs_filehandle nfh;
-       int error;
+       int error, fhlen, fidlen;
        struct nameidata nd;
-       struct vfs_context context;
        char path[MAXPATHLEN], *ptr;
-       u_int pathlen;
+       size_t pathlen;
        struct nfs_exportfs *nxfs;
        struct nfs_export *nx;
 
-       context.vc_proc = p;
-       context.vc_ucred = kauth_cred_get();
-
        /*
         * Must be super user
         */
@@ -187,12 +513,21 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
        if (error)
                return (error);
 
-       error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
+       error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
+       if (!error)
+               error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
        if (error)
                return (error);
+       /* limit fh size to length specified (or v3 size by default) */
+       if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
+               fhlen = NFSV3_MAX_FH_SIZE;
+       fidlen = fhlen - sizeof(struct nfs_exphandle);
+
+       if (!nfsrv_is_initialized())
+               return (EINVAL);
 
-       NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 
-                       UIO_SYSSPACE, path, &context);
+       NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 
+                       UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
        error = namei(&nd);
        if (error)
                return (error);
@@ -201,10 +536,10 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
        vp = nd.ni_vp;
 
        // find exportfs that matches f_mntonname
-       lck_rw_lock_shared(&nfs_export_rwlock);
+       lck_rw_lock_shared(&nfsrv_export_rwlock);
        ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
-       LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) {
-               if (!strcmp(nxfs->nxfs_path, ptr))
+       LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
+               if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
                        break;
        }
        if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
@@ -228,30 +563,29 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
        }
 
        bzero(&nfh, sizeof(nfh));
-       nfh.nfh_xh.nxh_version = NFS_FH_VERSION;
-       nfh.nfh_xh.nxh_fsid = nxfs->nxfs_id;
-       nfh.nfh_xh.nxh_expid = nx->nx_id;
+       nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
+       nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
+       nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
        nfh.nfh_xh.nxh_flags = 0;
        nfh.nfh_xh.nxh_reserved = 0;
-       nfh.nfh_len = NFS_MAX_FID_SIZE;
-       error = VFS_VPTOFH(vp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL);
-       if (nfh.nfh_len > (int)NFS_MAX_FID_SIZE)
+       nfh.nfh_len = fidlen;
+       error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
+       if (nfh.nfh_len > (uint32_t)fidlen)
                error = EOVERFLOW;
        nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
        nfh.nfh_len += sizeof(nfh.nfh_xh);
+       nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
 
 out:
-       lck_rw_done(&nfs_export_rwlock);
+       lck_rw_done(&nfsrv_export_rwlock);
        vnode_put(vp);
        if (error)
                return (error);
-       error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
+       error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
        return (error);
 }
 
-#endif /* NFS_NOSERVER */
-
-extern struct fileops vnops;
+extern const struct fileops vnops;
 
 /*
  * syscall for the rpc.lockd to use to translate a NFS file handle into
@@ -263,7 +597,7 @@ extern struct fileops vnops;
 int
 fhopen( proc_t p,
        struct fhopen_args *uap,
-       register_t *retval)
+       int32_t *retval)
 {
        vnode_t vp;
        struct nfs_filehandle nfh;
@@ -273,19 +607,20 @@ fhopen( proc_t p,
        struct fileproc *fp, *nfp;
        int fmode, error, type;
        int indx;
-       kauth_cred_t cred = proc_ucred(p);
-       struct vfs_context context;
+       vfs_context_t ctx = vfs_context_current();
        kauth_action_t action;
 
-       context.vc_proc = p;
-       context.vc_ucred = cred;
-
        /*
         * Must be super user
         */
-       error = suser(cred, 0);
-       if (error)
+       error = suser(vfs_context_ucred(ctx), 0);
+       if (error) {
                return (error);
+       }
+
+       if (!nfsrv_is_initialized()) {
+               return (EINVAL);
+       }
 
        fmode = FFLAGS(uap->flags);
        /* why not allow a non-read/write open for our lockd? */
@@ -296,18 +631,22 @@ fhopen( proc_t p,
        if (error)
                return (error);
        if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
-           (nfh.nfh_len > (int)NFS_MAX_FH_SIZE))
+           (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
                return (EINVAL);
        error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
        if (error)
                return (error);
+       nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
 
-       lck_rw_lock_shared(&nfs_export_rwlock);
+       lck_rw_lock_shared(&nfsrv_export_rwlock);
        /* now give me my vnode, it gets returned to me with a reference */
-       error = nfsrv_fhtovp(&nfh, NULL, TRUE, &vp, &nx, &nxo);
-       lck_rw_done(&nfs_export_rwlock);
-       if (error)
+       error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
+       lck_rw_done(&nfsrv_export_rwlock);
+       if (error) {
+               if (error == NFSERR_TRYLATER)
+                       error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
                return (error);
+       }
 
        /*
         * From now on we have to make sure not
@@ -330,18 +669,23 @@ fhopen( proc_t p,
                goto bad;
        }
 
+#if CONFIG_MACF
+       if ((error = mac_vnode_check_open(ctx, vp, fmode)))
+               goto bad;
+#endif
+
        /* compute action to be authorized */
        action = 0;
        if (fmode & FREAD)
                action |= KAUTH_VNODE_READ_DATA;
        if (fmode & (FWRITE | O_TRUNC))
                action |= KAUTH_VNODE_WRITE_DATA;
-       if ((error = vnode_authorize(vp, NULL, action, &context)) != 0)
+       if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
                goto bad;
 
-       if ((error = VNOP_OPEN(vp, fmode, &context)))
+       if ((error = VNOP_OPEN(vp, fmode, ctx)))
                goto bad;
-       if ((error = vnode_ref_ext(vp, fmode)))
+       if ((error = vnode_ref_ext(vp, fmode, 0)))
                goto bad;
 
        /*
@@ -349,14 +693,13 @@ fhopen( proc_t p,
         */
 
        // starting here... error paths should call vn_close/vnode_put
-       if ((error = falloc(p, &nfp, &indx)) != 0) {
-               vn_close(vp, fmode & FMASK, cred, p);
+       if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
+               vn_close(vp, fmode & FMASK, ctx);
                goto bad;
        }
        fp = nfp;
 
        fp->f_fglob->fg_flag = fmode & FMASK;
-       fp->f_fglob->fg_type = DTYPE_VNODE;
        fp->f_fglob->fg_ops = &vnops;
        fp->f_fglob->fg_data = (caddr_t)vp;
 
@@ -372,8 +715,12 @@ fhopen( proc_t p,
                type = F_FLOCK;
                if ((fmode & FNONBLOCK) == 0)
                        type |= F_WAIT;
-               if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, &context))) {
-                       vn_close(vp, fp->f_fglob->fg_flag, fp->f_fglob->fg_cred, p);
+               if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
+                       struct vfs_context context = *vfs_context_current();
+                       /* Modify local copy (to not damage thread copy) */
+                       context.vc_ucred = fp->f_fglob->fg_cred;
+
+                       vn_close(vp, fp->f_fglob->fg_flag, &context);
                        fp_free(p, indx, fp);
                        return (error);
                }
@@ -383,7 +730,7 @@ fhopen( proc_t p,
        vnode_put(vp);
 
        proc_fdlock(p);
-       *fdflags(p, indx) &= ~UF_RESERVED;
+       procfdtbl_releasefd(p, indx, NULL);
        fp_drop(p, indx, fp, 1);
        proc_fdunlock(p);
 
@@ -396,76 +743,33 @@ bad:
 }
 
 /*
- * Nfs server psuedo system call for the nfsd's
- * Based on the flag value it either:
- * - adds a socket to the selection list
- * - remains in the kernel as an nfsd
- * - remains in the kernel as an nfsiod
+ * NFS server pseudo system call
  */
 int
 nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
 {
-#ifndef NFS_NOSERVER
-       struct nameidata nd;
        mbuf_t nam;
        struct user_nfsd_args user_nfsdarg;
-       struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
-       struct nfsd_cargs ncd;
-       struct nfsd *nfsd;
-       struct nfssvc_sock *slp;
-       struct nfsuid *nuidp;
-       struct nfsmount *nmp;
-       struct timeval now;
        socket_t so;
-       struct vfs_context context;
-       struct ucred temp_cred;
-#endif /* NFS_NOSERVER */
        int error;
 
        AUDIT_ARG(cmd, uap->flag);
 
        /*
-        * Must be super user
+        * Must be super user for most operations (export ops checked later).
         */
-       error = proc_suser(p);
-       if(error)
+       if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
                return (error);
-       if (uap->flag & NFSSVC_BIOD)
-               error = nfssvc_iod(p);
-#ifdef NFS_NOSERVER
-       else
-               error = ENXIO;
-#else /* !NFS_NOSERVER */
-       else if (uap->flag & NFSSVC_MNTD) {
-
-               context.vc_proc = p;
-               context.vc_ucred = kauth_cred_get();
-
-               error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
-               if (error)
-                       return (error);
-
-               NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 
-                       (proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
-                       CAST_USER_ADDR_T(ncd.ncd_dirp), &context);
-               error = namei(&nd);
-               if (error)
-                       return (error);
-               nameidone(&nd);
+#if CONFIG_MACF
+       error = mac_system_check_nfsd(kauth_cred_get());
+       if (error)
+               return (error);
+#endif
 
-               if (vnode_isvroot(nd.ni_vp) == 0)
-                       error = EINVAL;
-               nmp = VFSTONFS(vnode_mount(nd.ni_vp));
-               vnode_put(nd.ni_vp);
-               if (error)
-                       return (error);
+       /* make sure NFS server data structures have been initialized */
+       nfsrv_init();
 
-               if ((nmp->nm_state & NFSSTA_MNTD) &&
-                       (uap->flag & NFSSVC_GOTAUTH) == 0)
-                       return (0);
-               nmp->nm_state |= NFSSTA_MNTD;
-               error = nfskerb_clientd(nmp, &ncd, uap->flag, uap->argp, p);
-       } else if (uap->flag & NFSSVC_ADDSOCK) {
+       if (uap->flag & NFSSVC_ADDSOCK) {
                if (IS_64BIT_PROCESS(p)) {
                        error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
                } else {
@@ -499,332 +803,183 @@ nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
                 * to keep the socket from being closed when nfsd closes its
                 * file descriptor for it.
                 */
-               error = nfssvc_addsock(so, nam, p);
+               error = nfssvc_addsock(so, nam);
                /* drop the iocount file_socket() grabbed on the file descriptor */
                file_drop(user_nfsdarg.sock);
        } else if (uap->flag & NFSSVC_NFSD) {
-               error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
-               if (error)
-                       return (error);
-
-               if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
-                       (nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
-                       slp = nfsd->nfsd_slp;
-
-                       /*
-                        * First check to see if another nfsd has already
-                        * added this credential.
-                        */
-                       for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
-                           nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
-                               if (kauth_cred_getuid(nuidp->nu_cr) == nsd->nsd_cr.cr_uid &&
-                                   (!nfsd->nfsd_nd->nd_nam2 ||
-                                    netaddr_match(NU_NETFAM(nuidp),
-                                    &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
-                                       break;
-                       }
-                       if (nuidp) {
-                           nfsrv_setcred(nuidp->nu_cr,nfsd->nfsd_nd->nd_cr);
-                           nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
-                       } else {
-                           /*
-                            * Nope, so we will.
-                            */
-                           if (slp->ns_numuids < nuidhash_max) {
-                               slp->ns_numuids++;
-                               nuidp = (struct nfsuid *)
-                                  _MALLOC_ZONE(sizeof (struct nfsuid),
-                                                       M_NFSUID, M_WAITOK);
-                           } else
-                               nuidp = (struct nfsuid *)0;
-                           if ((slp->ns_flag & SLP_VALID) == 0) {
-                               if (nuidp) {
-                                   FREE_ZONE((caddr_t)nuidp,
-                                       sizeof (struct nfsuid), M_NFSUID);
-                                   slp->ns_numuids--;
-                               }
-                           } else {
-                               if (nuidp == (struct nfsuid *)0) {
-                                   nuidp = slp->ns_uidlruhead.tqh_first;
-                                   if (!nuidp)
-                                       return (ENOMEM);
-                                   LIST_REMOVE(nuidp, nu_hash);
-                                   TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
-                                       nu_lru);
-                                   if (nuidp->nu_flag & NU_NAM)
-                                       mbuf_freem(nuidp->nu_nam);
-                                   kauth_cred_rele(nuidp->nu_cr);
-                               }
-                               nuidp->nu_flag = 0;
-
-                               if (nsd->nsd_cr.cr_ngroups > NGROUPS)
-                                   nsd->nsd_cr.cr_ngroups = NGROUPS;
-
-                               nfsrv_setcred(&nsd->nsd_cr, &temp_cred);
-                               nuidp->nu_cr = kauth_cred_create(&temp_cred);
-
-                               if (!nuidp->nu_cr) {
-                                       FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID);
-                                       slp->ns_numuids--;
-                                       return (ENOMEM);
-                               }
-                               nuidp->nu_timestamp = nsd->nsd_timestamp;
-                               microtime(&now);
-                               nuidp->nu_expire = now.tv_sec + nsd->nsd_ttl;
-                               /*
-                                * and save the session key in nu_key.
-                                */
-                               bcopy(nsd->nsd_key, nuidp->nu_key,
-                                   sizeof (nsd->nsd_key));
-                               if (nfsd->nfsd_nd->nd_nam2) {
-                                   struct sockaddr_in *saddr;
-
-                                   saddr = mbuf_data(nfsd->nfsd_nd->nd_nam2);
-                                   switch (saddr->sin_family) {
-                                   case AF_INET:
-                                       nuidp->nu_flag |= NU_INETADDR;
-                                       nuidp->nu_inetaddr =
-                                            saddr->sin_addr.s_addr;
-                                       break;
-                                   case AF_ISO:
-                                   default:
-                                       nuidp->nu_flag |= NU_NAM;
-                                       error = mbuf_copym(nfsd->nfsd_nd->nd_nam2, 0,
-                                                       MBUF_COPYALL, MBUF_WAITOK,
-                                                       &nuidp->nu_nam);
-                                       if (error) {
-                                               kauth_cred_rele(nuidp->nu_cr);
-                                               FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID);
-                                               slp->ns_numuids--;
-                                               return (error);
-                                       }
-                                       break;
-                                   };
-                               }
-                               TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
-                                       nu_lru);
-                               LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
-                                       nuidp, nu_hash);
-                               nfsrv_setcred(nuidp->nu_cr,
-                                   nfsd->nfsd_nd->nd_cr);
-                               nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
-                           }
-                       }
-               }
-               if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
-                       nfsd->nfsd_flag |= NFSD_AUTHFAIL;
-               error = nfssvc_nfsd(nsd, uap->argp, p);
+               error = nfssvc_nfsd();
        } else if (uap->flag & NFSSVC_EXPORT) {
-               error = nfssvc_export(uap->argp, p);
+               error = nfssvc_export(uap->argp);
        } else {
                error = EINVAL;
        }
-#endif /* NFS_NOSERVER */
        if (error == EINTR || error == ERESTART)
                error = 0;
        return (error);
 }
 
-/*
- * NFSKERB client helper daemon.
- * Gets authorization strings for "kerb" mounts.
- */
-static int
-nfskerb_clientd(
-       struct nfsmount *nmp,
-       struct nfsd_cargs *ncd,
-       int flag,
-       user_addr_t argp,
-       proc_t p)
-{
-       struct nfsuid *nuidp, *nnuidp;
-       int error = 0;
-       struct nfsreq *rp;
-       struct timeval now;
-
-       /*
-        * First initialize some variables
-        */
-       microtime(&now);
-
-       /*
-        * If an authorization string is being passed in, get it.
-        */
-       if ((flag & NFSSVC_GOTAUTH) && (nmp->nm_state & NFSSTA_MOUNTED) &&
-           ((nmp->nm_state & NFSSTA_WAITAUTH) == 0)) {
-           if (nmp->nm_state & NFSSTA_HASAUTH)
-               panic("cld kerb");
-           if ((flag & NFSSVC_AUTHINFAIL) == 0) {
-               if (ncd->ncd_authlen <= nmp->nm_authlen &&
-                   ncd->ncd_verflen <= nmp->nm_verflen &&
-                   !copyin(CAST_USER_ADDR_T(ncd->ncd_authstr),nmp->nm_authstr,ncd->ncd_authlen)&&
-                   !copyin(CAST_USER_ADDR_T(ncd->ncd_verfstr),nmp->nm_verfstr,ncd->ncd_verflen)){
-                   nmp->nm_authtype = ncd->ncd_authtype;
-                   nmp->nm_authlen = ncd->ncd_authlen;
-                   nmp->nm_verflen = ncd->ncd_verflen;
-#if NFSKERB
-                   nmp->nm_key = ncd->ncd_key;
-#endif
-               } else
-                   nmp->nm_state |= NFSSTA_AUTHERR;
-           } else
-               nmp->nm_state |= NFSSTA_AUTHERR;
-           nmp->nm_state |= NFSSTA_HASAUTH;
-           wakeup((caddr_t)&nmp->nm_authlen);
-       } else {
-           nmp->nm_state |= NFSSTA_WAITAUTH;
-       }
-
-       /*
-        * Loop every second updating queue until there is a termination sig.
-        */
-       while (nmp->nm_state & NFSSTA_MOUNTED) {
-           /* Get an authorization string, if required. */
-           if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH)) == 0) {
-               ncd->ncd_authuid = nmp->nm_authuid;
-               if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs)))
-                       nmp->nm_state |= NFSSTA_WAITAUTH;
-               else
-                       return (ENEEDAUTH);
-           }
-           /* Wait a bit (no pun) and do it again. */
-           if ((nmp->nm_state & NFSSTA_MOUNTED) &&
-               (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) {
-                   error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH,
-                       "nfskrbtimr", hz / 3);
-                   if (error == EINTR || error == ERESTART)
-                           dounmount(nmp->nm_mountp, 0, p);
-           }
-       }
-
-       /*
-        * Finally, we can free up the mount structure.
-        */
-       for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) {
-               nnuidp = nuidp->nu_lru.tqe_next;
-               LIST_REMOVE(nuidp, nu_hash);
-               TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
-               kauth_cred_rele(nuidp->nu_cr);
-               FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID);
-       }
-       /*
-        * Loop through outstanding request list and remove dangling
-        * references to defunct nfsmount struct
-        */
-       for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next)
-               if (rp->r_nmp == nmp)
-                       rp->r_nmp = (struct nfsmount *)0;
-       /* Need to wake up any rcvlock waiters so they notice the unmount. */
-       if (nmp->nm_state & NFSSTA_WANTRCV) {
-               nmp->nm_state &= ~NFSSTA_WANTRCV;
-               wakeup(&nmp->nm_state);
-       }
-       FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
-       if (error == EWOULDBLOCK)
-               error = 0;
-       return (error);
-}
-
-#ifndef NFS_NOSERVER
 /*
  * Adds a socket to the list for servicing by nfsds.
  */
-static int
-nfssvc_addsock(
-       socket_t so,
-       mbuf_t mynam,
-       __unused proc_t p)
+int
+nfssvc_addsock(socket_t so, mbuf_t mynam)
 {
-       int siz;
-       struct nfssvc_sock *slp;
-       struct nfssvc_sock *tslp = NULL;
-       int error, sodomain, sotype, soprotocol, on = 1;
+       struct nfsrv_sock *slp;
+       int error = 0, sodomain, sotype, soprotocol, on = 1;
+       int first;
        struct timeval timeo;
 
        /* make sure mbuf constants are set up */
-       if (!nfs_mbuf_mlen)
+       if (!nfs_mbuf_mhlen)
                nfs_mbuf_init();
 
        sock_gettype(so, &sodomain, &sotype, &soprotocol);
 
-       /*
-        * Add it to the list, as required.
-        */
-       if (soprotocol == IPPROTO_UDP) {
-               tslp = nfs_udpsock;
-               if (!tslp || (tslp->ns_flag & SLP_VALID)) {
-                       mbuf_freem(mynam);
-                       return (EPERM);
-               }
-#if ISO
-       } else if (soprotocol == ISOPROTO_CLTP) {
-               tslp = nfs_cltpsock;
-               if (!tslp || (tslp->ns_flag & SLP_VALID)) {
-                       mbuf_freem(mynam);
-                       return (EPERM);
-               }
-#endif /* ISO */
+       /* There should be only one UDP socket for each of IPv4 and IPv6 */
+       if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+               mbuf_freem(mynam);
+               return (EEXIST);
        }
-       /* reserve buffer space for 2 maximally-sized packets */
-       siz = NFS_MAXPACKET;
-       if (sotype == SOCK_STREAM)
-               siz += sizeof (u_long);
-       siz *= 2;
-       if (siz > NFS_MAXSOCKBUF)
-               siz = NFS_MAXSOCKBUF;
-       if ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &siz, sizeof(siz))) ||
-           (error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &siz, sizeof(siz)))) {
+       if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
                mbuf_freem(mynam);
-               return (error);
+               return (EEXIST);
        }
 
-       /*
-        * Set protocol specific options { for now TCP only } and
-        * reserve some space. For datagram sockets, this can get called
-        * repeatedly for the same socket, but that isn't harmful.
-        */
+       /* Set protocol options and reserve some space (for UDP). */
        if (sotype == SOCK_STREAM) {
+               error = nfsrv_check_exports_allow_address(mynam);
+               if (error)
+                       return (error);
                sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
        }
-       if (sodomain == AF_INET && soprotocol == IPPROTO_TCP) {
+       if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
                sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+       if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
+               int reserve = NFS_UDPSOCKBUF;
+               error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
+               error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
+               if (error) {
+                       log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
+                       error = 0;
+               }
        }
-
        sock_nointerrupt(so, 0);
 
+       /*
+        * Set socket send/receive timeouts.
+        * Receive timeout shouldn't matter, but setting the send timeout
+        * will make sure that an unresponsive client can't hang the server.
+        */
        timeo.tv_usec = 0;
-       timeo.tv_sec = 0;
-       error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
-       error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+       timeo.tv_sec = 1;
+       error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+       timeo.tv_sec = 30;
+       error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+       if (error) {
+               log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
+               error = 0;
+       }
 
-       if (tslp) {
-               slp = tslp;
-               lck_mtx_lock(nfsd_mutex);
-       } else {
-               MALLOC(slp, struct nfssvc_sock *, sizeof(struct nfssvc_sock),
-                               M_NFSSVC, M_WAITOK);
-               if (!slp) {
-                       mbuf_freem(mynam);
-                       return (ENOMEM);
-               }
-               bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
-               lck_rw_init(&slp->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
-               lck_mtx_init(&slp->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
-               TAILQ_INIT(&slp->ns_uidlruhead);
-               lck_mtx_lock(nfsd_mutex);
-               TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
+       MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
+       if (!slp) {
+               mbuf_freem(mynam);
+               return (ENOMEM);
        }
+       bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
+       lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
+       lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
 
-       sock_retain(so); /* grab a retain count on the socket */
-       slp->ns_so = so;
-       slp->ns_sotype = sotype;
-       slp->ns_nam = mynam;
+       lck_mtx_lock(nfsd_mutex);
+
+       if (soprotocol == IPPROTO_UDP) {
+               if (sodomain == AF_INET) {
+                       /* There should be only one UDP/IPv4 socket */
+                       if (nfsrv_udpsock) {
+                               lck_mtx_unlock(nfsd_mutex);
+                               nfsrv_slpfree(slp);
+                               mbuf_freem(mynam);
+                               return (EEXIST);
+                       }
+                       nfsrv_udpsock = slp;
+               }
+               if (sodomain == AF_INET6) {
+                       /* There should be only one UDP/IPv6 socket */
+                       if (nfsrv_udp6sock) {
+                               lck_mtx_unlock(nfsd_mutex);
+                               nfsrv_slpfree(slp);
+                               mbuf_freem(mynam);
+                               return (EEXIST);
+                       }
+                       nfsrv_udp6sock = slp;
+               }
+       }
+
+       /* add the socket to the list */
+       first = TAILQ_EMPTY(&nfsrv_socklist);
+       TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+       if (soprotocol == IPPROTO_TCP) {
+               nfsrv_sock_tcp_cnt++;
+               if (nfsrv_sock_idle_timeout < 0)
+                       nfsrv_sock_idle_timeout = 0;
+               if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
+                       nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
+               /*
+                * Possibly start or stop the idle timer. We only start the idle timer when
+                * we have more than 2 * nfsd_thread_max connections. If the idle timer is
+                * on then we may need to turn it off based on the nfsrv_sock_idle_timeout or
+                * the number of connections.
+                */
+               if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
+                       if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                               if (nfsrv_idlesock_timer_on) {
+                                       thread_call_cancel(nfsrv_idlesock_timer_call);
+                                       nfsrv_idlesock_timer_on = 0;
+                               }
+                       } else {
+                               struct nfsrv_sock *old_slp;
+                               struct timeval now;
+                               time_t time_to_wait = nfsrv_sock_idle_timeout;
+                               /*
+                                * Get the oldest tcp socket and calculate the
+                                * earliest time for the next idle timer to fire
+                                * based on the possibly updated nfsrv_sock_idle_timeout
+                                */
+                               TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
+                                       if (old_slp->ns_sotype == SOCK_STREAM) {
+                                               microuptime(&now);
+                                               time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
+                                               if (time_to_wait < 1)
+                                                       time_to_wait = 1;
+                                               break;
+                                       }
+                               }
+                               /*
+                                * If we have a timer scheduled, but it's going to fire too late,
+                                * turn it off.
+                                */
+                               if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
+                                       thread_call_cancel(nfsrv_idlesock_timer_call);
+                                       nfsrv_idlesock_timer_on = 0;
+                               }
+                               /* Schedule the idle thread if it isn't already */
+                               if (!nfsrv_idlesock_timer_on) {
+                                       nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+                                       nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+                               }
+                       }
+               }
+       }
+
+       sock_retain(so); /* grab a retain count on the socket */
+       slp->ns_so = so;
+       slp->ns_sotype = sotype;
+       slp->ns_nam = mynam;
+
+       /* set up the socket up-call */
+       nfsrv_uc_addsock(slp, first);
 
-       socket_lock(so, 1);
-       so->so_upcallarg = (caddr_t)slp;
-       so->so_upcall = nfsrv_rcv;
-       so->so_rcv.sb_flags |= SB_UPCALL; /* required for freebsd merge */
-       socket_unlock(so, 1);
+       /* mark that the socket is not in the nfsrv_sockwg list */
+       slp->ns_wgq.tqe_next = SLPNOLIST;
 
        slp->ns_flag = SLP_VALID | SLP_NEEDQ;
 
@@ -835,88 +990,158 @@ nfssvc_addsock(
 }
 
 /*
- * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
- * until it is killed by a signal.
+ * nfssvc_nfsd()
+ *
+ * nfsd theory of operation:
+ *
+ * The first nfsd thread stays in user mode accepting new TCP connections
+ * which are then added via the "addsock" call.  The rest of the nfsd threads
+ * simply call into the kernel and remain there in a loop handling NFS
+ * requests until killed by a signal.
+ * 
+ * There's a list of nfsd threads (nfsd_head).
+ * There's an nfsd queue that contains only those nfsds that are
+ *   waiting for work to do (nfsd_queue).
+ *
+ * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
+ *   managing the work on the sockets:
+ *   nfsrv_sockwait - sockets w/new data waiting to be worked on
+ *   nfsrv_sockwork - sockets being worked on which may have more work to do
+ *   nfsrv_sockwg -- sockets which have pending write gather data
+ * When a socket receives data, if it is not currently queued, it
+ *   will be placed at the end of the "wait" queue.
+ * Whenever a socket needs servicing we make sure it is queued and
+ *   wake up a waiting nfsd (if there is one).
+ *
+ * nfsds will service at most 8 requests from the same socket before
+ *   defecting to work on another socket.
+ * nfsds will defect immediately if there are any sockets in the "wait" queue
+ * nfsds looking for a socket to work on check the "wait" queue first and
+ *   then check the "work" queue.
+ * When an nfsd starts working on a socket, it removes it from the head of
+ *   the queue it's currently on and moves it to the end of the "work" queue.
+ * When nfsds are checking the queues for work, any sockets found not to 
+ *   have any work are simply dropped from the queue.
+ *
  */
-static int
-nfssvc_nfsd(nsd, argp, p)
-       struct nfsd_srvargs *nsd;
-       user_addr_t argp;
-       proc_t p;
+int
+nfssvc_nfsd(void)
 {
-       mbuf_t m, mreq;
-       struct nfssvc_sock *slp;
-       struct nfsd *nfsd = nsd->nsd_nfsd;
+       mbuf_t m, mrep;
+       struct nfsrv_sock *slp;
+       struct nfsd *nfsd;
        struct nfsrv_descript *nd = NULL;
        int error = 0, cacherep, writes_todo;
-       int siz, procrastinate;
+       int siz, procrastinate, opcnt = 0;
        u_quad_t cur_usec;
        struct timeval now;
-       boolean_t funnel_state;
+       struct vfs_context context;
+       struct timespec to;
 
 #ifndef nolint
        cacherep = RC_DOIT;
        writes_todo = 0;
 #endif
-       if (nfsd == (struct nfsd *)0) {
-               MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
-               if (!nfsd)
-                       return (ENOMEM);
-               nsd->nsd_nfsd = nfsd;
-               bzero((caddr_t)nfsd, sizeof (struct nfsd));
-               nfsd->nfsd_procp = p;
-               lck_mtx_lock(nfsd_mutex);
-               TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
-               nfs_numnfsd++;
-               lck_mtx_unlock(nfsd_mutex);
-       }
 
-       funnel_state = thread_funnel_set(kernel_flock, FALSE);
+       MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
+       if (!nfsd)
+               return (ENOMEM);
+       bzero(nfsd, sizeof(struct nfsd));
+       lck_mtx_lock(nfsd_mutex);
+       if (nfsd_thread_count++ == 0)
+               nfsrv_initcache();              /* Init the server request cache */
+       
+       TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
+       lck_mtx_unlock(nfsd_mutex);
+
+       context.vc_thread = current_thread();
+
+       /* Set time out so that nfsd threads can wake up and see if they are still needed. */
+       to.tv_sec = 5;
+       to.tv_nsec = 0;
 
        /*
         * Loop getting rpc requests until SIGKILL.
         */
        for (;;) {
-               if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
+               if (nfsd_thread_max <= 0) {
+                       /* NFS server shutting down, get out ASAP */
+                       error = EINTR;
+                       slp = nfsd->nfsd_slp;
+               } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
+                       /* already have some work to do */
+                       error = 0;
+                       slp = nfsd->nfsd_slp;
+               } else {
+                       /* need to find work to do */
+                       error = 0;
                        lck_mtx_lock(nfsd_mutex);
-                       while ((nfsd->nfsd_slp == NULL) && !(nfsd_head_flag & NFSD_CHECKSLP)) {
+                       while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
+                               if (nfsd_thread_count > nfsd_thread_max) {
+                                       /*
+                                        * If we have no socket and there are more
+                                        * nfsd threads than configured, let's exit.
+                                        */
+                                       error = 0;
+                                       goto done;
+                               }
                                nfsd->nfsd_flag |= NFSD_WAITING;
-                               nfsd_waiting++;
-                               error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", 0);
-                               nfsd_waiting--;
+                               TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
+                               error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
                                if (error) {
-                                       lck_mtx_unlock(nfsd_mutex);
+                                       if (nfsd->nfsd_flag & NFSD_WAITING) {
+                                               TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
+                                               nfsd->nfsd_flag &= ~NFSD_WAITING;
+                                       }
+                                       if (error == EWOULDBLOCK)
+                                               continue;
                                        goto done;
                                }
                        }
-                       if ((nfsd->nfsd_slp == NULL) && (nfsd_head_flag & NFSD_CHECKSLP)) {
-                               TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
-                                   lck_rw_lock_shared(&slp->ns_rwlock);
-                                   if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
-                                       == (SLP_VALID | SLP_DOREC)) {
-                                           if (lck_rw_lock_shared_to_exclusive(&slp->ns_rwlock)) {
-                                               /* upgrade failed and we lost the lock; take exclusive and recheck */
-                                               lck_rw_lock_exclusive(&slp->ns_rwlock);
-                                               if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
-                                                   != (SLP_VALID | SLP_DOREC)) {
-                                                   /* flags no longer set, so skip this socket */
-                                                   lck_rw_done(&slp->ns_rwlock);
-                                                   continue;
-                                               }
-                                           }
-                                           slp->ns_flag &= ~SLP_DOREC;
-                                           slp->ns_sref++;
-                                           nfsd->nfsd_slp = slp;
-                                           lck_rw_done(&slp->ns_rwlock);
-                                           break;
-                                   }
-                                   lck_rw_done(&slp->ns_rwlock);
+                       slp = nfsd->nfsd_slp;
+                       if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
+                               /* look for a socket to work on in the wait queue */
+                               while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
+                                       lck_rw_lock_exclusive(&slp->ns_rwlock);
+                                       /* remove from the head of the queue */
+                                       TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
+                                       slp->ns_flag &= ~SLP_WAITQ;
+                                       if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
+                                               break;
+                                       /* nothing to do, so skip this socket */
+                                       lck_rw_done(&slp->ns_rwlock);
                                }
-                               if (slp == 0)
-                                       nfsd_head_flag &= ~NFSD_CHECKSLP;
+                       }
+                       if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
+                               /* look for a socket to work on in the work queue */
+                               while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
+                                       lck_rw_lock_exclusive(&slp->ns_rwlock);
+                                       /* remove from the head of the queue */
+                                       TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
+                                       slp->ns_flag &= ~SLP_WORKQ;
+                                       if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
+                                               break;
+                                       /* nothing to do, so skip this socket */
+                                       lck_rw_done(&slp->ns_rwlock);
+                               }
+                       }
+                       if (!nfsd->nfsd_slp && slp) {
+                               /* we found a socket to work on, grab a reference */
+                               slp->ns_sref++;
+                               microuptime(&now);
+                               slp->ns_timestamp = now.tv_sec;
+                               /* We keep the socket list in least recently used order for reaping idle sockets */
+                               TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+                               TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+                               nfsd->nfsd_slp = slp;
+                               opcnt = 0;
+                               /* and put it at the back of the work queue */
+                               TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
+                               slp->ns_flag |= SLP_WORKQ;
+                               lck_rw_done(&slp->ns_rwlock);
                        }
                        lck_mtx_unlock(nfsd_mutex);
-                       if ((slp = nfsd->nfsd_slp) == NULL)
+                       if (!slp)
                                continue;
                        lck_rw_lock_exclusive(&slp->ns_rwlock);
                        if (slp->ns_flag & SLP_VALID) {
@@ -927,35 +1152,44 @@ nfssvc_nfsd(nsd, argp, p)
                                if (slp->ns_flag & SLP_DISCONN)
                                        nfsrv_zapsock(slp);
                                error = nfsrv_dorec(slp, nfsd, &nd);
-                               microuptime(&now);
-                               cur_usec = (u_quad_t)now.tv_sec * 1000000 +
-                                       (u_quad_t)now.tv_usec;
-                               if (error && slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) {
-                                       error = 0;
-                                       cacherep = RC_DOIT;
-                                       writes_todo = 1;
-                               } else
-                                       writes_todo = 0;
+                               if (error == EINVAL) {  // RPCSEC_GSS drop
+                                       if (slp->ns_sotype == SOCK_STREAM)
+                                               nfsrv_zapsock(slp); // drop connection
+                               }
+                               writes_todo = 0;
+                               if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
+                                       microuptime(&now);
+                                       cur_usec = (u_quad_t)now.tv_sec * 1000000 +
+                                               (u_quad_t)now.tv_usec;
+                                       if (slp->ns_wgtime <= cur_usec) {
+                                               error = 0;
+                                               cacherep = RC_DOIT;
+                                               writes_todo = 1;
+                                       }
+                                       slp->ns_flag &= ~SLP_DOWRITES;
+                               }
                                nfsd->nfsd_flag |= NFSD_REQINPROG;
                        }
                        lck_rw_done(&slp->ns_rwlock);
-               } else {
-                       error = 0;
-                       slp = nfsd->nfsd_slp;
                }
-               if (error || (slp->ns_flag & SLP_VALID) == 0) {
+               if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
                        if (nd) {
+                               nfsm_chain_cleanup(&nd->nd_nmreq);
                                if (nd->nd_nam2)
                                        mbuf_freem(nd->nd_nam2);
-                               if (nd->nd_cr)
-                                       kauth_cred_rele(nd->nd_cr);
-                               FREE_ZONE((caddr_t)nd,
-                                               sizeof *nd, M_NFSRVDESC);
+                               if (IS_VALID_CRED(nd->nd_cr))
+                                       kauth_cred_unref(&nd->nd_cr);
+                               if (nd->nd_gss_context)
+                                       nfs_gss_svc_ctx_deref(nd->nd_gss_context);
+                               FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
                                nd = NULL;
                        }
                        nfsd->nfsd_slp = NULL;
                        nfsd->nfsd_flag &= ~NFSD_REQINPROG;
-                       nfsrv_slpderef(slp);
+                       if (slp)
+                               nfsrv_slpderef(slp);
+                       if (nfsd_thread_max <= 0)
+                               break;
                        continue;
                }
                if (nd) {
@@ -965,86 +1199,89 @@ nfssvc_nfsd(nsd, argp, p)
                    else
                        nd->nd_nam = slp->ns_nam;
 
-                   /*
-                    * Check to see if authorization is needed.
-                    */
-                   if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
-                       nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
-                       nsd->nsd_haddr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr;
-                       nsd->nsd_authlen = nfsd->nfsd_authlen;
-                       nsd->nsd_verflen = nfsd->nfsd_verflen;
-                       if (!copyout(nfsd->nfsd_authstr,CAST_USER_ADDR_T(nsd->nsd_authstr),
-                               nfsd->nfsd_authlen) &&
-                           !copyout(nfsd->nfsd_verfstr, CAST_USER_ADDR_T(nsd->nsd_verfstr),
-                               nfsd->nfsd_verflen) &&
-                           !copyout((caddr_t)nsd, argp, sizeof (*nsd))) {
-                           thread_funnel_set(kernel_flock, funnel_state);
-                           return (ENEEDAUTH);
-                       }
-                       cacherep = RC_DROPIT;
-                   } else
-                       cacherep = nfsrv_getcache(nd, slp, &mreq);
-
-                   if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
-                       nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
-                       nd->nd_procnum = NFSPROC_NOOP;
-                       nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
-                       cacherep = RC_DOIT;
-                   } else if (nfs_privport) {
-                       /* Check if source port is privileged */
-                       u_short port;
-                       struct sockaddr *nam = mbuf_data(nd->nd_nam);
-                       struct sockaddr_in *sin;
-
-                       sin = (struct sockaddr_in *)nam;
-                       port = ntohs(sin->sin_port);
-                       if (port >= IPPORT_RESERVED && 
-                           nd->nd_procnum != NFSPROC_NULL) {
-                           char strbuf[MAX_IPv4_STR_LEN];
+                   cacherep = nfsrv_getcache(nd, slp, &mrep);
+
+                   if (nfsrv_require_resv_port) {
+                       /* Check if source port is a reserved port */
+                       in_port_t port = 0;
+                       struct sockaddr *saddr = mbuf_data(nd->nd_nam);
+
+                       if (saddr->sa_family == AF_INET)
+                               port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+                       else if (saddr->sa_family == AF_INET6)
+                               port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+                       if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
                            nd->nd_procnum = NFSPROC_NOOP;
                            nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
                            cacherep = RC_DOIT;
-                           printf("NFS request from unprivileged port (%s:%d)\n",
-                               inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
-                               port);
                        }
                    }
 
                }
 
                /*
-                * Loop to get all the write rpc relies that have been
+                * Loop to get all the write RPC replies that have been
                 * gathered together.
                 */
                do {
                    switch (cacherep) {
                    case RC_DOIT:
-                       if (nd && (nd->nd_flag & ND_NFSV3))
-                           procrastinate = nfsrvw_procrastinate_v3;
+                       if (nd && (nd->nd_vers == NFS_VER3))
+                           procrastinate = nfsrv_wg_delay_v3;
                        else
-                           procrastinate = nfsrvw_procrastinate;
-                       lck_rw_lock_shared(&nfs_export_rwlock);
+                           procrastinate = nfsrv_wg_delay;
+                       lck_rw_lock_shared(&nfsrv_export_rwlock);
+                       context.vc_ucred = NULL;
                        if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
-                           error = nfsrv_writegather(&nd, slp, nfsd->nfsd_procp, &mreq);
+                               error = nfsrv_writegather(&nd, slp, &context, &mrep);
                        else
-                           error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, nfsd->nfsd_procp, &mreq);
-                       lck_rw_done(&nfs_export_rwlock);
-                       if (mreq == NULL)
+                               error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
+                       lck_rw_done(&nfsrv_export_rwlock);
+                       if (mrep == NULL) {
+                               /*
+                                * If this is a stream socket and we are not going
+                                * to send a reply we better close the connection
+                                * so the client doesn't hang.
+                                */
+                               if (error && slp->ns_sotype == SOCK_STREAM) {
+                                       lck_rw_lock_exclusive(&slp->ns_rwlock);
+                                       nfsrv_zapsock(slp);
+                                       lck_rw_done(&slp->ns_rwlock);
+                                       printf("NFS server: NULL reply from proc = %d error = %d\n",
+                                               nd->nd_procnum, error);
+                               }
                                break;
+
+                       }
                        if (error) {
-                               OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs);
-                               nfsrv_updatecache(nd, FALSE, mreq);
+                               OSAddAtomic64(1, &nfsstats.srv_errs);
+                               nfsrv_updatecache(nd, FALSE, mrep);
                                if (nd->nd_nam2) {
                                        mbuf_freem(nd->nd_nam2);
                                        nd->nd_nam2 = NULL;
                                }
                                break;
                        }
-                       OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]);
-                       nfsrv_updatecache(nd, TRUE, mreq);
-                       nd->nd_mrep = NULL;
+                       OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
+                       nfsrv_updatecache(nd, TRUE, mrep);
+                       /* FALLTHRU */
+
                    case RC_REPLY:
-                       m = mreq;
+                       if (nd->nd_gss_mb != NULL) {    // It's RPCSEC_GSS
+                               /*
+                                * Need to checksum or encrypt the reply
+                                */
+                               error = nfs_gss_svc_protect_reply(nd, mrep);
+                               if (error) {
+                                       mbuf_freem(mrep);
+                                       break;
+                               }
+                       }
+
+                       /*
+                        * Get the total size of the reply
+                        */
+                       m = mrep;
                        siz = 0;
                        while (m) {
                                siz += mbuf_len(m);
@@ -1054,7 +1291,7 @@ nfssvc_nfsd(nsd, argp, p)
                                printf("mbuf siz=%d\n",siz);
                                panic("Bad nfs svc reply");
                        }
-                       m = mreq;
+                       m = mrep;
                        mbuf_pkthdr_setlen(m, siz);
                        error = mbuf_pkthdr_setrcvif(m, NULL);
                        if (error)
@@ -1066,11 +1303,11 @@ nfssvc_nfsd(nsd, argp, p)
                        if (slp->ns_sotype == SOCK_STREAM) {
                                error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
                                if (!error)
-                                       *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz);
+                                       *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
                        }
                        if (!error) {
                                if (slp->ns_flag & SLP_VALID) {
-                                   error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL);
+                                   error = nfsrv_send(slp, nd->nd_nam2, m);
                                } else {
                                    error = EPIPE;
                                    mbuf_freem(m);
@@ -1078,46 +1315,43 @@ nfssvc_nfsd(nsd, argp, p)
                        } else {
                                mbuf_freem(m);
                        }
-                       mreq = NULL;
-                       if (nfsrtton)
-                               nfsd_rt(slp->ns_sotype, nd, cacherep);
+                       mrep = NULL;
                        if (nd->nd_nam2) {
                                mbuf_freem(nd->nd_nam2);
                                nd->nd_nam2 = NULL;
                        }
-                       if (nd->nd_mrep) {
-                               mbuf_freem(nd->nd_mrep);
-                               nd->nd_mrep = NULL;
-                       }
                        if (error == EPIPE) {
                                lck_rw_lock_exclusive(&slp->ns_rwlock);
                                nfsrv_zapsock(slp);
                                lck_rw_done(&slp->ns_rwlock);
                        }
                        if (error == EINTR || error == ERESTART) {
-                               if (nd->nd_cr)
-                                       kauth_cred_rele(nd->nd_cr);
-                               FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
+                               nfsm_chain_cleanup(&nd->nd_nmreq);
+                               if (IS_VALID_CRED(nd->nd_cr))
+                                       kauth_cred_unref(&nd->nd_cr);
+                               if (nd->nd_gss_context)
+                                       nfs_gss_svc_ctx_deref(nd->nd_gss_context);
+                               FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
                                nfsrv_slpderef(slp);
+                               lck_mtx_lock(nfsd_mutex);
                                goto done;
                        }
                        break;
                    case RC_DROPIT:
-                       if (nfsrtton)
-                               nfsd_rt(slp->ns_sotype, nd, cacherep);
-                       mbuf_freem(nd->nd_mrep);
                        mbuf_freem(nd->nd_nam2);
-                       nd->nd_mrep = nd->nd_nam2 = NULL;
+                       nd->nd_nam2 = NULL;
                        break;
                    };
+                   opcnt++;
                    if (nd) {
-                       if (nd->nd_mrep)
-                               mbuf_freem(nd->nd_mrep);
+                       nfsm_chain_cleanup(&nd->nd_nmreq);
                        if (nd->nd_nam2)
                                mbuf_freem(nd->nd_nam2);
-                       if (nd->nd_cr)
-                               kauth_cred_rele(nd->nd_cr);
-                       FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
+                       if (IS_VALID_CRED(nd->nd_cr))
+                               kauth_cred_unref(&nd->nd_cr);
+                       if (nd->nd_gss_context)
+                               nfs_gss_svc_ctx_deref(nd->nd_gss_context);
+                       FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
                        nd = NULL;
                    }
 
@@ -1125,48 +1359,53 @@ nfssvc_nfsd(nsd, argp, p)
                     * Check to see if there are outstanding writes that
                     * need to be serviced.
                     */
-                   microuptime(&now);
-                   cur_usec = (u_quad_t)now.tv_sec * 1000000 +
-                       (u_quad_t)now.tv_usec;
-                   if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) {
-                       cacherep = RC_DOIT;
-                       writes_todo = 1;
-                   } else {
-                       writes_todo = 0;
+                   writes_todo = 0;
+                   if (slp->ns_wgtime) {
+                       microuptime(&now);
+                       cur_usec = (u_quad_t)now.tv_sec * 1000000 +
+                               (u_quad_t)now.tv_usec;
+                       if (slp->ns_wgtime <= cur_usec) {
+                           cacherep = RC_DOIT;
+                           writes_todo = 1;
+                       }
                    }
                } while (writes_todo);
-               lck_rw_lock_exclusive(&slp->ns_rwlock);
-               if (nfsrv_dorec(slp, nfsd, &nd)) {
+
+               nd = NULL;
+               if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
+                       lck_rw_lock_exclusive(&slp->ns_rwlock);
+                       error = nfsrv_dorec(slp, nfsd, &nd);
+                       if (error == EINVAL) {  // RPCSEC_GSS drop
+                               if (slp->ns_sotype == SOCK_STREAM)
+                                       nfsrv_zapsock(slp); // drop connection
+                       }
                        lck_rw_done(&slp->ns_rwlock);
+               }
+               if (!nd) {
+                       /* drop our reference on the socket */
                        nfsd->nfsd_flag &= ~NFSD_REQINPROG;
                        nfsd->nfsd_slp = NULL;
                        nfsrv_slpderef(slp);
-               } else {
-                       lck_rw_done(&slp->ns_rwlock);
                }
        }
-done:
-       thread_funnel_set(kernel_flock, funnel_state);
        lck_mtx_lock(nfsd_mutex);
+done:
        TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
        FREE(nfsd, M_NFSD);
-       nsd->nsd_nfsd = (struct nfsd *)0;
-       if (--nfs_numnfsd == 0)
-               nfsrv_init(TRUE);       /* Reinitialize everything */
+       if (--nfsd_thread_count == 0)
+               nfsrv_cleanup();
        lck_mtx_unlock(nfsd_mutex);
        return (error);
 }
 
-static int
-nfssvc_export(user_addr_t argp, proc_t p)
+int
+nfssvc_export(user_addr_t argp)
 {
        int error = 0, is_64bit;
        struct user_nfs_export_args unxa;
-       struct vfs_context context;
+       vfs_context_t ctx = vfs_context_current();
 
-       context.vc_proc = p;
-       context.vc_ucred = kauth_cred_get();
-       is_64bit = IS_64BIT_PROCESS(p);
+       is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
 
        /* copy in pointers to path and export args */
        if (is_64bit) {
@@ -1188,204 +1427,20 @@ nfssvc_export(user_addr_t argp, proc_t p)
        if (error)
                return (error);
 
-       error = nfsrv_export(&unxa, &context);
+       error = nfsrv_export(&unxa, ctx);
 
        return (error);
 }
 
-#endif /* NFS_NOSERVER */
-
-int nfs_defect = 0;
-/* XXX CSM 11/25/97 Upgrade sysctl.h someday */
-#ifdef notyet
-SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
-#endif
-
-int
-nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
-{
-       struct lockd_ans la;
-       int error;
-
-       if (uap->flag == NFSCLNT_LOCKDWAIT) {
-               return (nfslockdwait(p));
-       }
-       if (uap->flag == NFSCLNT_LOCKDANS) {
-               error = copyin(uap->argp, &la, sizeof(la));
-               return (error != 0 ? error : nfslockdans(p, &la));
-       }
-       if (uap->flag == NFSCLNT_LOCKDFD)
-               return (nfslockdfd(p, CAST_DOWN(int, uap->argp)));
-       return EINVAL;
-}
-
-
-static int nfssvc_iod_continue(int);
-
 /*
- * Asynchronous I/O daemons for client nfs.
- * They do read-ahead and write-behind operations on the block I/O cache.
- * Never returns unless it fails or gets killed.
- */
-static int
-nfssvc_iod(__unused proc_t p)
-{
-       register int i, myiod;
-       struct uthread *ut;
-
-       /*
-        * Assign my position or return error if too many already running
-        */
-       myiod = -1;
-       for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
-               if (nfs_asyncdaemon[i] == 0) {
-                       nfs_asyncdaemon[i]++;
-                       myiod = i;
-                       break;
-               }
-       if (myiod == -1)
-               return (EBUSY);
-       nfs_numasync++;
-
-       /* stuff myiod into uthread to get off local stack for continuation */
-
-       ut = (struct uthread *)get_bsdthread_info(current_thread());
-       ut->uu_state.uu_nfs_myiod = myiod;  /* squirrel away for continuation */
-
-       nfssvc_iod_continue(0);
-       /* NOTREACHED */
-       return (0);
-}
-
-/*
- * Continuation for Asynchronous I/O daemons for client nfs.
- */
-static int
-nfssvc_iod_continue(int error)
-{
-       register struct nfsbuf *bp;
-       register int i, myiod;
-       struct nfsmount *nmp;
-       struct uthread *ut;
-       proc_t p;
-
-       /*
-        * real myiod is stored in uthread, recover it
-        */
-       ut = (struct uthread *)get_bsdthread_info(current_thread());
-       myiod = ut->uu_state.uu_nfs_myiod;
-       p = current_proc(); // XXX
-
-       /*
-        * Just loop around doin our stuff until SIGKILL
-        *  - actually we don't loop with continuations...
-        */
-       lck_mtx_lock(nfs_iod_mutex);
-       for (;;) {
-           while (((nmp = nfs_iodmount[myiod]) == NULL
-                   || nmp->nm_bufq.tqh_first == NULL)
-                  && error == 0 && nfs_ioddelwri == 0) {
-               if (nmp)
-                   nmp->nm_bufqiods--;
-               nfs_iodwant[myiod] = p; // XXX this doesn't need to be a proc_t
-               nfs_iodmount[myiod] = NULL;
-               error = msleep0((caddr_t)&nfs_iodwant[myiod], nfs_iod_mutex,
-                       PWAIT | PCATCH | PDROP, "nfsidl", 0, nfssvc_iod_continue);
-               lck_mtx_lock(nfs_iod_mutex);
-           }
-           if (error) {
-               nfs_asyncdaemon[myiod] = 0;
-               if (nmp) nmp->nm_bufqiods--;
-               nfs_iodwant[myiod] = NULL;
-               nfs_iodmount[myiod] = NULL;
-               lck_mtx_unlock(nfs_iod_mutex);
-               nfs_numasync--;
-               if (error == EINTR || error == ERESTART)
-                 error = 0;
-               unix_syscall_return(error);
-           }
-           if (nmp != NULL) {
-               while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
-                   /* Take one off the front of the list */
-                   TAILQ_REMOVE(&nmp->nm_bufq, bp, nb_free);
-                   bp->nb_free.tqe_next = NFSNOLIST;
-                   nmp->nm_bufqlen--;
-                   if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
-                       nmp->nm_bufqwant = FALSE;
-                       lck_mtx_unlock(nfs_iod_mutex);
-                       wakeup(&nmp->nm_bufq);
-                   } else {
-                       lck_mtx_unlock(nfs_iod_mutex);
-                   }
-
-                   SET(bp->nb_flags, NB_IOD);
-                   if (ISSET(bp->nb_flags, NB_READ))
-                       nfs_doio(bp, bp->nb_rcred, NULL);
-                   else
-                       nfs_doio(bp, bp->nb_wcred, NULL);
-
-                   lck_mtx_lock(nfs_iod_mutex);
-                   /*
-                    * If there are more than one iod on this mount, then defect
-                    * so that the iods can be shared out fairly between the mounts
-                    */
-                   if (nfs_defect && nmp->nm_bufqiods > 1) {
-                       nfs_iodmount[myiod] = NULL;
-                       nmp->nm_bufqiods--;
-                       break;
-                   }
-               }
-           }
-           lck_mtx_unlock(nfs_iod_mutex);
-
-           if (nfs_ioddelwri) {
-               i = 0;
-               nfs_ioddelwri = 0;
-               lck_mtx_lock(nfs_buf_mutex);
-               while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) {
-                       struct nfsnode *np = VTONFS(bp->nb_vp);
-                       nfs_buf_remfree(bp);
-                       nfs_buf_refget(bp);
-                       while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN);
-                       nfs_buf_refrele(bp);
-                       if (error)
-                               break;
-                       if (!bp->nb_vp) {
-                               /* buffer is no longer valid */
-                               nfs_buf_drop(bp);
-                               continue;
-                       }
-                       if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
-                               /* put buffer at end of delwri list */
-                               TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
-                               nfsbufdelwricnt++;
-                               nfs_buf_drop(bp);
-                               lck_mtx_unlock(nfs_buf_mutex);
-                               nfs_flushcommits(np->n_vnode, NULL, 1);
-                       } else {
-                               SET(bp->nb_flags, (NB_ASYNC | NB_IOD));
-                               lck_mtx_unlock(nfs_buf_mutex);
-                               nfs_buf_write(bp);
-                       }
-                       i++;
-                       lck_mtx_lock(nfs_buf_mutex);
-               }
-               lck_mtx_unlock(nfs_buf_mutex);
-           }
-
-           lck_mtx_lock(nfs_iod_mutex);
-       }
-}
-
-/*
- * Shut down a socket associated with an nfssvc_sock structure.
+ * Shut down a socket associated with an nfsrv_sock structure.
  * Should be called with the send lock set, if required.
  * The trick here is to increment the sref at the start, so that the nfsds
  * will stop using it and clear ns_flag at the end so that it will not be
  * reassigned during cleanup.
  */
-static void
-nfsrv_zapsock(struct nfssvc_sock *slp)
+void
+nfsrv_zapsock(struct nfsrv_sock *slp)
 {
        socket_t so;
 
@@ -1397,248 +1452,21 @@ nfsrv_zapsock(struct nfssvc_sock *slp)
        if (so == NULL)
                return;
 
-       /*
-        * Attempt to deter future upcalls, but leave the
-        * upcall info in place to avoid a race with the
-        * networking code.
-        */
-       socket_lock(so, 1);
-       so->so_rcv.sb_flags &= ~SB_UPCALL;
-       socket_unlock(so, 1);
-
+       sock_setupcall(so, NULL, NULL);
        sock_shutdown(so, SHUT_RDWR);
-}
-
-/*
- * Get an authorization string for the uid by having the mount_nfs sitting
- * on this mount point porpous out of the kernel and do it.
- */
-int
-nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
-       register struct nfsmount *nmp;
-       struct nfsreq *rep;
-       kauth_cred_t cred;
-       char **auth_str;
-       int *auth_len;
-       char *verf_str;
-       int *verf_len;
-       NFSKERBKEY_T key;               /* return session key */
-{
-       int error = 0;
-
-       while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) {
-               nmp->nm_state |= NFSSTA_WANTAUTH;
-               (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
-                       "nfsauth1", 2 * hz);
-               error = nfs_sigintr(nmp, rep, rep->r_procp);
-               if (error) {
-                       nmp->nm_state &= ~NFSSTA_WANTAUTH;
-                       return (error);
-               }
-       }
-       nmp->nm_state &= ~NFSSTA_WANTAUTH;
-       MALLOC(*auth_str, char *, RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
-       if (!*auth_str)
-               return (ENOMEM);
-       nmp->nm_authstr = *auth_str;
-       nmp->nm_authlen = RPCAUTH_MAXSIZ;
-       nmp->nm_verfstr = verf_str;
-       nmp->nm_verflen = *verf_len;
-       nmp->nm_authuid = kauth_cred_getuid(cred);
-       nmp->nm_state &= ~NFSSTA_WAITAUTH;
-       wakeup((caddr_t)&nmp->nm_authstr);
 
        /*
-        * And wait for mount_nfs to do its stuff.
+        * Remove from the up-call queue
         */
-       while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) {
-               (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
-                       "nfsauth2", 2 * hz);
-               error = nfs_sigintr(nmp, rep, rep->r_procp);
-       }
-       if (nmp->nm_state & NFSSTA_AUTHERR) {
-               nmp->nm_state &= ~NFSSTA_AUTHERR;
-               error = EAUTH;
-       }
-       if (error)
-               FREE(*auth_str, M_TEMP);
-       else {
-               *auth_len = nmp->nm_authlen;
-               *verf_len = nmp->nm_verflen;
-               bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
-       }
-       nmp->nm_state &= ~NFSSTA_HASAUTH;
-       nmp->nm_state |= NFSSTA_WAITAUTH;
-       if (nmp->nm_state & NFSSTA_WANTAUTH) {
-               nmp->nm_state &= ~NFSSTA_WANTAUTH;
-               wakeup((caddr_t)&nmp->nm_authtype);
-       }
-       return (error);
-}
-
-/*
- * Get a nickname authenticator and verifier.
- */
-int
-nfs_getnickauth(
-       struct nfsmount *nmp,
-       kauth_cred_t cred,
-       char **auth_str,
-       int *auth_len,
-       char *verf_str,
-       __unused int verf_len)
-{
-       register struct nfsuid *nuidp;
-       register u_long *nickp, *verfp;
-       struct timeval ktvin, ktvout, now;
-
-#if DIAGNOSTIC
-       if (verf_len < (4 * NFSX_UNSIGNED))
-               panic("nfs_getnickauth verf too small");
-#endif
-       for (nuidp = NMUIDHASH(nmp, kauth_cred_getuid(cred))->lh_first;
-           nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
-               if (kauth_cred_getuid(nuidp->nu_cr) == kauth_cred_getuid(cred))
-                       break;
-       }
-       microtime(&now);
-       if (!nuidp || nuidp->nu_expire < now.tv_sec)
-               return (EACCES);
-
-       MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
-       if (!nickp)
-               return (ENOMEM);
-
-       /*
-        * Move to the end of the lru list (end of lru == most recently used).
-        */
-       TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
-       TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
-
-       *nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
-       *nickp = txdr_unsigned(nuidp->nu_nickname);
-       *auth_str = (char *)nickp;
-       *auth_len = 2 * NFSX_UNSIGNED;
-
-       /*
-        * Now we must encrypt the verifier and package it up.
-        */
-       verfp = (u_long *)verf_str;
-       *verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
-       microtime(&now);
-       if (now.tv_sec > nuidp->nu_timestamp.tv_sec ||
-           (now.tv_sec == nuidp->nu_timestamp.tv_sec &&
-            now.tv_usec > nuidp->nu_timestamp.tv_usec))
-               nuidp->nu_timestamp = now;
-       else
-               nuidp->nu_timestamp.tv_usec++;
-       ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
-       ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
-
-       /*
-        * Now encrypt the timestamp verifier in ecb mode using the session
-        * key.
-        */
-#if NFSKERB
-       XXX
-#endif
-
-       *verfp++ = ktvout.tv_sec;
-       *verfp++ = ktvout.tv_usec;
-       *verfp = 0;
-       return (0);
-}
-
-/*
- * Save the current nickname in a hash list entry on the mount point.
- */
-int
-nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
-       register struct nfsmount *nmp;
-       kauth_cred_t cred;
-       int len;
-       NFSKERBKEY_T key;
-       mbuf_t *mdp;
-       char **dposp;
-       mbuf_t mrep;
-{
-       register struct nfsuid *nuidp;
-       register u_long *tl;
-       register long t1;
-       mbuf_t md = *mdp;
-       struct timeval ktvin, ktvout, now;
-       u_long nick;
-       char *dpos = *dposp, *cp2;
-       int deltasec, error = 0;
-
-       if (len == (3 * NFSX_UNSIGNED)) {
-               nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
-               ktvin.tv_sec = *tl++;
-               ktvin.tv_usec = *tl++;
-               nick = fxdr_unsigned(u_long, *tl);
-
-               /*
-                * Decrypt the timestamp in ecb mode.
-                */
-#if NFSKERB
-               XXX
-#endif
-               ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
-               ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
-               microtime(&now);
-               deltasec = now.tv_sec - ktvout.tv_sec;
-               if (deltasec < 0)
-                       deltasec = -deltasec;
-               /*
-                * If ok, add it to the hash list for the mount point.
-                */
-               if (deltasec <= NFS_KERBCLOCKSKEW) {
-                       if (nmp->nm_numuids < nuidhash_max) {
-                               nmp->nm_numuids++;
-                               MALLOC_ZONE(nuidp, struct nfsuid *,
-                                               sizeof (struct nfsuid),
-                                                       M_NFSUID, M_WAITOK);
-                       } else {
-                               nuidp = NULL;
-                       }
-                       if (!nuidp) {
-                               nuidp = nmp->nm_uidlruhead.tqh_first;
-                               if (!nuidp) {
-                                       error = ENOMEM;
-                                       goto nfsmout;
-                               }
-                               LIST_REMOVE(nuidp, nu_hash);
-                               TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
-                               kauth_cred_rele(nuidp->nu_cr);
-                       }
-                       nuidp->nu_flag = 0;
-                       kauth_cred_ref(cred);
-                       nuidp->nu_cr = cred;
-                       nuidp->nu_expire = now.tv_sec + NFS_KERBTTL;
-                       nuidp->nu_timestamp = ktvout;
-                       nuidp->nu_nickname = nick;
-                       bcopy(key, nuidp->nu_key, sizeof (key));
-                       TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
-                       LIST_INSERT_HEAD(NMUIDHASH(nmp, kauth_cred_getuid(cred)),
-                               nuidp, nu_hash);
-               }
-       } else
-               nfsm_adv(nfsm_rndup(len));
-nfsmout:
-       *mdp = md;
-       *dposp = dpos;
-       return (error);
+       nfsrv_uc_dequeue(slp);
 }
 
-#ifndef NFS_NOSERVER
-
 /*
  * cleanup and release a server socket structure.
  */
 void
-nfsrv_slpfree(struct nfssvc_sock *slp)
+nfsrv_slpfree(struct nfsrv_sock *slp)
 {
-       struct nfsuid *nuidp, *nnuidp;
        struct nfsrv_descript *nwp, *nnwp;
 
        if (slp->ns_so) {
@@ -1651,31 +1479,29 @@ nfsrv_slpfree(struct nfssvc_sock *slp)
                mbuf_freem(slp->ns_raw);
        if (slp->ns_rec)
                mbuf_freem(slp->ns_rec);
-       slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL;
-
-       for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
-           nuidp = nnuidp) {
-               nnuidp = nuidp->nu_lru.tqe_next;
-               LIST_REMOVE(nuidp, nu_hash);
-               TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
-               if (nuidp->nu_flag & NU_NAM)
-                       mbuf_freem(nuidp->nu_nam);
-               kauth_cred_rele(nuidp->nu_cr);
-               FREE_ZONE((caddr_t)nuidp,
-                               sizeof (struct nfsuid), M_NFSUID);
-       }
+       if (slp->ns_frag)
+               mbuf_freem(slp->ns_frag);
+       slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
+       slp->ns_reccnt = 0;
 
        for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
                nnwp = nwp->nd_tq.le_next;
                LIST_REMOVE(nwp, nd_tq);
-               if (nwp->nd_cr)
-                       kauth_cred_rele(nwp->nd_cr);
-               FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC);
+               nfsm_chain_cleanup(&nwp->nd_nmreq);
+               if (nwp->nd_mrep)
+                       mbuf_freem(nwp->nd_mrep);
+               if (nwp->nd_nam2)
+                       mbuf_freem(nwp->nd_nam2);
+               if (IS_VALID_CRED(nwp->nd_cr))
+                       kauth_cred_unref(&nwp->nd_cr);
+               if (nwp->nd_gss_context)
+                       nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
+               FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
        }
        LIST_INIT(&slp->ns_tq);
 
-       lck_rw_destroy(&slp->ns_rwlock, nfs_slp_rwlock_group);
-       lck_mtx_destroy(&slp->ns_wgmutex, nfs_slp_mutex_group);
+       lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
+       lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
        FREE(slp, M_NFSSVC);
 }
 
@@ -1683,143 +1509,178 @@ nfsrv_slpfree(struct nfssvc_sock *slp)
  * Derefence a server socket structure. If it has no more references and
  * is no longer valid, you can throw it away.
  */
-void
-nfsrv_slpderef(struct nfssvc_sock *slp)
+static void
+nfsrv_slpderef_locked(struct nfsrv_sock *slp)
 {
-       struct timeval now;
-
-       lck_mtx_lock(nfsd_mutex);
        lck_rw_lock_exclusive(&slp->ns_rwlock);
        slp->ns_sref--;
+
        if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
+               if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
+                       /* remove socket from queue since there's no work */
+                       if (slp->ns_flag & SLP_WAITQ)
+                               TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
+                       else
+                               TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
+                       slp->ns_flag &= ~SLP_QUEUED;
+               }
                lck_rw_done(&slp->ns_rwlock);
-               lck_mtx_unlock(nfsd_mutex);
                return;
        }
 
-       /* queue the socket up for deletion */
-       microuptime(&now);
-       slp->ns_timestamp = now.tv_sec;
-       TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
-       TAILQ_INSERT_TAIL(&nfssvc_deadsockhead, slp, ns_chain);
+       /* This socket is no longer valid, so we'll get rid of it */
+
+       if (slp->ns_flag & SLP_QUEUED) {
+               if (slp->ns_flag & SLP_WAITQ)
+                       TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
+               else
+                       TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
+               slp->ns_flag &= ~SLP_QUEUED;
+       }
        lck_rw_done(&slp->ns_rwlock);
-       if (slp == nfs_udpsock)
-               nfs_udpsock = NULL;
-#if ISO
-       else if (slp == nfs_cltpsock)
-               nfs_cltpsock = NULL;
-#endif
-       lck_mtx_unlock(nfsd_mutex);
+
+       TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+       if (slp->ns_sotype == SOCK_STREAM)
+               nfsrv_sock_tcp_cnt--;
+
+       /* now remove from the write gather socket list */ 
+       if (slp->ns_wgq.tqe_next != SLPNOLIST) {
+               TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
+               slp->ns_wgq.tqe_next = SLPNOLIST;
+       }
+       nfsrv_slpfree(slp);
 }
 
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)
+{
+       lck_mtx_lock(nfsd_mutex);
+       nfsrv_slpderef_locked(slp);
+       lck_mtx_unlock(nfsd_mutex);
+}
 
 /*
- * Initialize the data structures for the server.
- * Handshake with any new nfsds starting up to avoid any chance of
- * corruption.
+ * Check periodically for idle sockets if needed and
+ * zap them.
  */
 void
-nfsrv_init(terminating)
-       int terminating;
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
 {
-       struct nfssvc_sock *slp, *nslp;
+       struct nfsrv_sock *slp, *tslp;
        struct timeval now;
+       time_t time_to_wait = nfsrv_sock_idle_timeout;
 
-       if (terminating) {
-               microuptime(&now);
-               for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != 0; slp = nslp) {
-                       nslp = TAILQ_NEXT(slp, ns_chain);
-                       if (slp->ns_flag & SLP_VALID) {
-                               lck_rw_lock_exclusive(&slp->ns_rwlock);
-                               nfsrv_zapsock(slp);
-                               lck_rw_done(&slp->ns_rwlock);
-                       }
-                       /* queue the socket up for deletion */
-                       slp->ns_timestamp = now.tv_sec;
-                       TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
-                       TAILQ_INSERT_TAIL(&nfssvc_deadsockhead, slp, ns_chain);
-                       if (slp == nfs_udpsock)
-                               nfs_udpsock = NULL;
-#if ISO
-                       else if (slp == nfs_cltpsock)
-                               nfs_cltpsock = NULL;
-#endif
-               }
-               nfsrv_cleancache();     /* And clear out server cache */
-/* XXX Revisit when enabling WebNFS */
-#ifdef WEBNFS_ENABLED
-       } else
-               nfs_pub.np_valid = 0;
-#else
-       }
-#endif
+       microuptime(&now);
+       lck_mtx_lock(nfsd_mutex);
 
-       if (!terminating) {
-               TAILQ_INIT(&nfssvc_sockhead);
-               TAILQ_INIT(&nfssvc_deadsockhead);
-               TAILQ_INIT(&nfsd_head);
-               nfsd_head_flag &= ~NFSD_CHECKSLP;
+       /* Turn off the timer if we're supposed to and get out */
+       if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
+           nfsrv_sock_idle_timeout = 0;
+       if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+               nfsrv_idlesock_timer_on = 0;
+               lck_mtx_unlock(nfsd_mutex);
+               return;
        }
 
-       MALLOC(nfs_udpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock),
-                       M_NFSSVC, M_WAITOK);
-       if (nfs_udpsock) {
-               bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
-               lck_rw_init(&nfs_udpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
-               lck_mtx_init(&nfs_udpsock->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
-               TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
-               TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
-       } else {
-               printf("nfsrv_init() failed to allocate UDP socket\n");
+       TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
+               lck_rw_lock_exclusive(&slp->ns_rwlock);
+               /* Skip udp and referenced sockets */
+               if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+                       lck_rw_done(&slp->ns_rwlock);
+                       continue;
+               }
+               /*
+                * If this is the first non-referenced socket that hasn't idled out,
+                * use its time stamp to calculate the earliest time in the future
+                * to start the next invocation of the timer. Since the nfsrv_socklist
+                * is sorted oldest access to newest, once we find the first one,
+                * we're done and break out of the loop.
+                */
+               if (((slp->ns_timestamp + nfsrv_sock_idle_timeout)  >  now.tv_sec) ||
+                       nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                       time_to_wait -= now.tv_sec - slp->ns_timestamp;
+                       if (time_to_wait < 1)
+                               time_to_wait = 1;
+                       lck_rw_done(&slp->ns_rwlock);
+                       break;
+               }
+               /*
+                * Bump the ref count. nfsrv_slpderef_locked below will destroy
+                * the socket, since nfsrv_zapsock has closed it.
+                */
+               slp->ns_sref++;
+               nfsrv_zapsock(slp);
+               lck_rw_done(&slp->ns_rwlock);
+               nfsrv_slpderef_locked(slp);
        }
 
-#if ISO
-       MALLOC(nfs_cltpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock),
-                       M_NFSSVC, M_WAITOK);
-       if (nfs_cltpsock) {
-               bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
-               lck_rw_init(&nfs_cltpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
-               lck_mtx_init(&nfs_cltpsock->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
-               TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
-               TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
-       } else {
-               printf("nfsrv_init() failed to allocate CLTP socket\n");
-       }
-#endif
+       /* Start ourself back up */
+       nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+       /* Remember when the next timer will fire for nfssvc_addsock. */
+       nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+       lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
- * Add entries to the server monitor log.
+ * Clean up the data structures for the server.
  */
-static void
-nfsd_rt(sotype, nd, cacherep)
-       int sotype;
-       register struct nfsrv_descript *nd;
-       int cacherep;
+void
+nfsrv_cleanup(void)
 {
-       register struct drt *rt;
+       struct nfsrv_sock *slp, *nslp;
        struct timeval now;
+#if CONFIG_FSE
+       struct nfsrv_fmod *fp, *nfp;
+       int i;
+#endif
 
-       rt = &nfsdrt.drt[nfsdrt.pos];
-       if (cacherep == RC_DOIT)
-               rt->flag = 0;
-       else if (cacherep == RC_REPLY)
-               rt->flag = DRT_CACHEREPLY;
-       else
-               rt->flag = DRT_CACHEDROP;
-       if (sotype == SOCK_STREAM)
-               rt->flag |= DRT_TCP;
-       else if (nd->nd_flag & ND_NFSV3)
-               rt->flag |= DRT_NFSV3;
-       rt->proc = nd->nd_procnum;
-       if (((struct sockaddr *)mbuf_data(nd->nd_nam))->sa_family == AF_INET)
-           rt->ipadr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr;
-       else
-           rt->ipadr = INADDR_ANY;
        microuptime(&now);
-       rt->resptime = ((now.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
-               (now.tv_usec - nd->nd_starttime.tv_usec);
-       microtime(&rt->tstamp); // XXX unused
-       nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+       for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
+               nslp = TAILQ_NEXT(slp, ns_chain);
+               lck_rw_lock_exclusive(&slp->ns_rwlock);
+               slp->ns_sref++;
+               if (slp->ns_flag & SLP_VALID)
+                       nfsrv_zapsock(slp);
+               lck_rw_done(&slp->ns_rwlock);
+               nfsrv_slpderef_locked(slp);
+       }
+#
+#if CONFIG_FSE
+       /*
+        * Flush pending file write fsevents
+        */
+       lck_mtx_lock(nfsrv_fmod_mutex);
+       for (i = 0; i < NFSRVFMODHASHSZ; i++) {
+               for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
+                       /*
+                        * Fire off the content modified fsevent for each
+                        * entry, remove it from the list, and free it.
+                        */
+                       if (nfsrv_fsevents_enabled) {
+                               fp->fm_context.vc_thread = current_thread();
+                               add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
+                                               FSE_ARG_VNODE, fp->fm_vp,
+                                               FSE_ARG_DONE);
+                       }
+                       vnode_put(fp->fm_vp);
+                       kauth_cred_unref(&fp->fm_context.vc_ucred);
+                       nfp = LIST_NEXT(fp, fm_link);
+                       LIST_REMOVE(fp, fm_link);
+                       FREE(fp, M_TEMP);
+               }
+       }
+       nfsrv_fmod_pending = 0;
+       lck_mtx_unlock(nfsrv_fmod_mutex);
+#endif
+
+       nfsrv_uc_cleanup();     /* Stop nfs socket up-call threads */
+       
+       nfs_gss_svc_cleanup();  /* Remove any RPCSEC_GSS contexts */
+
+       nfsrv_cleancache();     /* And clear out server cache */
+
+       nfsrv_udpsock = NULL;
+       nfsrv_udp6sock = NULL;
 }
+
 #endif /* NFS_NOSERVER */