X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/de355530ae67247cbd0da700edb3a2a1dae884c2..3e170ce000f1506b7b5d2c5c7faec85ceabb573d:/bsd/nfs/nfs_syscalls.c diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c index 852257d43..12daa5588 100644 --- a/bsd/nfs/nfs_syscalls.c +++ b/bsd/nfs/nfs_syscalls.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2014 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. 
+ * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* @@ -58,93 +64,427 @@ * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95 * FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $ */ +/* + * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce + * support for mandatory and extensible security protections. This notice + * is included in support of clause 2.2 (b) of the Apple Public License, + * Version 2.0. + */ #include #include -/* XXX CSM 11/25/97 FreeBSD's generated syscall prototypes */ -#ifdef notyet -#include -#endif #include -#include +#include #include #include -#include -#include -#include +#include +#include +#include /* for fdflags */ +#include #include +#include #include #include -#include -#include +#include #include #include #include #include -#include +#include +#include #include #include -#include +#include +#include +#include +#include +#include +#include + +#include #include #include -#if ISO -#include -#endif #include #include #include #include #include #include +#include #include #include -#include -#include - - -/* Global defs. 
*/ -extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, - struct nfssvc_sock *slp, - struct proc *procp, - struct mbuf **mreqp)); -extern int nfs_numasync; -extern time_t nqnfsstarttime; -extern int nqsrv_writeslack; -extern int nfsrtton; -extern struct nfsstats nfsstats; -extern int nfsrvw_procrastinate; -extern int nfsrvw_procrastinate_v3; -struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; -static int nuidhash_max = NFS_MAXUIDHASH; - -static void nfsrv_zapsock __P((struct nfssvc_sock *slp)); -static int nfssvc_iod __P((struct proc *)); - -#define TRUE 1 -#define FALSE 0 - -static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON]; - -#ifndef NFS_NOSERVER -int nfsd_waiting = 0; -static struct nfsdrt nfsdrt; -static int nfs_numnfsd = 0; -static int notstarted = 1; -static int modify_flag = 0; -static void nfsd_rt __P((int sotype, struct nfsrv_descript *nd, - int cacherep)); -static int nfssvc_addsock __P((struct file *, struct mbuf *, - struct proc *)); -static int nfssvc_nfsd __P((struct nfsd_srvargs *,caddr_t,struct proc *)); - -static int nfs_privport = 0; -/* XXX CSM 11/25/97 Upgrade sysctl.h someday */ -#ifdef notyet -SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, ""); -SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, ""); -SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, ""); +#include +#if CONFIG_MACF +#include +#endif + +kern_return_t thread_terminate(thread_t); /* XXX */ + +#if NFSSERVER + +extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp); +extern int nfsrv_wg_delay; +extern int nfsrv_wg_delay_v3; + +static int nfsrv_require_resv_port = 0; +static time_t nfsrv_idlesock_timer_on = 0; +static int nfsrv_sock_tcp_cnt = 0; +#define NFSD_MIN_IDLE_TIMEOUT 30 +static int nfsrv_sock_idle_timeout = 3600; /* One hour */ + +int nfssvc_export(user_addr_t argp); +int 
nfssvc_nfsd(void); +int nfssvc_addsock(socket_t, mbuf_t); +void nfsrv_zapsock(struct nfsrv_sock *); +void nfsrv_slpderef(struct nfsrv_sock *); +void nfsrv_slpfree(struct nfsrv_sock *); + +#endif /* NFSSERVER */ + +/* + * sysctl stuff + */ +SYSCTL_DECL(_vfs_generic); +SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge"); + +#if NFSCLIENT +SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge"); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | 
CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, ""); +SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, ""); +#endif /* NFSCLIENT */ + +#if NFSSERVER +SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge"); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, ""); 
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, ""); +SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, ""); +#if CONFIG_FSE +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, ""); #endif +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, ""); +#ifdef NFS_UC_Q_DEBUG +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, ""); +SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, ""); +#endif +#endif /* NFSSERVER */ + + +#if NFSCLIENT + +static int +mapname2id(struct nfs_testmapid *map) +{ + int error; + + error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag); + if (error) + return (error); + + if (map->ntm_grpflag) + error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id); + else + error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id); + + return (error); +} + +static int +mapid2name(struct nfs_testmapid *map) +{ + int error; + int len = sizeof(map->ntm_name); + + if (map->ntm_grpflag) + 
error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid); + else + error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid); + + if (error) + return (error); + + error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag); + + return (error); + +} + + +static int +nfsclnt_testidmap(proc_t p, user_addr_t argp) +{ + struct nfs_testmapid mapid; + int error, coerror; + + /* Let root make this call. */ + error = proc_suser(p); + if (error) + return (error); + + error = copyin(argp, &mapid, sizeof(mapid)); + if (error) + return (error); + if (mapid.ntm_name2id) + error = mapname2id(&mapid); + else + error = mapid2name(&mapid); + + coerror = copyout(&mapid, argp, sizeof(mapid)); + + return (error ? error : coerror); +} + +int +nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval) +{ + struct lockd_ans la; + int error; + + switch (uap->flag) { + case NFSCLNT_LOCKDANS: + error = copyin(uap->argp, &la, sizeof(la)); + if (!error) + error = nfslockdans(p, &la); + break; + case NFSCLNT_LOCKDNOTIFY: + error = nfslockdnotify(p, uap->argp); + break; + case NFSCLNT_TESTIDMAP: + error = nfsclnt_testidmap(p, uap->argp); + break; + default: + error = EINVAL; + } + return (error); +} + + +/* + * Asynchronous I/O threads for client NFS. + * They do read-ahead and write-behind operations on the block I/O cache. + * + * The pool of up to nfsiod_thread_max threads is launched on demand and exit + * when unused for a while. There are as many nfsiod structs as there are + * nfsiod threads; however there's no strict tie between a thread and a struct. + * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes + * up, it removes the next struct nfsiod from the queue and services it. Then + * it will put the struct at the head of free list and sleep on it. + * Async requests will pull the next struct nfsiod from the head of the free list, + * put it on the work queue, and wake whatever thread is waiting on that struct. 
+ */ + +/* + * nfsiod thread exit routine + * + * Must be called with nfsiod_mutex held so that the + * decision to terminate is atomic with the termination. + */ +void +nfsiod_terminate(struct nfsiod *niod) +{ + nfsiod_thread_count--; + lck_mtx_unlock(nfsiod_mutex); + if (niod) + FREE(niod, M_TEMP); + else + printf("nfsiod: terminating without niod\n"); + thread_terminate(current_thread()); + /*NOTREACHED*/ +} + +/* nfsiod thread startup routine */ +void +nfsiod_thread(void) +{ + struct nfsiod *niod; + int error; + + MALLOC(niod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK); + if (!niod) { + lck_mtx_lock(nfsiod_mutex); + nfsiod_thread_count--; + wakeup(current_thread()); + lck_mtx_unlock(nfsiod_mutex); + thread_terminate(current_thread()); + /*NOTREACHED*/ + } + bzero(niod, sizeof(*niod)); + lck_mtx_lock(nfsiod_mutex); + TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); + wakeup(current_thread()); + error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); + /* shouldn't return... so we have an error */ + /* remove an old nfsiod struct and terminate */ + lck_mtx_lock(nfsiod_mutex); + if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) + TAILQ_REMOVE(&nfsiodfree, niod, niod_link); + nfsiod_terminate(niod); + /*NOTREACHED*/ +} + +/* + * Start up another nfsiod thread. 
+ * (unless we're already maxed out and there are nfsiods running) + */ +int +nfsiod_start(void) +{ + thread_t thd = THREAD_NULL; + + lck_mtx_lock(nfsiod_mutex); + if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) { + lck_mtx_unlock(nfsiod_mutex); + return (EBUSY); + } + nfsiod_thread_count++; + if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) { + lck_mtx_unlock(nfsiod_mutex); + return (EBUSY); + } + /* wait for the thread to complete startup */ + msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL); + thread_deallocate(thd); + return (0); +} + +/* + * Continuation for Asynchronous I/O threads for NFS client. + * + * Grab an nfsiod struct to work on, do some work, then drop it + */ +int +nfsiod_continue(int error) +{ + struct nfsiod *niod; + struct nfsmount *nmp; + struct nfsreq *req, *treq; + struct nfs_reqqhead iodq; + int morework; + + lck_mtx_lock(nfsiod_mutex); + niod = TAILQ_FIRST(&nfsiodwork); + if (!niod) { + /* there's no work queued up */ + /* remove an old nfsiod struct and terminate */ + if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) + TAILQ_REMOVE(&nfsiodfree, niod, niod_link); + nfsiod_terminate(niod); + /*NOTREACHED*/ + } + TAILQ_REMOVE(&nfsiodwork, niod, niod_link); + +worktodo: + while ((nmp = niod->niod_nmp)) { + if (nmp == NULL){ + niod->niod_nmp = NULL; + break; + } + + /* + * Service this mount's async I/O queue. + * + * In order to ensure some level of fairness between mounts, + * we grab all the work up front before processing it so any + * new work that arrives will be serviced on a subsequent + * iteration - and we have a chance to see if other work needs + * to be done (e.g. the delayed write queue needs to be pushed + * or other mounts are waiting for an nfsiod). 
+ */ + /* grab the current contents of the queue */ + TAILQ_INIT(&iodq); + TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain); + /* Mark each iod request as being managed by an iod */ + TAILQ_FOREACH(req, &iodq, r_achain) { + lck_mtx_lock(&req->r_mtx); + assert(!(req->r_flags & R_IOD)); + req->r_flags |= R_IOD; + lck_mtx_unlock(&req->r_mtx); + } + lck_mtx_unlock(nfsiod_mutex); + + /* process the queue */ + TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) { + TAILQ_REMOVE(&iodq, req, r_achain); + req->r_achain.tqe_next = NFSREQNOLIST; + req->r_callback.rcb_func(req); + } + + /* now check if there's more/other work to be done */ + lck_mtx_lock(nfsiod_mutex); + morework = !TAILQ_EMPTY(&nmp->nm_iodq); + if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) { + /* + * we're going to stop working on this mount but if the + * mount still needs more work so queue it up + */ + if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST) + TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink); + nmp->nm_niod = NULL; + niod->niod_nmp = NULL; + } + } + + /* loop if there's still a mount to work on */ + if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) { + niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts); + TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink); + niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST; + } + if (niod->niod_nmp) + goto worktodo; + + /* queue ourselves back up - if there aren't too many threads running */ + if (nfsiod_thread_count <= NFSIOD_MAX) { + TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link); + error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue); + /* shouldn't return... 
so we have an error */ + /* remove an old nfsiod struct and terminate */ + lck_mtx_lock(nfsiod_mutex); + if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist))) + TAILQ_REMOVE(&nfsiodfree, niod, niod_link); + } + nfsiod_terminate(niod); + /*NOTREACHED*/ + return (0); +} + +#endif /* NFSCLIENT */ + + +#if NFSSERVER /* * NFS server system calls @@ -154,626 +494,859 @@ SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate /* * Get file handle system call */ -#ifndef _SYS_SYSPROTO_H_ -struct getfh_args { - char *fname; - fhandle_t *fhp; -}; -#endif int -getfh(p, uap) - struct proc *p; - register struct getfh_args *uap; +getfh(proc_t p, struct getfh_args *uap, __unused int *retval) { - register struct vnode *vp; - fhandle_t fh; - int error; + vnode_t vp; + struct nfs_filehandle nfh; + int error, fhlen, fidlen; struct nameidata nd; + char path[MAXPATHLEN], *ptr; + size_t pathlen; + struct nfs_exportfs *nxfs; + struct nfs_export *nx; /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); - if(error) + error = proc_suser(p); + if (error) + return (error); + + error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen); + if (!error) + error = copyin(uap->fhp, &fhlen, sizeof(fhlen)); + if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p); + /* limit fh size to length specified (or v3 size by default) */ + if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE)) + fhlen = NFSV3_MAX_FH_SIZE; + fidlen = fhlen - sizeof(struct nfs_exphandle); + + if (!nfsrv_is_initialized()) + return (EINVAL); + + NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, + UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current()); error = namei(&nd); if (error) return (error); + nameidone(&nd); + vp = nd.ni_vp; - bzero((caddr_t)&fh, sizeof(fh)); - fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fh.fh_fid); - vput(vp); + + // find exportfs that matches f_mntonname + 
lck_rw_lock_shared(&nfsrv_export_rwlock); + ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname; + LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) { + if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN)) + break; + } + if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) { + error = EINVAL; + goto out; + } + // find export that best matches remainder of path + ptr = path + strlen(nxfs->nxfs_path); + while (*ptr && (*ptr == '/')) + ptr++; + LIST_FOREACH(nx, &nxfs->nxfs_exports, nx_next) { + int len = strlen(nx->nx_path); + if (len == 0) // we've hit the export entry for the root directory + break; + if (!strncmp(nx->nx_path, ptr, len)) + break; + } + if (!nx) { + error = EINVAL; + goto out; + } + + bzero(&nfh, sizeof(nfh)); + nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION); + nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id); + nfh.nfh_xh.nxh_expid = htonl(nx->nx_id); + nfh.nfh_xh.nxh_flags = 0; + nfh.nfh_xh.nxh_reserved = 0; + nfh.nfh_len = fidlen; + error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL); + if (nfh.nfh_len > (uint32_t)fidlen) + error = EOVERFLOW; + nfh.nfh_xh.nxh_fidlen = nfh.nfh_len; + nfh.nfh_len += sizeof(nfh.nfh_xh); + nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; + +out: + lck_rw_done(&nfsrv_export_rwlock); + vnode_put(vp); if (error) return (error); - error = copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh)); + error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t)); return (error); } -#endif /* NFS_NOSERVER */ +extern const struct fileops vnops; + /* - * Nfs server psuedo system call for the nfsd's - * Based on the flag value it either: - * - adds a socket to the selection list - * - remains in the kernel as an nfsd - * - remains in the kernel as an nfsiod + * syscall for the rpc.lockd to use to translate a NFS file handle into + * an open descriptor. + * + * warning: do not remove the suser() call or this becomes one giant + * security hole. 
*/ -#ifndef _SYS_SYSPROTO_H_ -struct nfssvc_args { - int flag; - caddr_t argp; -}; -#endif int -nfssvc(p, uap) - struct proc *p; - register struct nfssvc_args *uap; +fhopen( proc_t p, + struct fhopen_args *uap, + int32_t *retval) { -#ifndef NFS_NOSERVER - struct nameidata nd; - struct file *fp; - struct mbuf *nam; - struct nfsd_args nfsdarg; - struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs; - struct nfsd_cargs ncd; - struct nfsd *nfsd; - struct nfssvc_sock *slp; - struct nfsuid *nuidp; - struct nfsmount *nmp; -#endif /* NFS_NOSERVER */ - int error; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct flock lf; + struct fileproc *fp, *nfp; + int fmode, error, type; + int indx; + vfs_context_t ctx = vfs_context_current(); + kauth_action_t action; /* * Must be super user */ - error = suser(p->p_ucred, &p->p_acflag); - if(error) + error = suser(vfs_context_ucred(ctx), 0); + if (error) { return (error); - while (nfssvc_sockhead_flag & SLP_INIT) { - nfssvc_sockhead_flag |= SLP_WANTINIT; - (void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0); } - if (uap->flag & NFSSVC_BIOD) - error = nfssvc_iod(p); -#ifdef NFS_NOSERVER - else - error = ENXIO; -#else /* !NFS_NOSERVER */ - else if (uap->flag & NFSSVC_MNTD) { - error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)); - if (error) - return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, - ncd.ncd_dirp, p); - error = namei(&nd); - if (error) - return (error); - if ((nd.ni_vp->v_flag & VROOT) == 0) - error = EINVAL; - nmp = VFSTONFS(nd.ni_vp->v_mount); - vput(nd.ni_vp); - if (error) - return (error); - /* disable split funnels now */ - thread_funnel_merge(kernel_flock, network_flock); - - if ((nmp->nm_flag & NFSMNT_MNTD) && - (uap->flag & NFSSVC_GOTAUTH) == 0) - return (0); - nmp->nm_flag |= NFSMNT_MNTD; - error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag, - uap->argp, p); - } else if (uap->flag & NFSSVC_ADDSOCK) { - error = 
copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); - if (error) - return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); - if (error) + if (!nfsrv_is_initialized()) { + return (EINVAL); + } + + fmode = FFLAGS(uap->flags); + /* why not allow a non-read/write open for our lockd? */ + if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) + return (EINVAL); + + error = copyin(uap->u_fhp, &nfh.nfh_len, sizeof(nfh.nfh_len)); + if (error) + return (error); + if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) || + (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE)) + return (EINVAL); + error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len); + if (error) + return (error); + nfh.nfh_fhp = (u_char*)&nfh.nfh_xh; + + lck_rw_lock_shared(&nfsrv_export_rwlock); + /* now give me my vnode, it gets returned to me with a reference */ + error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo); + lck_rw_done(&nfsrv_export_rwlock); + if (error) { + if (error == NFSERR_TRYLATER) + error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER? + return (error); + } + + /* + * From now on we have to make sure not + * to forget about the vnode. + * Any error that causes an abort must vnode_put(vp). + * Just set error = err and 'goto bad;'. + */ + + /* + * from vn_open + */ + if (vnode_vtype(vp) == VSOCK) { + error = EOPNOTSUPP; + goto bad; + } + + /* disallow write operations on directories */ + if (vnode_isdir(vp) && (fmode & (FWRITE | O_TRUNC))) { + error = EISDIR; + goto bad; + } + + /* compute action to be authorized */ + action = 0; + if (fmode & FREAD) + action |= KAUTH_VNODE_READ_DATA; + if (fmode & (FWRITE | O_TRUNC)) + action |= KAUTH_VNODE_WRITE_DATA; + if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0) + goto bad; + + if ((error = VNOP_OPEN(vp, fmode, ctx))) + goto bad; + if ((error = vnode_ref_ext(vp, fmode, 0))) + goto bad; + + /* + * end of vn_open code + */ + + // starting here... 
error paths should call vn_close/vnode_put + if ((error = falloc(p, &nfp, &indx, ctx)) != 0) { + vn_close(vp, fmode & FMASK, ctx); + goto bad; + } + fp = nfp; + + fp->f_fglob->fg_flag = fmode & FMASK; + fp->f_fglob->fg_ops = &vnops; + fp->f_fglob->fg_data = (caddr_t)vp; + + // XXX do we really need to support this with fhopen()? + if (fmode & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (fmode & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((fmode & FNONBLOCK) == 0) + type |= F_WAIT; + if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) { + struct vfs_context context = *vfs_context_current(); + /* Modify local copy (to not damage thread copy) */ + context.vc_ucred = fp->f_fglob->fg_cred; + + vn_close(vp, fp->f_fglob->fg_flag, &context); + fp_free(p, indx, fp); return (error); - /* - * Get the client address for connected sockets. - */ - if (nfsdarg.name == NULL || nfsdarg.namelen == 0) - nam = (struct mbuf *)0; - else { - error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen, - MT_SONAME); - if (error) - return (error); } - error = nfssvc_addsock(fp, nam, p); - } else { - error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); - if (error) - return (error); + fp->f_fglob->fg_flag |= FHASLOCK; + } - /* disable split funnels now */ - thread_funnel_merge(kernel_flock, network_flock); + vnode_put(vp); - if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) && - (nfsd->nfsd_slp->ns_flag & SLP_VALID)) { - slp = nfsd->nfsd_slp; + proc_fdlock(p); + procfdtbl_releasefd(p, indx, NULL); + fp_drop(p, indx, fp, 1); + proc_fdunlock(p); - /* - * First check to see if another nfsd has already - * added this credential. 
- */ - for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first; - nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid && - (!nfsd->nfsd_nd->nd_nam2 || - netaddr_match(NU_NETFAM(nuidp), - &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2))) - break; + *retval = indx; + return (0); + +bad: + vnode_put(vp); + return (error); +} + +/* + * NFS server pseudo system call + */ +int +nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval) +{ + mbuf_t nam; + struct user_nfsd_args user_nfsdarg; + socket_t so; + int error; + + AUDIT_ARG(cmd, uap->flag); + + /* + * Must be super user for most operations (export ops checked later). + */ + if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p)))) + return (error); +#if CONFIG_MACF + error = mac_system_check_nfsd(kauth_cred_get()); + if (error) + return (error); +#endif + + /* make sure NFS server data structures have been initialized */ + nfsrv_init(); + + if (uap->flag & NFSSVC_ADDSOCK) { + if (IS_64BIT_PROCESS(p)) { + error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg)); + } else { + struct nfsd_args tmp_args; + error = copyin(uap->argp, (caddr_t)&tmp_args, sizeof(tmp_args)); + if (error == 0) { + user_nfsdarg.sock = tmp_args.sock; + user_nfsdarg.name = CAST_USER_ADDR_T(tmp_args.name); + user_nfsdarg.namelen = tmp_args.namelen; } - if (nuidp) { - nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr); - nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; - } else { - /* - * Nope, so we will. 
- */ - if (slp->ns_numuids < nuidhash_max) { - slp->ns_numuids++; - nuidp = (struct nfsuid *) - _MALLOC_ZONE(sizeof (struct nfsuid), - M_NFSUID, M_WAITOK); - } else - nuidp = (struct nfsuid *)0; - if ((slp->ns_flag & SLP_VALID) == 0) { - if (nuidp) - _FREE_ZONE((caddr_t)nuidp, - sizeof (struct nfsuid), M_NFSUID); - } else { - if (nuidp == (struct nfsuid *)0) { - nuidp = slp->ns_uidlruhead.tqh_first; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, - nu_lru); - if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); - } - nuidp->nu_flag = 0; - nuidp->nu_cr = nsd->nsd_cr; - if (nuidp->nu_cr.cr_ngroups > NGROUPS) - nuidp->nu_cr.cr_ngroups = NGROUPS; - nuidp->nu_cr.cr_ref = 1; - nuidp->nu_timestamp = nsd->nsd_timestamp; - nuidp->nu_expire = time.tv_sec + nsd->nsd_ttl; - /* - * and save the session key in nu_key. - */ - bcopy(nsd->nsd_key, nuidp->nu_key, - sizeof (nsd->nsd_key)); - if (nfsd->nfsd_nd->nd_nam2) { - struct sockaddr_in *saddr; - - saddr = mtod(nfsd->nfsd_nd->nd_nam2, - struct sockaddr_in *); - switch (saddr->sin_family) { - case AF_INET: - nuidp->nu_flag |= NU_INETADDR; - nuidp->nu_inetaddr = - saddr->sin_addr.s_addr; - break; - case AF_ISO: - default: - nuidp->nu_flag |= NU_NAM; - nuidp->nu_nam = m_copym( - nfsd->nfsd_nd->nd_nam2, 0, - M_COPYALL, M_WAIT); - break; - }; - } - TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp, - nu_lru); - LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid), - nuidp, nu_hash); - nfsrv_setcred(&nuidp->nu_cr, - &nfsd->nfsd_nd->nd_cr); - nfsd->nfsd_nd->nd_flag |= ND_KERBFULL; - } + } + if (error) + return (error); + /* get the socket */ + error = file_socket(user_nfsdarg.sock, &so); + if (error) + return (error); + /* Get the client address for connected sockets. 
*/ + if (user_nfsdarg.name == USER_ADDR_NULL || user_nfsdarg.namelen == 0) { + nam = NULL; + } else { + error = sockargs(&nam, user_nfsdarg.name, user_nfsdarg.namelen, MBUF_TYPE_SONAME); + if (error) { + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); + return (error); } } - if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd)) - nfsd->nfsd_flag |= NFSD_AUTHFAIL; - error = nfssvc_nfsd(nsd, uap->argp, p); + /* + * nfssvc_addsock() will grab a retain count on the socket + * to keep the socket from being closed when nfsd closes its + * file descriptor for it. + */ + error = nfssvc_addsock(so, nam); + /* drop the iocount file_socket() grabbed on the file descriptor */ + file_drop(user_nfsdarg.sock); + } else if (uap->flag & NFSSVC_NFSD) { + error = nfssvc_nfsd(); + } else if (uap->flag & NFSSVC_EXPORT) { + error = nfssvc_export(uap->argp); + } else { + error = EINVAL; } -#endif /* NFS_NOSERVER */ if (error == EINTR || error == ERESTART) error = 0; return (error); } -#ifndef NFS_NOSERVER /* * Adds a socket to the list for servicing by nfsds. */ -static int -nfssvc_addsock(fp, mynam, p) - struct file *fp; - struct mbuf *mynam; - struct proc *p; +int +nfssvc_addsock(socket_t so, mbuf_t mynam) { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; - struct nfssvc_sock *tslp; - int error, s; - - so = (struct socket *)fp->f_data; - tslp = (struct nfssvc_sock *)0; - /* - * Add it to the list, as required. 
- */ - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - if (so->so_proto->pr_protocol == IPPROTO_UDP) { - tslp = nfs_udpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (EPERM); - } -#if ISO - } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) { - tslp = nfs_cltpsock; - if (tslp->ns_flag & SLP_VALID) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (EPERM); - } -#endif /* ISO */ + struct nfsrv_sock *slp; + int error = 0, sodomain, sotype, soprotocol, on = 1; + int first; + struct timeval timeo; + + /* make sure mbuf constants are set up */ + if (!nfs_mbuf_mhlen) + nfs_mbuf_init(); + + sock_gettype(so, &sodomain, &sotype, &soprotocol); + + /* There should be only one UDP socket for each of IPv4 and IPv6 */ + if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) { + mbuf_freem(mynam); + return (EEXIST); } - if (so->so_type == SOCK_STREAM) - siz = NFS_MAXPACKET + sizeof (u_long); - else - siz = NFS_MAXPACKET; - error = soreserve(so, siz, siz); - if (error) { - m_freem(mynam); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - return (error); + if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) { + mbuf_freem(mynam); + return (EEXIST); + } + + /* Set protocol options and reserve some space (for UDP). 
*/ + if (sotype == SOCK_STREAM) { + error = nfsrv_check_exports_allow_address(mynam); + if (error) + return (error); + sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)); + } + if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP)) + sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); + if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */ + int reserve = NFS_UDPSOCKBUF; + error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve)); + error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve)); + if (error) { + log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error); + error = 0; + } } + sock_nointerrupt(so, 0); /* - * Set protocol specific options { for now TCP only } and - * reserve some space. For datagram sockets, this can get called - * repeatedly for the same socket, but that isn't harmful. + * Set socket send/receive timeouts. + * Receive timeout shouldn't matter, but setting the send timeout + * will make sure that an unresponsive client can't hang the server. 
*/ - if (so->so_type == SOCK_STREAM) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + timeo.tv_usec = 0; + timeo.tv_sec = 1; + error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + timeo.tv_sec = 30; + error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + if (error) { + log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error); + error = 0; } - if (so->so_proto->pr_domain->dom_family == AF_INET && - so->so_proto->pr_protocol == IPPROTO_TCP) { - struct sockopt sopt; - int val; - - bzero(&sopt, sizeof sopt); - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - val = 1; - sosetopt(so, &sopt); + + MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK); + if (!slp) { + mbuf_freem(mynam); + return (ENOMEM); + } + bzero((caddr_t)slp, sizeof (struct nfsrv_sock)); + lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL); + lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL); + + lck_mtx_lock(nfsd_mutex); + + if (soprotocol == IPPROTO_UDP) { + if (sodomain == AF_INET) { + /* There should be only one UDP/IPv4 socket */ + if (nfsrv_udpsock) { + lck_mtx_unlock(nfsd_mutex); + nfsrv_slpfree(slp); + mbuf_freem(mynam); + return (EEXIST); + } + nfsrv_udpsock = slp; + } + if (sodomain == AF_INET6) { + /* There should be only one UDP/IPv6 socket */ + if (nfsrv_udp6sock) { + lck_mtx_unlock(nfsd_mutex); + nfsrv_slpfree(slp); + mbuf_freem(mynam); + return (EEXIST); + } + nfsrv_udp6sock = slp; + } } - so->so_rcv.sb_flags &= ~SB_NOINTR; - so->so_rcv.sb_timeo = 0; - so->so_snd.sb_flags &= ~SB_NOINTR; - so->so_snd.sb_timeo = 0; - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - if (tslp) - slp = tslp; - else { - 
MALLOC(slp, struct nfssvc_sock *, sizeof(struct nfssvc_sock), - M_NFSSVC, M_WAITOK); - bzero((caddr_t)slp, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&slp->ns_uidlruhead); - TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain); + /* add the socket to the list */ + first = TAILQ_EMPTY(&nfsrv_socklist); + TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); + if (soprotocol == IPPROTO_TCP) { + nfsrv_sock_tcp_cnt++; + if (nfsrv_sock_idle_timeout < 0) + nfsrv_sock_idle_timeout = 0; + if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)) + nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT; + /* + * Possibly start or stop the idle timer. We only start the idle timer when + * we have more than 2 * nfsd_thread_max connections. If the idle timer is + * on then we may need to turn it off based on the nvsrv_sock_idle_timeout or + * the number of connections. + */ + if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) { + if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { + if (nfsrv_idlesock_timer_on) { + thread_call_cancel(nfsrv_idlesock_timer_call); + nfsrv_idlesock_timer_on = 0; + } + } else { + struct nfsrv_sock *old_slp; + struct timeval now; + time_t time_to_wait = nfsrv_sock_idle_timeout; + /* + * Get the oldest tcp socket and calculate the + * earliest time for the next idle timer to fire + * based on the possibly updated nfsrv_sock_idle_timeout + */ + TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) { + if (old_slp->ns_sotype == SOCK_STREAM) { + microuptime(&now); + time_to_wait -= now.tv_sec - old_slp->ns_timestamp; + if (time_to_wait < 1) + time_to_wait = 1; + break; + } + } + /* + * If we have a timer scheduled, but if its going to fire too late, + * turn it off. 
+ */ + if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) { + thread_call_cancel(nfsrv_idlesock_timer_call); + nfsrv_idlesock_timer_on = 0; + } + /* Schedule the idle thread if it isn't already */ + if (!nfsrv_idlesock_timer_on) { + nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); + nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; + } + } + } } + + sock_retain(so); /* grab a retain count on the socket */ slp->ns_so = so; + slp->ns_sotype = sotype; slp->ns_nam = mynam; - slp->ns_fp = fp; - (void)fref(fp); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - s = splnet(); - so->so_upcallarg = (caddr_t)slp; - so->so_upcall = nfsrv_rcv; - so->so_rcv.sb_flags |= SB_UPCALL; /* required for freebsd merge */ - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - slp->ns_flag = (SLP_VALID | SLP_NEEDQ); + + /* set up the socket up-call */ + nfsrv_uc_addsock(slp, first); + + /* mark that the socket is not in the nfsrv_sockwg list */ + slp->ns_wgq.tqe_next = SLPNOLIST; + + slp->ns_flag = SLP_VALID | SLP_NEEDQ; + nfsrv_wakenfsd(slp); - splx(s); + lck_mtx_unlock(nfsd_mutex); + return (0); } /* - * Called by nfssvc() for nfsds. Just loops around servicing rpc requests - * until it is killed by a signal. + * nfssvc_nfsd() + * + * nfsd theory of operation: + * + * The first nfsd thread stays in user mode accepting new TCP connections + * which are then added via the "addsock" call. The rest of the nfsd threads + * simply call into the kernel and remain there in a loop handling NFS + * requests until killed by a signal. + * + * There's a list of nfsd threads (nfsd_head). + * There's an nfsd queue that contains only those nfsds that are + * waiting for work to do (nfsd_queue). 
+ * + * There's a list of all NFS sockets (nfsrv_socklist) and two queues for + * managing the work on the sockets: + * nfsrv_sockwait - sockets w/new data waiting to be worked on + * nfsrv_sockwork - sockets being worked on which may have more work to do + * nfsrv_sockwg -- sockets which have pending write gather data + * When a socket receives data, if it is not currently queued, it + * will be placed at the end of the "wait" queue. + * Whenever a socket needs servicing we make sure it is queued and + * wake up a waiting nfsd (if there is one). + * + * nfsds will service at most 8 requests from the same socket before + * defecting to work on another socket. + * nfsds will defect immediately if there are any sockets in the "wait" queue + * nfsds looking for a socket to work on check the "wait" queue first and + * then check the "work" queue. + * When an nfsd starts working on a socket, it removes it from the head of + * the queue it's currently on and moves it to the end of the "work" queue. + * When nfsds are checking the queues for work, any sockets found not to + * have any work are simply dropped from the queue. 
+ * */ -static int -nfssvc_nfsd(nsd, argp, p) - struct nfsd_srvargs *nsd; - caddr_t argp; - struct proc *p; +int +nfssvc_nfsd(void) { - register struct mbuf *m; - register int siz; - register struct nfssvc_sock *slp; - register struct socket *so; - register int *solockp; - struct nfsd *nfsd = nsd->nsd_nfsd; + mbuf_t m, mrep; + struct nfsrv_sock *slp; + struct nfsd *nfsd; struct nfsrv_descript *nd = NULL; - struct mbuf *mreq; - int error = 0, cacherep, s, sotype, writes_todo; - int procrastinate; + int error = 0, cacherep, writes_todo; + int siz, procrastinate, opcnt = 0; u_quad_t cur_usec; - extern void nfs_aio_thread_init(); + struct timeval now; + struct vfs_context context; + struct timespec to; #ifndef nolint cacherep = RC_DOIT; writes_todo = 0; #endif - s = splnet(); - if (nfsd == (struct nfsd *)0) { - MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); - nsd->nsd_nfsd = nfsd; - bzero((caddr_t)nfsd, sizeof (struct nfsd)); - nfsd->nfsd_procp = p; - TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); - nfs_numnfsd++; - nfs_aio_thread_init(); - } + + MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK); + if (!nfsd) + return (ENOMEM); + bzero(nfsd, sizeof(struct nfsd)); + lck_mtx_lock(nfsd_mutex); + if (nfsd_thread_count++ == 0) + nfsrv_initcache(); /* Init the server request cache */ + + TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain); + lck_mtx_unlock(nfsd_mutex); + + context.vc_thread = current_thread(); + + /* Set time out so that nfsd threads can wake up a see if they are still needed. */ + to.tv_sec = 5; + to.tv_nsec = 0; + /* * Loop getting rpc requests until SIGKILL. 
*/ for (;;) { - if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) { - while (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) == 0) { + if (nfsd_thread_max <= 0) { + /* NFS server shutting down, get out ASAP */ + error = EINTR; + slp = nfsd->nfsd_slp; + } else if (nfsd->nfsd_flag & NFSD_REQINPROG) { + /* already have some work to do */ + error = 0; + slp = nfsd->nfsd_slp; + } else { + /* need to find work to do */ + error = 0; + lck_mtx_lock(nfsd_mutex); + while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) { + if (nfsd_thread_count > nfsd_thread_max) { + /* + * If we have no socket and there are more + * nfsd threads than configured, let's exit. + */ + error = 0; + goto done; + } nfsd->nfsd_flag |= NFSD_WAITING; - nfsd_waiting++; - error = tsleep((caddr_t)nfsd, PSOCK | PCATCH, - "nfsd", 0); - nfsd_waiting--; - if (error) + TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue); + error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to); + if (error) { + if (nfsd->nfsd_flag & NFSD_WAITING) { + TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue); + nfsd->nfsd_flag &= ~NFSD_WAITING; + } + if (error == EWOULDBLOCK) + continue; goto done; + } + } + slp = nfsd->nfsd_slp; + if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) { + /* look for a socket to work on in the wait queue */ + while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) { + lck_rw_lock_exclusive(&slp->ns_rwlock); + /* remove from the head of the queue */ + TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); + slp->ns_flag &= ~SLP_WAITQ; + if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) + break; + /* nothing to do, so skip this socket */ + lck_rw_done(&slp->ns_rwlock); + } } - if (nfsd->nfsd_slp == (struct nfssvc_sock *)0 && - (nfsd_head_flag & NFSD_CHECKSLP) != 0) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; - slp = slp->ns_chain.tqe_next) { - if ((slp->ns_flag & (SLP_VALID | SLP_DOREC)) - == (SLP_VALID | SLP_DOREC)) { - slp->ns_flag &= ~SLP_DOREC; - 
slp->ns_sref++; - nfsd->nfsd_slp = slp; - break; - } + if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) { + /* look for a socket to work on in the work queue */ + while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) { + lck_rw_lock_exclusive(&slp->ns_rwlock); + /* remove from the head of the queue */ + TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); + slp->ns_flag &= ~SLP_WORKQ; + if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO)) + break; + /* nothing to do, so skip this socket */ + lck_rw_done(&slp->ns_rwlock); } - if (slp == 0) - nfsd_head_flag &= ~NFSD_CHECKSLP; } - if ((slp = nfsd->nfsd_slp) == (struct nfssvc_sock *)0) + if (!nfsd->nfsd_slp && slp) { + /* we found a socket to work on, grab a reference */ + slp->ns_sref++; + microuptime(&now); + slp->ns_timestamp = now.tv_sec; + /* We keep the socket list in least recently used order for reaping idle sockets */ + TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain); + TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain); + nfsd->nfsd_slp = slp; + opcnt = 0; + /* and put it at the back of the work queue */ + TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq); + slp->ns_flag |= SLP_WORKQ; + lck_rw_done(&slp->ns_rwlock); + } + lck_mtx_unlock(nfsd_mutex); + if (!slp) continue; + lck_rw_lock_exclusive(&slp->ns_rwlock); if (slp->ns_flag & SLP_VALID) { - if (slp->ns_flag & SLP_DISCONN) - nfsrv_zapsock(slp); - else if (slp->ns_flag & SLP_NEEDQ) { + if ((slp->ns_flag & (SLP_NEEDQ|SLP_DISCONN)) == SLP_NEEDQ) { slp->ns_flag &= ~SLP_NEEDQ; - (void) nfs_sndlock(&slp->ns_solock, - (struct nfsreq *)0); - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - nfsrv_rcv(slp->ns_so, (caddr_t)slp, - M_WAIT); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - nfs_sndunlock(&slp->ns_solock); + nfsrv_rcv_locked(slp->ns_so, slp, MBUF_WAITOK); } + if (slp->ns_flag & SLP_DISCONN) + nfsrv_zapsock(slp); error = nfsrv_dorec(slp, nfsd, &nd); - cur_usec = (u_quad_t)time.tv_sec * 1000000 + - (u_quad_t)time.tv_usec; - if (error && slp->ns_tq.lh_first 
&& - slp->ns_tq.lh_first->nd_time <= cur_usec) { - error = 0; - cacherep = RC_DOIT; - writes_todo = 1; - } else - writes_todo = 0; + if (error == EINVAL) { // RPCSEC_GSS drop + if (slp->ns_sotype == SOCK_STREAM) + nfsrv_zapsock(slp); // drop connection + } + writes_todo = 0; + if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) { + microuptime(&now); + cur_usec = (u_quad_t)now.tv_sec * 1000000 + + (u_quad_t)now.tv_usec; + if (slp->ns_wgtime <= cur_usec) { + error = 0; + cacherep = RC_DOIT; + writes_todo = 1; + } + slp->ns_flag &= ~SLP_DOWRITES; + } nfsd->nfsd_flag |= NFSD_REQINPROG; } - } else { - error = 0; - slp = nfsd->nfsd_slp; + lck_rw_done(&slp->ns_rwlock); } - if (error || (slp->ns_flag & SLP_VALID) == 0) { + if (error || (slp && !(slp->ns_flag & SLP_VALID))) { if (nd) { - _FREE_ZONE((caddr_t)nd, - sizeof *nd, M_NFSRVDESC); + nfsm_chain_cleanup(&nd->nd_nmreq); + if (nd->nd_nam2) + mbuf_freem(nd->nd_nam2); + if (IS_VALID_CRED(nd->nd_cr)) + kauth_cred_unref(&nd->nd_cr); + if (nd->nd_gss_context) + nfs_gss_svc_ctx_deref(nd->nd_gss_context); + FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); nd = NULL; } - nfsd->nfsd_slp = (struct nfssvc_sock *)0; + nfsd->nfsd_slp = NULL; nfsd->nfsd_flag &= ~NFSD_REQINPROG; - nfsrv_slpderef(slp); + if (slp) + nfsrv_slpderef(slp); + if (nfsd_thread_max <= 0) + break; continue; } - splx(s); - so = slp->ns_so; - sotype = so->so_type; - if (so->so_proto->pr_flags & PR_CONNREQUIRED) - solockp = &slp->ns_solock; - else - solockp = (int *)0; if (nd) { - nd->nd_starttime = time; + microuptime(&nd->nd_starttime); if (nd->nd_nam2) nd->nd_nam = nd->nd_nam2; else nd->nd_nam = slp->ns_nam; - /* - * Check to see if authorization is needed. 
- */ - if (nfsd->nfsd_flag & NFSD_NEEDAUTH) { - nfsd->nfsd_flag &= ~NFSD_NEEDAUTH; - nsd->nsd_haddr = mtod(nd->nd_nam, - struct sockaddr_in *)->sin_addr.s_addr; - nsd->nsd_authlen = nfsd->nfsd_authlen; - nsd->nsd_verflen = nfsd->nfsd_verflen; - if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr, - nfsd->nfsd_authlen) && - !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr, - nfsd->nfsd_verflen) && - !copyout((caddr_t)nsd, argp, sizeof (*nsd))) - return (ENEEDAUTH); - cacherep = RC_DROPIT; - } else - cacherep = nfsrv_getcache(nd, slp, &mreq); + cacherep = nfsrv_getcache(nd, slp, &mrep); - /* - * Check for just starting up for NQNFS and send - * fake "try again later" replies to the NQNFS clients. - */ - if (notstarted && nqnfsstarttime <= time.tv_sec) { - if (modify_flag) { - nqnfsstarttime = time.tv_sec + nqsrv_writeslack; - modify_flag = 0; - } else - notstarted = 0; - } - if (notstarted) { - if ((nd->nd_flag & ND_NQNFS) == 0) - cacherep = RC_DROPIT; - else if (nd->nd_procnum != NFSPROC_WRITE) { - nd->nd_procnum = NFSPROC_NOOP; - nd->nd_repstat = NQNFS_TRYLATER; - cacherep = RC_DOIT; - } else - modify_flag = 1; - } else if (nfsd->nfsd_flag & NFSD_AUTHFAIL) { - nfsd->nfsd_flag &= ~NFSD_AUTHFAIL; - nd->nd_procnum = NFSPROC_NOOP; - nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); - cacherep = RC_DOIT; - } else if (nfs_privport) { - /* Check if source port is privileged */ - u_short port; - struct sockaddr *nam = nd->nd_nam; - struct sockaddr_in *sin; - - sin = (struct sockaddr_in *)nam; - port = ntohs(sin->sin_port); - if (port >= IPPORT_RESERVED && - nd->nd_procnum != NFSPROC_NULL) { + if (nfsrv_require_resv_port) { + /* Check if source port is a reserved port */ + in_port_t port = 0; + struct sockaddr *saddr = mbuf_data(nd->nd_nam); + + if (saddr->sa_family == AF_INET) + port = ntohs(((struct sockaddr_in*)saddr)->sin_port); + else if (saddr->sa_family == AF_INET6) + port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port); + if ((port >= IPPORT_RESERVED) && (nd->nd_procnum 
!= NFSPROC_NULL)) { nd->nd_procnum = NFSPROC_NOOP; nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK); cacherep = RC_DOIT; - printf("NFS request from unprivileged port (%s:%d)\n", - (char *)(inet_ntoa(sin->sin_addr)), port); } } } /* - * Loop to get all the write rpc relies that have been + * Loop to get all the write RPC replies that have been * gathered together. */ do { switch (cacherep) { case RC_DOIT: - if (nd && (nd->nd_flag & ND_NFSV3)) - procrastinate = nfsrvw_procrastinate_v3; + if (nd && (nd->nd_vers == NFS_VER3)) + procrastinate = nfsrv_wg_delay_v3; else - procrastinate = nfsrvw_procrastinate; - if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE && - procrastinate > 0 && !notstarted)) - error = nfsrv_writegather(&nd, slp, - nfsd->nfsd_procp, &mreq); + procrastinate = nfsrv_wg_delay; + lck_rw_lock_shared(&nfsrv_export_rwlock); + context.vc_ucred = NULL; + if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0))) + error = nfsrv_writegather(&nd, slp, &context, &mrep); else - error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, - slp, nfsd->nfsd_procp, &mreq); - if (mreq == NULL) + error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep); + lck_rw_done(&nfsrv_export_rwlock); + if (mrep == NULL) { + /* + * If this is a stream socket and we are not going + * to send a reply we better close the connection + * so the client doesn't hang. 
+ */ + if (error && slp->ns_sotype == SOCK_STREAM) { + lck_rw_lock_exclusive(&slp->ns_rwlock); + nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + printf("NFS server: NULL reply from proc = %d error = %d\n", + nd->nd_procnum, error); + } break; + + } if (error) { - if (nd->nd_procnum != NQNFSPROC_VACATED) - nfsstats.srv_errs++; - nfsrv_updatecache(nd, FALSE, mreq); - if (nd->nd_nam2) - m_freem(nd->nd_nam2); + OSAddAtomic64(1, &nfsstats.srv_errs); + nfsrv_updatecache(nd, FALSE, mrep); + if (nd->nd_nam2) { + mbuf_freem(nd->nd_nam2); + nd->nd_nam2 = NULL; + } break; } - nfsstats.srvrpccnt[nd->nd_procnum]++; - nfsrv_updatecache(nd, TRUE, mreq); - nd->nd_mrep = (struct mbuf *)0; + OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]); + nfsrv_updatecache(nd, TRUE, mrep); + /* FALLTHRU */ + case RC_REPLY: - m = mreq; + if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS + /* + * Need to checksum or encrypt the reply + */ + error = nfs_gss_svc_protect_reply(nd, mrep); + if (error) { + mbuf_freem(mrep); + break; + } + } + + /* + * Get the total size of the reply + */ + m = mrep; siz = 0; while (m) { - siz += m->m_len; - m = m->m_next; + siz += mbuf_len(m); + m = mbuf_next(m); } if (siz <= 0 || siz > NFS_MAXPACKET) { printf("mbuf siz=%d\n",siz); panic("Bad nfs svc reply"); } - m = mreq; - m->m_pkthdr.len = siz; - m->m_pkthdr.rcvif = (struct ifnet *)0; + m = mrep; + mbuf_pkthdr_setlen(m, siz); + error = mbuf_pkthdr_setrcvif(m, NULL); + if (error) + panic("nfsd setrcvif failed: %d", error); /* * For stream protocols, prepend a Sun RPC * Record Mark. 
*/ - if (sotype == SOCK_STREAM) { - M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); - *mtod(m, u_long *) = htonl(0x80000000 | siz); + if (slp->ns_sotype == SOCK_STREAM) { + error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK); + if (!error) + *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz); + } + if (!error) { + if (slp->ns_flag & SLP_VALID) { + error = nfsrv_send(slp, nd->nd_nam2, m); + } else { + error = EPIPE; + mbuf_freem(m); + } + } else { + mbuf_freem(m); } - if (solockp) - (void) nfs_sndlock(solockp, (struct nfsreq *)0); - if (slp->ns_flag & SLP_VALID) - error = nfs_send(so, nd->nd_nam2, m, NULL); - else { - error = EPIPE; - m_freem(m); + mrep = NULL; + if (nd->nd_nam2) { + mbuf_freem(nd->nd_nam2); + nd->nd_nam2 = NULL; } - if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); - if (nd->nd_nam2) - MFREE(nd->nd_nam2, m); - if (nd->nd_mrep) - m_freem(nd->nd_mrep); - if (error == EPIPE) + if (error == EPIPE) { + lck_rw_lock_exclusive(&slp->ns_rwlock); nfsrv_zapsock(slp); - if (solockp) - nfs_sndunlock(solockp); + lck_rw_done(&slp->ns_rwlock); + } if (error == EINTR || error == ERESTART) { - _FREE_ZONE((caddr_t)nd, - sizeof *nd, M_NFSRVDESC); + nfsm_chain_cleanup(&nd->nd_nmreq); + if (IS_VALID_CRED(nd->nd_cr)) + kauth_cred_unref(&nd->nd_cr); + if (nd->nd_gss_context) + nfs_gss_svc_ctx_deref(nd->nd_gss_context); + FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); nfsrv_slpderef(slp); - s = splnet(); + lck_mtx_lock(nfsd_mutex); goto done; } break; case RC_DROPIT: - if (nfsrtton) - nfsd_rt(sotype, nd, cacherep); - m_freem(nd->nd_mrep); - m_freem(nd->nd_nam2); + mbuf_freem(nd->nd_nam2); + nd->nd_nam2 = NULL; break; }; + opcnt++; if (nd) { - FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC); + nfsm_chain_cleanup(&nd->nd_nmreq); + if (nd->nd_nam2) + mbuf_freem(nd->nd_nam2); + if (IS_VALID_CRED(nd->nd_cr)) + kauth_cred_unref(&nd->nd_cr); + if (nd->nd_gss_context) + nfs_gss_svc_ctx_deref(nd->nd_gss_context); + FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC); nd = NULL; } @@ -781,518 +1354,328 @@ 
nfssvc_nfsd(nsd, argp, p) * Check to see if there are outstanding writes that * need to be serviced. */ - cur_usec = (u_quad_t)time.tv_sec * 1000000 + - (u_quad_t)time.tv_usec; - s = splsoftclock(); - if (slp->ns_tq.lh_first && - slp->ns_tq.lh_first->nd_time <= cur_usec) { - cacherep = RC_DOIT; - writes_todo = 1; - } else - writes_todo = 0; - splx(s); + writes_todo = 0; + if (slp->ns_wgtime) { + microuptime(&now); + cur_usec = (u_quad_t)now.tv_sec * 1000000 + + (u_quad_t)now.tv_usec; + if (slp->ns_wgtime <= cur_usec) { + cacherep = RC_DOIT; + writes_todo = 1; + } + } } while (writes_todo); - s = splnet(); - if (nfsrv_dorec(slp, nfsd, &nd)) { + + nd = NULL; + if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) { + lck_rw_lock_exclusive(&slp->ns_rwlock); + error = nfsrv_dorec(slp, nfsd, &nd); + if (error == EINVAL) { // RPCSEC_GSS drop + if (slp->ns_sotype == SOCK_STREAM) + nfsrv_zapsock(slp); // drop connection + } + lck_rw_done(&slp->ns_rwlock); + } + if (!nd) { + /* drop our reference on the socket */ nfsd->nfsd_flag &= ~NFSD_REQINPROG; nfsd->nfsd_slp = NULL; nfsrv_slpderef(slp); } } + lck_mtx_lock(nfsd_mutex); done: TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain); - splx(s); - _FREE((caddr_t)nfsd, M_NFSD); - nsd->nsd_nfsd = (struct nfsd *)0; - if (--nfs_numnfsd == 0) - nfsrv_init(TRUE); /* Reinitialize everything */ + FREE(nfsd, M_NFSD); + if (--nfsd_thread_count == 0) + nfsrv_cleanup(); + lck_mtx_unlock(nfsd_mutex); return (error); } -#endif /* NFS_NOSERVER */ -int nfs_defect = 0; -/* XXX CSM 11/25/97 Upgrade sysctl.h someday */ -#ifdef notyet -SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); -#endif - -static int nfssvc_iod_continue(int); - -/* - * Asynchronous I/O daemons for client nfs. - * They do read-ahead and write-behind operations on the block I/O cache. - * Never returns unless it fails or gets killed. 
- */ -static int -nfssvc_iod(p) - struct proc *p; -{ - register struct buf *bp; - register int i, myiod; - struct nfsmount *nmp; - int error = 0; - struct uthread *ut; - - /* - * Assign my position or return error if too many already running - */ - myiod = -1; - for (i = 0; i < NFS_MAXASYNCDAEMON; i++) - if (nfs_asyncdaemon[i] == 0) { - nfs_asyncdaemon[i]++; - myiod = i; - break; - } - if (myiod == -1) - return (EBUSY); - nfs_numasync++; - - /* stuff myiod into uthread to get off local stack for - continuation */ - - ut = (struct uthread *)get_bsdthread_info(current_act()); - ut->uu_state.uu_nfs_myiod = myiod; /* squirrel away for continuation */ - - nfssvc_iod_continue(0); - /* NOTREACHED */ - -} - -/* - * Continuation for Asynchronous I/O daemons for client nfs. - */ -static int -nfssvc_iod_continue(error) +int +nfssvc_export(user_addr_t argp) { - register struct buf *bp; - register int i, myiod; - struct nfsmount *nmp; - struct uthread *ut; - struct proc *p; + int error = 0, is_64bit; + struct user_nfs_export_args unxa; + vfs_context_t ctx = vfs_context_current(); - /* - * real myiod is stored in uthread, recover it - */ - ut = (struct uthread *)get_bsdthread_info(current_act()); - myiod = ut->uu_state.uu_nfs_myiod; - p = current_proc(); + is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx)); - /* - * Just loop around doin our stuff until SIGKILL - * - actually we don't loop with continuations... 
- */ - for (;;) { - while (((nmp = nfs_iodmount[myiod]) == NULL - || nmp->nm_bufq.tqh_first == NULL) - && error == 0) { - if (nmp) - nmp->nm_bufqiods--; - nfs_iodwant[myiod] = p; - nfs_iodmount[myiod] = NULL; - error = tsleep0((caddr_t)&nfs_iodwant[myiod], - PWAIT | PCATCH, "nfsidl", 0, nfssvc_iod_continue); - /* NOTREACHED */ - } - if (error) { - nfs_asyncdaemon[myiod] = 0; - if (nmp) nmp->nm_bufqiods--; - nfs_iodwant[myiod] = NULL; - nfs_iodmount[myiod] = NULL; - nfs_numasync--; - if (error == EINTR || error == ERESTART) - error = 0; - unix_syscall_return(error); - } - while ((bp = nmp->nm_bufq.tqh_first) != NULL) { - /* Take one off the front of the list */ - TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); - nmp->nm_bufqlen--; - if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { - nmp->nm_bufqwant = FALSE; - wakeup(&nmp->nm_bufq); - } - if (ISSET(bp->b_flags, B_READ)) - (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0); - else - (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0); - - /* - * If there are more than one iod on this mount, then defect - * so that the iods can be shared out fairly between the mounts - */ - if (nfs_defect && nmp->nm_bufqiods > 1) { - NFS_DPF(ASYNCIO, - ("nfssvc_iod: iod %d defecting from mount %p\n", - myiod, nmp)); - nfs_iodmount[myiod] = NULL; - nmp->nm_bufqiods--; - break; + /* copy in pointers to path and export args */ + if (is_64bit) { + error = copyin(argp, (caddr_t)&unxa, sizeof(unxa)); + } else { + struct nfs_export_args tnxa; + error = copyin(argp, (caddr_t)&tnxa, sizeof(tnxa)); + if (error == 0) { + /* munge into LP64 version of nfs_export_args structure */ + unxa.nxa_fsid = tnxa.nxa_fsid; + unxa.nxa_expid = tnxa.nxa_expid; + unxa.nxa_fspath = CAST_USER_ADDR_T(tnxa.nxa_fspath); + unxa.nxa_exppath = CAST_USER_ADDR_T(tnxa.nxa_exppath); + unxa.nxa_flags = tnxa.nxa_flags; + unxa.nxa_netcount = tnxa.nxa_netcount; + unxa.nxa_nets = CAST_USER_ADDR_T(tnxa.nxa_nets); } - } } + if (error) + return (error); + + error = 
nfsrv_export(&unxa, ctx); + + return (error); } /* - * Shut down a socket associated with an nfssvc_sock structure. + * Shut down a socket associated with an nfsrv_sock structure. * Should be called with the send lock set, if required. * The trick here is to increment the sref at the start, so that the nfsds * will stop using it and clear ns_flag at the end so that it will not be * reassigned during cleanup. */ -static void -nfsrv_zapsock(slp) - register struct nfssvc_sock *slp; +void +nfsrv_zapsock(struct nfsrv_sock *slp) { - register struct nfsuid *nuidp, *nnuidp; - register struct nfsrv_descript *nwp, *nnwp; - struct socket *so; - struct file *fp; - struct mbuf *m; - int s; + socket_t so; + if ((slp->ns_flag & SLP_VALID) == 0) + return; slp->ns_flag &= ~SLP_ALLFLAGS; - fp = slp->ns_fp; - if (fp) { - slp->ns_fp = (struct file *)0; - so = slp->ns_so; - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - so->so_upcall = NULL; - so->so_rcv.sb_flags &= ~SB_UPCALL; - soshutdown(so, 2); - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - closef(fp, (struct proc *)0); - if (slp->ns_nam) - MFREE(slp->ns_nam, m); - m_freem(slp->ns_raw); - m_freem(slp->ns_rec); - for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0; - nuidp = nnuidp) { - nnuidp = nuidp->nu_lru.tqe_next; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru); - if (nuidp->nu_flag & NU_NAM) - m_freem(nuidp->nu_nam); - _FREE_ZONE((caddr_t)nuidp, - sizeof (struct nfsuid), M_NFSUID); - } - s = splsoftclock(); - for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { - nnwp = nwp->nd_tq.le_next; - LIST_REMOVE(nwp, nd_tq); - _FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC); - } - LIST_INIT(&slp->ns_tq); - splx(s); - } + + so = slp->ns_so; + if (so == NULL) + return; + + sock_setupcall(so, NULL, NULL); + sock_shutdown(so, SHUT_RDWR); + + /* + * Remove from the up-call queue + */ + nfsrv_uc_dequeue(slp); } /* - * Get an authorization string for the uid by having the mount_nfs 
sitting - * on this mount point porpous out of the kernel and do it. + * cleanup and release a server socket structure. */ -int -nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key) - register struct nfsmount *nmp; - struct nfsreq *rep; - struct ucred *cred; - char **auth_str; - int *auth_len; - char *verf_str; - int *verf_len; - NFSKERBKEY_T key; /* return session key */ +void +nfsrv_slpfree(struct nfsrv_sock *slp) { - int error = 0; + struct nfsrv_descript *nwp, *nnwp; - while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) { - nmp->nm_flag |= NFSMNT_WANTAUTH; - (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK, - "nfsauth1", 2 * hz); - error = nfs_sigintr(nmp, rep, rep->r_procp); - if (error) { - nmp->nm_flag &= ~NFSMNT_WANTAUTH; - return (error); - } - } - nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH); - MALLOC(*auth_str, char *, RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK); - nmp->nm_authstr = *auth_str; - nmp->nm_authlen = RPCAUTH_MAXSIZ; - nmp->nm_verfstr = verf_str; - nmp->nm_verflen = *verf_len; - nmp->nm_authuid = cred->cr_uid; - wakeup((caddr_t)&nmp->nm_authstr); - - /* - * And wait for mount_nfs to do its stuff. 
- */ - while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) { - (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK, - "nfsauth2", 2 * hz); - error = nfs_sigintr(nmp, rep, rep->r_procp); + if (slp->ns_so) { + sock_release(slp->ns_so); + slp->ns_so = NULL; } - if (nmp->nm_flag & NFSMNT_AUTHERR) { - nmp->nm_flag &= ~NFSMNT_AUTHERR; - error = EAUTH; - } - if (error) - _FREE((caddr_t)*auth_str, M_TEMP); - else { - *auth_len = nmp->nm_authlen; - *verf_len = nmp->nm_verflen; - bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key)); + if (slp->ns_nam) + mbuf_free(slp->ns_nam); + if (slp->ns_raw) + mbuf_freem(slp->ns_raw); + if (slp->ns_rec) + mbuf_freem(slp->ns_rec); + if (slp->ns_frag) + mbuf_freem(slp->ns_frag); + slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL; + slp->ns_reccnt = 0; + + for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) { + nnwp = nwp->nd_tq.le_next; + LIST_REMOVE(nwp, nd_tq); + nfsm_chain_cleanup(&nwp->nd_nmreq); + if (nwp->nd_mrep) + mbuf_freem(nwp->nd_mrep); + if (nwp->nd_nam2) + mbuf_freem(nwp->nd_nam2); + if (IS_VALID_CRED(nwp->nd_cr)) + kauth_cred_unref(&nwp->nd_cr); + if (nwp->nd_gss_context) + nfs_gss_svc_ctx_deref(nwp->nd_gss_context); + FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC); } - nmp->nm_flag &= ~NFSMNT_HASAUTH; - nmp->nm_flag |= NFSMNT_WAITAUTH; - if (nmp->nm_flag & NFSMNT_WANTAUTH) { - nmp->nm_flag &= ~NFSMNT_WANTAUTH; - wakeup((caddr_t)&nmp->nm_authtype); - } - return (error); + LIST_INIT(&slp->ns_tq); + + lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group); + lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group); + FREE(slp, M_NFSSVC); } /* - * Get a nickname authenticator and verifier. + * Derefence a server socket structure. If it has no more references and + * is no longer valid, you can throw it away. 
*/ -int -nfs_getnickauth(nmp, cred, auth_str, auth_len, verf_str, verf_len) - struct nfsmount *nmp; - struct ucred *cred; - char **auth_str; - int *auth_len; - char *verf_str; - int verf_len; +static void +nfsrv_slpderef_locked(struct nfsrv_sock *slp) { - register struct nfsuid *nuidp; - register u_long *nickp, *verfp; - struct timeval ktvin, ktvout; - -#if DIAGNOSTIC - if (verf_len < (4 * NFSX_UNSIGNED)) - panic("nfs_getnickauth verf too small"); -#endif - for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first; - nuidp != 0; nuidp = nuidp->nu_hash.le_next) { - if (nuidp->nu_cr.cr_uid == cred->cr_uid) - break; + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_sref--; + + if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) { + if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) { + /* remove socket from queue since there's no work */ + if (slp->ns_flag & SLP_WAITQ) + TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); + else + TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); + slp->ns_flag &= ~SLP_QUEUED; + } + lck_rw_done(&slp->ns_rwlock); + return; } - if (!nuidp || nuidp->nu_expire < time.tv_sec) - return (EACCES); - /* - * Move to the end of the lru list (end of lru == most recently used). - */ - TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); - TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru); + /* This socket is no longer valid, so we'll get rid of it */ - MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK); - *nickp++ = txdr_unsigned(RPCAKN_NICKNAME); - *nickp = txdr_unsigned(nuidp->nu_nickname); - *auth_str = (char *)nickp; - *auth_len = 2 * NFSX_UNSIGNED; + if (slp->ns_flag & SLP_QUEUED) { + if (slp->ns_flag & SLP_WAITQ) + TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq); + else + TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq); + slp->ns_flag &= ~SLP_QUEUED; + } + lck_rw_done(&slp->ns_rwlock); - /* - * Now we must encrypt the verifier and package it up. 
- */
-	verfp = (u_long *)verf_str;
-	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
-	if (time.tv_sec > nuidp->nu_timestamp.tv_sec ||
-	    (time.tv_sec == nuidp->nu_timestamp.tv_sec &&
-	     time.tv_usec > nuidp->nu_timestamp.tv_usec))
-		nuidp->nu_timestamp = time;
-	else
-		nuidp->nu_timestamp.tv_usec++;
-	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
-	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
+	TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+	if (slp->ns_sotype == SOCK_STREAM)
+		nfsrv_sock_tcp_cnt--;
 
-	/*
-	 * Now encrypt the timestamp verifier in ecb mode using the session
-	 * key.
-	 */
-#if NFSKERB
-	XXX
-#endif
+	/* now remove from the write gather socket list */
+	if (slp->ns_wgq.tqe_next != SLPNOLIST) {
+		TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
+		slp->ns_wgq.tqe_next = SLPNOLIST;
+	}
+	nfsrv_slpfree(slp);
+}
 
-	*verfp++ = ktvout.tv_sec;
-	*verfp++ = ktvout.tv_usec;
-	*verfp = 0;
-	return (0);
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)
+{
+	lck_mtx_lock(nfsd_mutex);
+	nfsrv_slpderef_locked(slp);
+	lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
- * Save the current nickname in a hash list entry on the mount point.
+ * Check periodically for idle sockets if needed and
+ * zap them.
 */
-int
-nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
-	register struct nfsmount *nmp;
-	struct ucred *cred;
-	int len;
-	NFSKERBKEY_T key;
-	struct mbuf **mdp;
-	char **dposp;
-	struct mbuf *mrep;
+void
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
 {
-	register struct nfsuid *nuidp;
-	register u_long *tl;
-	register long t1;
-	struct mbuf *md = *mdp;
-	struct timeval ktvin, ktvout;
-	u_long nick;
-	char *dpos = *dposp, *cp2;
-	int deltasec, error = 0;
-
-	if (len == (3 * NFSX_UNSIGNED)) {
-		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
-		ktvin.tv_sec = *tl++;
-		ktvin.tv_usec = *tl++;
-		nick = fxdr_unsigned(u_long, *tl);
+	struct nfsrv_sock *slp, *tslp;
+	struct timeval now;
+	time_t time_to_wait = nfsrv_sock_idle_timeout;
+
+	microuptime(&now);
+	lck_mtx_lock(nfsd_mutex);
+
+	/* Turn off the timer if we're supposed to and get out */
+	if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
+		nfsrv_sock_idle_timeout = 0;
+	if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+		nfsrv_idlesock_timer_on = 0;
+		lck_mtx_unlock(nfsd_mutex);
+		return;
+	}
+	TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
+		lck_rw_lock_exclusive(&slp->ns_rwlock);
+		/* Skip udp and referenced sockets */
+		if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+			lck_rw_done(&slp->ns_rwlock);
+			continue;
+		}
 		/*
-		 * Decrypt the timestamp in ecb mode.
+		 * If this is the first non-referenced socket that hasn't idled out,
+		 * use its time stamp to calculate the earliest time in the future
+		 * to start the next invocation of the timer, since the nfsrv_socklist
+		 * is sorted from oldest access to newest. Once we find the first one,
+		 * we're done and break out of the loop.
*/ -#if NFSKERB - XXX -#endif - ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec); - ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec); - deltasec = time.tv_sec - ktvout.tv_sec; - if (deltasec < 0) - deltasec = -deltasec; + if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) || + nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) { + time_to_wait -= now.tv_sec - slp->ns_timestamp; + if (time_to_wait < 1) + time_to_wait = 1; + lck_rw_done(&slp->ns_rwlock); + break; + } /* - * If ok, add it to the hash list for the mount point. + * Bump the ref count. nfsrv_slpderef below will destroy + * the socket, since nfsrv_zapsock has closed it. */ - if (deltasec <= NFS_KERBCLOCKSKEW) { - if (nmp->nm_numuids < nuidhash_max) { - nmp->nm_numuids++; - MALLOC_ZONE(nuidp, struct nfsuid *, - sizeof (struct nfsuid), - M_NFSUID, M_WAITOK); - } else { - nuidp = nmp->nm_uidlruhead.tqh_first; - LIST_REMOVE(nuidp, nu_hash); - TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, - nu_lru); - } - nuidp->nu_flag = 0; - nuidp->nu_cr.cr_uid = cred->cr_uid; - nuidp->nu_expire = time.tv_sec + NFS_KERBTTL; - nuidp->nu_timestamp = ktvout; - nuidp->nu_nickname = nick; - bcopy(key, nuidp->nu_key, sizeof (key)); - TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, - nu_lru); - LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid), - nuidp, nu_hash); - } - } else - nfsm_adv(nfsm_rndup(len)); -nfsmout: - *mdp = md; - *dposp = dpos; - return (error); -} - -#ifndef NFS_NOSERVER - -/* - * Derefence a server socket structure. If it has no more references and - * is no longer valid, you can throw it away. 
- */ -void -nfsrv_slpderef(slp) - register struct nfssvc_sock *slp; -{ - if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) { - TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); + slp->ns_sref++; + nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + nfsrv_slpderef_locked(slp); } + + /* Start ourself back up */ + nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000); + /* Remember when the next timer will fire for nfssvc_addsock. */ + nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait; + lck_mtx_unlock(nfsd_mutex); } /* - * Initialize the data structures for the server. - * Handshake with any new nfsds starting up to avoid any chance of - * corruption. + * Clean up the data structures for the server. */ void -nfsrv_init(terminating) - int terminating; +nfsrv_cleanup(void) { - register struct nfssvc_sock *slp, *nslp; - - if (nfssvc_sockhead_flag & SLP_INIT) - panic("nfsd init"); - nfssvc_sockhead_flag |= SLP_INIT; - if (terminating) { - for (slp = nfssvc_sockhead.tqh_first; slp != 0; slp = nslp) { - nslp = slp->ns_chain.tqe_next; - if (slp->ns_flag & SLP_VALID) - nfsrv_zapsock(slp); - TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain); - _FREE((caddr_t)slp, M_NFSSVC); - } - nfsrv_cleancache(); /* And clear out server cache */ -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet - } else - nfs_pub.np_valid = 0; -#else - } + struct nfsrv_sock *slp, *nslp; + struct timeval now; +#if CONFIG_FSE + struct nfsrv_fmod *fp, *nfp; + int i; #endif - TAILQ_INIT(&nfssvc_sockhead); - nfssvc_sockhead_flag &= ~SLP_INIT; - if (nfssvc_sockhead_flag & SLP_WANTINIT) { - nfssvc_sockhead_flag &= ~SLP_WANTINIT; - wakeup((caddr_t)&nfssvc_sockhead); + microuptime(&now); + for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) { + nslp = TAILQ_NEXT(slp, ns_chain); + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_sref++; + if (slp->ns_flag & SLP_VALID) + nfsrv_zapsock(slp); + lck_rw_done(&slp->ns_rwlock); + 
nfsrv_slpderef_locked(slp); } +# +#if CONFIG_FSE + /* + * Flush pending file write fsevents + */ + lck_mtx_lock(nfsrv_fmod_mutex); + for (i = 0; i < NFSRVFMODHASHSZ; i++) { + for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) { + /* + * Fire off the content modified fsevent for each + * entry, remove it from the list, and free it. + */ + if (nfsrv_fsevents_enabled) { + fp->fm_context.vc_thread = current_thread(); + add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, + FSE_ARG_VNODE, fp->fm_vp, + FSE_ARG_DONE); + } + vnode_put(fp->fm_vp); + kauth_cred_unref(&fp->fm_context.vc_ucred); + nfp = LIST_NEXT(fp, fm_link); + LIST_REMOVE(fp, fm_link); + FREE(fp, M_TEMP); + } + } + nfsrv_fmod_pending = 0; + lck_mtx_unlock(nfsrv_fmod_mutex); +#endif - TAILQ_INIT(&nfsd_head); - nfsd_head_flag &= ~NFSD_CHECKSLP; + nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */ + + nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */ - MALLOC(nfs_udpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), - M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_udpsock->ns_uidlruhead); - TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain); + nfsrv_cleancache(); /* And clear out server cache */ - MALLOC(nfs_cltpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock), - M_NFSSVC, M_WAITOK); - bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock)); - TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead); - TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain); + nfsrv_udpsock = NULL; + nfsrv_udp6sock = NULL; } -/* - * Add entries to the server monitor log. 
- */ -static void -nfsd_rt(sotype, nd, cacherep) - int sotype; - register struct nfsrv_descript *nd; - int cacherep; -{ - register struct drt *rt; - - rt = &nfsdrt.drt[nfsdrt.pos]; - if (cacherep == RC_DOIT) - rt->flag = 0; - else if (cacherep == RC_REPLY) - rt->flag = DRT_CACHEREPLY; - else - rt->flag = DRT_CACHEDROP; - if (sotype == SOCK_STREAM) - rt->flag |= DRT_TCP; - if (nd->nd_flag & ND_NQNFS) - rt->flag |= DRT_NQNFS; - else if (nd->nd_flag & ND_NFSV3) - rt->flag |= DRT_NFSV3; - rt->proc = nd->nd_procnum; - if (mtod(nd->nd_nam, struct sockaddr *)->sa_family == AF_INET) - rt->ipadr = mtod(nd->nd_nam, struct sockaddr_in *)->sin_addr.s_addr; - else - rt->ipadr = INADDR_ANY; - rt->resptime = ((time.tv_sec - nd->nd_starttime.tv_sec) * 1000000) + - (time.tv_usec - nd->nd_starttime.tv_usec); - rt->tstamp = time; - nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ; -} #endif /* NFS_NOSERVER */