/*
- * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the
- * License may not be used to create, or enable the creation or
- * redistribution of, unlawful or unlicensed copies of an Apple operating
- * system, or to circumvent, violate, or enable the circumvention or
- * violation of, any terms of an Apple operating system software license
- * agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
* limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
* @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
* FreeBSD-Id: nfs_syscalls.c,v 1.32 1997/11/07 08:53:25 phk Exp $
*/
+/*
+ * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
+ * support for mandatory and extensible security protections. This notice
+ * is included in support of clause 2.2 (b) of the Apple Public License,
+ * Version 2.0.
+ */
#include <sys/param.h>
#include <sys/systm.h>
-/* XXX CSM 11/25/97 FreeBSD's generated syscall prototypes */
-#ifdef notyet
-#include <sys/sysproto.h>
-#endif
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/user.h>
#include <sys/sysproto.h>
#include <sys/kpi_socket.h>
+#include <sys/fsevents.h>
#include <libkern/OSAtomic.h>
+#include <kern/thread_call.h>
+#include <kern/task.h>
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
-#if ISO
-#include <netiso/iso.h>
-#endif
#include <nfs/xdr_subs.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsrvcache.h>
+#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
-#include <nfs/nfsrtt.h>
#include <nfs/nfs_lock.h>
+#if CONFIG_MACF
+#include <security/mac_framework.h>
+#endif
+
+kern_return_t thread_terminate(thread_t); /* XXX */
+
+#if NFSSERVER
+
+extern int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
+ struct nfsrv_sock *slp,
+ vfs_context_t ctx,
+ mbuf_t *mrepp);
+extern int nfsrv_wg_delay;
+extern int nfsrv_wg_delay_v3;
+
+static int nfsrv_require_resv_port = 0;
+static time_t nfsrv_idlesock_timer_on = 0;
+static int nfsrv_sock_tcp_cnt = 0;
+#define NFSD_MIN_IDLE_TIMEOUT 30
+static int nfsrv_sock_idle_timeout = 3600; /* One hour */
-extern void unix_syscall_return(int);
-
-/* Global defs. */
-extern int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *nd,
- struct nfssvc_sock *slp,
- proc_t procp,
- mbuf_t *mreqp);
-extern int nfs_numasync;
-extern int nfs_ioddelwri;
-extern int nfsrtton;
-extern struct nfsstats nfsstats;
-extern int nfsrvw_procrastinate;
-extern int nfsrvw_procrastinate_v3;
-
-struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
-static int nuidhash_max = NFS_MAXUIDHASH;
-
-static void nfsrv_zapsock(struct nfssvc_sock *slp);
-static int nfssvc_iod(proc_t);
-static int nfskerb_clientd(struct nfsmount *, struct nfsd_cargs *, int, user_addr_t, proc_t);
-
-static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
-
-#ifndef NFS_NOSERVER
-int nfsd_waiting = 0;
-static struct nfsdrt nfsdrt;
-int nfs_numnfsd = 0;
-static void nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep);
-static int nfssvc_addsock(socket_t, mbuf_t, proc_t);
-static int nfssvc_nfsd(struct nfsd_srvargs *,user_addr_t, proc_t);
-static int nfssvc_export(user_addr_t, proc_t);
-
-static int nfs_privport = 0;
-/* XXX CSM 11/25/97 Upgrade sysctl.h someday */
-#ifdef notyet
-SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, "");
-SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, "");
-SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, "");
+int nfssvc_export(user_addr_t argp);
+int nfssvc_nfsd(void);
+int nfssvc_addsock(socket_t, mbuf_t);
+void nfsrv_zapsock(struct nfsrv_sock *);
+void nfsrv_slpderef(struct nfsrv_sock *);
+void nfsrv_slpfree(struct nfsrv_sock *);
+
+#endif /* NFSSERVER */
+
+/*
+ * sysctl stuff
+ */
+SYSCTL_DECL(_vfs_generic);
+SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hinge");
+
+#if NFSCLIENT
+SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
+SYSCTL_STRING(_vfs_generic_nfs_client, OID_AUTO, default_nfs4domain, CTLFLAG_RW | CTLFLAG_LOCKED, nfs4_domain, sizeof(nfs4_domain), "");
+#endif /* NFSCLIENT */
+
+#if NFSSERVER
+SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
+#if CONFIG_FSE
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
#endif
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
+#ifdef NFS_UC_Q_DEBUG
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
+#endif
+#endif /* NFSSERVER */
+
+
+#if NFSCLIENT
+
+/*
+ * Test helper: fill in map->ntm_guid and map->ntm_id from map->ntm_name.
+ *
+ * nfs4_id2guid() is called on the name first; if it fails its error is
+ * returned unchanged. Otherwise the guid is handed to
+ * kauth_cred_guid2gid() when ntm_grpflag is set, or kauth_cred_guid2uid()
+ * when it is clear, and that call's result is returned.
+ */
+static int
+mapname2id(struct nfs_testmapid *map)
+{
+	int rc = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
+
+	if (rc)
+		return (rc);
+
+	/* ntm_id is written through a gid_t or uid_t pointer depending on the flag */
+	return (map->ntm_grpflag
+	    ? kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id)
+	    : kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id));
+}
+
+/*
+ * Test helper: fill in map->ntm_guid and map->ntm_name from map->ntm_id.
+ *
+ * The id goes through kauth_cred_gid2guid() when ntm_grpflag is set, or
+ * kauth_cred_uid2guid() when it is clear. On success the guid is passed to
+ * nfs4_guid2id() together with the ntm_name buffer and its size.
+ * Returns the first non-zero error encountered, or 0.
+ */
+static int
+mapid2name(struct nfs_testmapid *map)
+{
+	int namelen = sizeof(map->ntm_name);	/* in: buffer size, passed by reference */
+	int rc;
+
+	rc = map->ntm_grpflag
+	    ? kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid)
+	    : kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
+
+	if (!rc)
+		rc = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &namelen, map->ntm_grpflag);
+
+	return (rc);
+}
+
+
+/*
+ * NFSCLNT_TESTIDMAP handler: exercise the NFSv4 id mapping code.
+ *
+ * p:    calling process; must pass proc_suser().
+ * argp: user address of a struct nfs_testmapid, used for both input and
+ *       output.
+ *
+ * The structure is copied in, converted with mapname2id() when
+ * ntm_name2id is set or mapid2name() otherwise, and then copied back out
+ * even if the conversion failed. Returns the conversion error if there
+ * was one, otherwise the copyout() result.
+ */
+static int
+nfsclnt_testidmap(proc_t p, user_addr_t argp)
+{
+	struct nfs_testmapid mapid;
+	int error, coerror;
+
+	/* Let root make this call. */
+	error = proc_suser(p);
+	if (error)
+		return (error);
+
+	error = copyin(argp, &mapid, sizeof(mapid));
+	if (error)
+		return (error);
+
+	/*
+	 * The name arrives from user space and is handed to nfs4_id2guid()
+	 * without a length, so make sure it is NUL-terminated before use.
+	 * (Assumes ntm_name is a char array, consistent with mapid2name()
+	 * using sizeof(map->ntm_name) as the buffer length.)
+	 */
+	mapid.ntm_name[sizeof(mapid.ntm_name) - 1] = '\0';
+
+	if (mapid.ntm_name2id)
+		error = mapname2id(&mapid);
+	else
+		error = mapid2name(&mapid);
+
+	/* always hand the (possibly partially updated) struct back */
+	coerror = copyout(&mapid, argp, sizeof(mapid));
+
+	return (error ? error : coerror);
+}
+
+/*
+ * nfsclnt system call: dispatch on uap->flag.
+ *
+ *   NFSCLNT_LOCKDANS    - copy in a struct lockd_ans and pass it to nfslockdans()
+ *   NFSCLNT_LOCKDNOTIFY - pass uap->argp to nfslockdnotify()
+ *   NFSCLNT_TESTIDMAP   - pass uap->argp to nfsclnt_testidmap()
+ *
+ * Any other flag value yields EINVAL. retval is unused.
+ */
+int
+nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
+{
+	switch (uap->flag) {
+	case NFSCLNT_LOCKDANS: {
+		struct lockd_ans la;
+		int error = copyin(uap->argp, &la, sizeof(la));
+
+		if (error)
+			return (error);
+		return (nfslockdans(p, &la));
+	}
+	case NFSCLNT_LOCKDNOTIFY:
+		return (nfslockdnotify(p, uap->argp));
+	case NFSCLNT_TESTIDMAP:
+		return (nfsclnt_testidmap(p, uap->argp));
+	default:
+		return (EINVAL);
+	}
+}
+
+
+/*
+ * Asynchronous I/O threads for client NFS.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ *
+ * Up to nfsiod_thread_max threads are launched on demand, and each exits
+ * when unused for a while. There are as many nfsiod structs as there are
+ * nfsiod threads; however there's no strict tie between a thread and a struct.
+ * Each thread puts an nfsiod on the free list and sleeps on it. When it wakes
+ * up, it removes the next struct nfsiod from the queue and services it. Then
+ * it will put the struct at the head of the free list and sleep on it.
+ * Async requests will pull the next struct nfsiod from the head of the free list,
+ * put it on the work queue, and wake whatever thread is waiting on that struct.
+ */
+
+/*
+ * nfsiod thread exit routine
+ *
+ * Must be called with nfsiod_mutex held so that the decision to
+ * terminate is atomic with the termination. The mutex is dropped here
+ * after the thread count has been decremented.
+ *
+ * niod: nfsiod struct to release, or NULL if the caller found none; the
+ *       NULL case is only logged.
+ *
+ * Ends by calling thread_terminate() on the current thread.
+ */
+void
+nfsiod_terminate(struct nfsiod *niod)
+{
+	nfsiod_thread_count--;
+	lck_mtx_unlock(nfsiod_mutex);
+
+	if (niod == NULL)
+		printf("nfsiod: terminating without niod\n");
+	else
+		FREE(niod, M_TEMP);
+
+	thread_terminate(current_thread());
+	/*NOTREACHED*/
+}
+
+/*
+ * nfsiod thread startup routine
+ *
+ * Allocates and zeroes this thread's struct nfsiod, puts it at the head
+ * of the free list, issues a wakeup on the current thread (nfsiod_start()
+ * sleeps on the thread it created), and then sleeps on the nfsiod with
+ * nfsiod_continue() as the continuation.
+ *
+ * If the allocation fails, the thread count is decremented, the same
+ * wakeup is issued, and the thread terminates.
+ */
+void
+nfsiod_thread(void)
+{
+	struct nfsiod *iod;
+	int rc;
+
+	MALLOC(iod, struct nfsiod *, sizeof(struct nfsiod), M_TEMP, M_WAITOK);
+	if (iod == NULL) {
+		/* no nfsiod for us: give the count back and bow out */
+		lck_mtx_lock(nfsiod_mutex);
+		nfsiod_thread_count--;
+		wakeup(current_thread());
+		lck_mtx_unlock(nfsiod_mutex);
+		thread_terminate(current_thread());
+		/*NOTREACHED*/
+	}
+	bzero(iod, sizeof(*iod));
+
+	lck_mtx_lock(nfsiod_mutex);
+	TAILQ_INSERT_HEAD(&nfsiodfree, iod, niod_link);
+	wakeup(current_thread());
+	/* PDROP: the mutex is not held again if msleep0() comes back */
+	rc = msleep0(iod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
+
+	/* shouldn't return... so we have an error */
+	/* remove an old nfsiod struct and terminate */
+	lck_mtx_lock(nfsiod_mutex);
+	iod = TAILQ_LAST(&nfsiodfree, nfsiodlist);
+	if (iod != NULL)
+		TAILQ_REMOVE(&nfsiodfree, iod, niod_link);
+	nfsiod_terminate(iod);
+	/*NOTREACHED*/
+}
+
+/*
+ * Start up another nfsiod thread.
+ * (unless we're already maxed out and there are nfsiods running)
+ *
+ * Returns 0 once the new thread has signalled that its startup is done,
+ * or EBUSY if the thread limit has been reached or the thread could not
+ * be created.
+ */
+int
+nfsiod_start(void)
+{
+	thread_t thd = THREAD_NULL;
+
+	lck_mtx_lock(nfsiod_mutex);
+	if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
+		lck_mtx_unlock(nfsiod_mutex);
+		return (EBUSY);
+	}
+	/* account for the new thread before it runs; it decrements on exit */
+	nfsiod_thread_count++;
+	if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
+		/*
+		 * No thread was created, so nothing will ever undo the
+		 * increment above; take it back here or the count stays
+		 * inflated for good.
+		 */
+		nfsiod_thread_count--;
+		lck_mtx_unlock(nfsiod_mutex);
+		return (EBUSY);
+	}
+	/*
+	 * wait for the thread to complete startup
+	 * (the new thread takes nfsiod_mutex before its wakeup, and we hold
+	 * the mutex until msleep drops it, so the wakeup cannot be missed)
+	 */
+	msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
+	/* drop the thread reference handed back by kernel_thread_start() */
+	thread_deallocate(thd);
+	return (0);
+}
+
+/*
+ * Continuation for Asynchronous I/O threads for NFS client.
+ *
+ * Grab an nfsiod struct from the work queue, service the async request
+ * queue of the mount attached to it (and then of any mounts waiting on
+ * nfsiodmounts), then either put the nfsiod back on the free list and
+ * sleep, or terminate the thread.
+ *
+ * error: value supplied when this runs as the msleep0() continuation;
+ *        it is not examined on entry and is overwritten below.
+ *
+ * Every path ends in nfsiod_terminate(), so the final return statement
+ * is not expected to execute.
+ */
+int
+nfsiod_continue(int error)
+{
+	struct nfsiod *niod;
+	struct nfsmount *nmp;
+	struct nfsreq *req, *treq;
+	struct nfs_reqqhead iodq;
+	int morework;
+
+	lck_mtx_lock(nfsiod_mutex);
+	niod = TAILQ_FIRST(&nfsiodwork);
+	if (!niod) {
+		/* there's no work queued up */
+		/* remove an old nfsiod struct and terminate */
+		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
+			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
+		nfsiod_terminate(niod);
+		/*NOTREACHED*/
+	}
+	TAILQ_REMOVE(&nfsiodwork, niod, niod_link);
+
+worktodo:
+	/* nfsiod_mutex is held at the top of every iteration */
+	while ((nmp = niod->niod_nmp)) {
+		/*
+		 * Service this mount's async I/O queue.
+		 *
+		 * In order to ensure some level of fairness between mounts,
+		 * we grab all the work up front before processing it so any
+		 * new work that arrives will be serviced on a subsequent
+		 * iteration - and we have a chance to see if other work needs
+		 * to be done (e.g. the delayed write queue needs to be pushed
+		 * or other mounts are waiting for an nfsiod).
+		 */
+		/* grab the current contents of the queue */
+		TAILQ_INIT(&iodq);
+		TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
+		/* Mark each iod request as being managed by an iod */
+		TAILQ_FOREACH(req, &iodq, r_achain) {
+			lck_mtx_lock(&req->r_mtx);
+			assert(!(req->r_flags & R_IOD));
+			req->r_flags |= R_IOD;
+			lck_mtx_unlock(&req->r_mtx);
+		}
+		lck_mtx_unlock(nfsiod_mutex);
+
+		/* process the queue (callbacks run without nfsiod_mutex) */
+		TAILQ_FOREACH_SAFE(req, &iodq, r_achain, treq) {
+			TAILQ_REMOVE(&iodq, req, r_achain);
+			req->r_achain.tqe_next = NFSREQNOLIST;
+			req->r_callback.rcb_func(req);
+		}
+
+		/* now check if there's more/other work to be done */
+		lck_mtx_lock(nfsiod_mutex);
+		morework = !TAILQ_EMPTY(&nmp->nm_iodq);
+		if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
+			/*
+			 * we're going to stop working on this mount, but if
+			 * the mount still needs more work then queue it up
+			 */
+			if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST)
+				TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
+			nmp->nm_niod = NULL;
+			niod->niod_nmp = NULL;
+		}
+	}
+
+	/* loop if there's still a mount to work on */
+	if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
+		niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
+		TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
+		niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
+	}
+	if (niod->niod_nmp)
+		goto worktodo;
+
+	/* queue ourselves back up - if there aren't too many threads running */
+	if (nfsiod_thread_count <= NFSIOD_MAX) {
+		TAILQ_INSERT_HEAD(&nfsiodfree, niod, niod_link);
+		error = msleep0(niod, nfsiod_mutex, PWAIT | PDROP, "nfsiod", NFS_ASYNCTHREADMAXIDLE*hz, nfsiod_continue);
+		/* shouldn't return... so we have an error */
+		/* remove an old nfsiod struct and terminate */
+		lck_mtx_lock(nfsiod_mutex);
+		if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
+			TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
+	}
+	/* too many threads (our own niod is freed) or the sleep came back */
+	nfsiod_terminate(niod);
+	/*NOTREACHED*/
+	return (0);
+}
+
+#endif /* NFSCLIENT */
+
+
+#if NFSSERVER
/*
* NFS server system calls
{
vnode_t vp;
struct nfs_filehandle nfh;
- int error;
+ int error, fhlen, fidlen;
struct nameidata nd;
- struct vfs_context context;
char path[MAXPATHLEN], *ptr;
- u_int pathlen;
+ size_t pathlen;
struct nfs_exportfs *nxfs;
struct nfs_export *nx;
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
-
/*
* Must be super user
*/
if (error)
return (error);
- error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
+ error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
+ if (!error)
+ error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
if (error)
return (error);
+ /* limit fh size to length specified (or v3 size by default) */
+ if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
+ fhlen = NFSV3_MAX_FH_SIZE;
+ fidlen = fhlen - sizeof(struct nfs_exphandle);
+
+ if (!nfsrv_is_initialized())
+ return (EINVAL);
- NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
- UIO_SYSSPACE, path, &context);
+ NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+ UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
error = namei(&nd);
if (error)
return (error);
vp = nd.ni_vp;
// find exportfs that matches f_mntonname
- lck_rw_lock_shared(&nfs_export_rwlock);
+ lck_rw_lock_shared(&nfsrv_export_rwlock);
ptr = vnode_mount(vp)->mnt_vfsstat.f_mntonname;
- LIST_FOREACH(nxfs, &nfs_exports, nxfs_next) {
- if (!strcmp(nxfs->nxfs_path, ptr))
+ LIST_FOREACH(nxfs, &nfsrv_exports, nxfs_next) {
+ if (!strncmp(nxfs->nxfs_path, ptr, MAXPATHLEN))
break;
}
if (!nxfs || strncmp(nxfs->nxfs_path, path, strlen(nxfs->nxfs_path))) {
}
bzero(&nfh, sizeof(nfh));
- nfh.nfh_xh.nxh_version = NFS_FH_VERSION;
- nfh.nfh_xh.nxh_fsid = nxfs->nxfs_id;
- nfh.nfh_xh.nxh_expid = nx->nx_id;
+ nfh.nfh_xh.nxh_version = htonl(NFS_FH_VERSION);
+ nfh.nfh_xh.nxh_fsid = htonl(nxfs->nxfs_id);
+ nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
nfh.nfh_xh.nxh_flags = 0;
nfh.nfh_xh.nxh_reserved = 0;
- nfh.nfh_len = NFS_MAX_FID_SIZE;
- error = VFS_VPTOFH(vp, &nfh.nfh_len, &nfh.nfh_fid[0], NULL);
- if (nfh.nfh_len > (int)NFS_MAX_FID_SIZE)
+ nfh.nfh_len = fidlen;
+ error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
+ if (nfh.nfh_len > (uint32_t)fidlen)
error = EOVERFLOW;
nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
nfh.nfh_len += sizeof(nfh.nfh_xh);
+ nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
out:
- lck_rw_done(&nfs_export_rwlock);
+ lck_rw_done(&nfsrv_export_rwlock);
vnode_put(vp);
if (error)
return (error);
- error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
+ error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
return (error);
}
-#endif /* NFS_NOSERVER */
-
-extern struct fileops vnops;
+extern const struct fileops vnops;
/*
* syscall for the rpc.lockd to use to translate a NFS file handle into
int
fhopen( proc_t p,
struct fhopen_args *uap,
- register_t *retval)
+ int32_t *retval)
{
vnode_t vp;
struct nfs_filehandle nfh;
struct fileproc *fp, *nfp;
int fmode, error, type;
int indx;
- kauth_cred_t cred = proc_ucred(p);
- struct vfs_context context;
+ vfs_context_t ctx = vfs_context_current();
kauth_action_t action;
- context.vc_proc = p;
- context.vc_ucred = cred;
-
/*
* Must be super user
*/
- error = suser(cred, 0);
- if (error)
+ error = suser(vfs_context_ucred(ctx), 0);
+ if (error) {
return (error);
+ }
+
+ if (!nfsrv_is_initialized()) {
+ return (EINVAL);
+ }
fmode = FFLAGS(uap->flags);
/* why not allow a non-read/write open for our lockd? */
if (error)
return (error);
if ((nfh.nfh_len < (int)sizeof(struct nfs_exphandle)) ||
- (nfh.nfh_len > (int)NFS_MAX_FH_SIZE))
+ (nfh.nfh_len > (int)NFSV3_MAX_FH_SIZE))
return (EINVAL);
error = copyin(uap->u_fhp, &nfh, sizeof(nfh.nfh_len) + nfh.nfh_len);
if (error)
return (error);
+ nfh.nfh_fhp = (u_char*)&nfh.nfh_xh;
- lck_rw_lock_shared(&nfs_export_rwlock);
+ lck_rw_lock_shared(&nfsrv_export_rwlock);
/* now give me my vnode, it gets returned to me with a reference */
- error = nfsrv_fhtovp(&nfh, NULL, TRUE, &vp, &nx, &nxo);
- lck_rw_done(&nfs_export_rwlock);
- if (error)
+ error = nfsrv_fhtovp(&nfh, NULL, &vp, &nx, &nxo);
+ lck_rw_done(&nfsrv_export_rwlock);
+ if (error) {
+ if (error == NFSERR_TRYLATER)
+ error = EAGAIN; // XXX EBUSY? Or just leave as TRYLATER?
return (error);
+ }
/*
* From now on we have to make sure not
goto bad;
}
+#if CONFIG_MACF
+ if ((error = mac_vnode_check_open(ctx, vp, fmode)))
+ goto bad;
+#endif
+
/* compute action to be authorized */
action = 0;
if (fmode & FREAD)
action |= KAUTH_VNODE_READ_DATA;
if (fmode & (FWRITE | O_TRUNC))
action |= KAUTH_VNODE_WRITE_DATA;
- if ((error = vnode_authorize(vp, NULL, action, &context)) != 0)
+ if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
goto bad;
- if ((error = VNOP_OPEN(vp, fmode, &context)))
+ if ((error = VNOP_OPEN(vp, fmode, ctx)))
goto bad;
- if ((error = vnode_ref_ext(vp, fmode)))
+ if ((error = vnode_ref_ext(vp, fmode, 0)))
goto bad;
/*
*/
// starting here... error paths should call vn_close/vnode_put
- if ((error = falloc(p, &nfp, &indx)) != 0) {
- vn_close(vp, fmode & FMASK, cred, p);
+ if ((error = falloc(p, &nfp, &indx, ctx)) != 0) {
+ vn_close(vp, fmode & FMASK, ctx);
goto bad;
}
fp = nfp;
fp->f_fglob->fg_flag = fmode & FMASK;
- fp->f_fglob->fg_type = DTYPE_VNODE;
fp->f_fglob->fg_ops = &vnops;
fp->f_fglob->fg_data = (caddr_t)vp;
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
- if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, &context))) {
- vn_close(vp, fp->f_fglob->fg_flag, fp->f_fglob->fg_cred, p);
+ if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
+ struct vfs_context context = *vfs_context_current();
+ /* Modify local copy (to not damage thread copy) */
+ context.vc_ucred = fp->f_fglob->fg_cred;
+
+ vn_close(vp, fp->f_fglob->fg_flag, &context);
fp_free(p, indx, fp);
return (error);
}
vnode_put(vp);
proc_fdlock(p);
- *fdflags(p, indx) &= ~UF_RESERVED;
+ procfdtbl_releasefd(p, indx, NULL);
fp_drop(p, indx, fp, 1);
proc_fdunlock(p);
}
/*
- * Nfs server psuedo system call for the nfsd's
- * Based on the flag value it either:
- * - adds a socket to the selection list
- * - remains in the kernel as an nfsd
- * - remains in the kernel as an nfsiod
+ * NFS server pseudo system call
*/
int
nfssvc(proc_t p, struct nfssvc_args *uap, __unused int *retval)
{
-#ifndef NFS_NOSERVER
- struct nameidata nd;
mbuf_t nam;
struct user_nfsd_args user_nfsdarg;
- struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
- struct nfsd_cargs ncd;
- struct nfsd *nfsd;
- struct nfssvc_sock *slp;
- struct nfsuid *nuidp;
- struct nfsmount *nmp;
- struct timeval now;
socket_t so;
- struct vfs_context context;
- struct ucred temp_cred;
-#endif /* NFS_NOSERVER */
int error;
AUDIT_ARG(cmd, uap->flag);
/*
- * Must be super user
+ * Must be super user for most operations (export ops checked later).
*/
- error = proc_suser(p);
- if(error)
+ if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
return (error);
- if (uap->flag & NFSSVC_BIOD)
- error = nfssvc_iod(p);
-#ifdef NFS_NOSERVER
- else
- error = ENXIO;
-#else /* !NFS_NOSERVER */
- else if (uap->flag & NFSSVC_MNTD) {
-
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
-
- error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
- if (error)
- return (error);
-
- NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
- (proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
- CAST_USER_ADDR_T(ncd.ncd_dirp), &context);
- error = namei(&nd);
- if (error)
- return (error);
- nameidone(&nd);
+#if CONFIG_MACF
+ error = mac_system_check_nfsd(kauth_cred_get());
+ if (error)
+ return (error);
+#endif
- if (vnode_isvroot(nd.ni_vp) == 0)
- error = EINVAL;
- nmp = VFSTONFS(vnode_mount(nd.ni_vp));
- vnode_put(nd.ni_vp);
- if (error)
- return (error);
+ /* make sure NFS server data structures have been initialized */
+ nfsrv_init();
- if ((nmp->nm_state & NFSSTA_MNTD) &&
- (uap->flag & NFSSVC_GOTAUTH) == 0)
- return (0);
- nmp->nm_state |= NFSSTA_MNTD;
- error = nfskerb_clientd(nmp, &ncd, uap->flag, uap->argp, p);
- } else if (uap->flag & NFSSVC_ADDSOCK) {
+ if (uap->flag & NFSSVC_ADDSOCK) {
if (IS_64BIT_PROCESS(p)) {
error = copyin(uap->argp, (caddr_t)&user_nfsdarg, sizeof(user_nfsdarg));
} else {
* to keep the socket from being closed when nfsd closes its
* file descriptor for it.
*/
- error = nfssvc_addsock(so, nam, p);
+ error = nfssvc_addsock(so, nam);
/* drop the iocount file_socket() grabbed on the file descriptor */
file_drop(user_nfsdarg.sock);
} else if (uap->flag & NFSSVC_NFSD) {
- error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
- if (error)
- return (error);
-
- if ((uap->flag & NFSSVC_AUTHIN) && ((nfsd = nsd->nsd_nfsd)) &&
- (nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
- slp = nfsd->nfsd_slp;
-
- /*
- * First check to see if another nfsd has already
- * added this credential.
- */
- for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
- nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
- if (kauth_cred_getuid(nuidp->nu_cr) == nsd->nsd_cr.cr_uid &&
- (!nfsd->nfsd_nd->nd_nam2 ||
- netaddr_match(NU_NETFAM(nuidp),
- &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
- break;
- }
- if (nuidp) {
- nfsrv_setcred(nuidp->nu_cr,nfsd->nfsd_nd->nd_cr);
- nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
- } else {
- /*
- * Nope, so we will.
- */
- if (slp->ns_numuids < nuidhash_max) {
- slp->ns_numuids++;
- nuidp = (struct nfsuid *)
- _MALLOC_ZONE(sizeof (struct nfsuid),
- M_NFSUID, M_WAITOK);
- } else
- nuidp = (struct nfsuid *)0;
- if ((slp->ns_flag & SLP_VALID) == 0) {
- if (nuidp) {
- FREE_ZONE((caddr_t)nuidp,
- sizeof (struct nfsuid), M_NFSUID);
- slp->ns_numuids--;
- }
- } else {
- if (nuidp == (struct nfsuid *)0) {
- nuidp = slp->ns_uidlruhead.tqh_first;
- if (!nuidp)
- return (ENOMEM);
- LIST_REMOVE(nuidp, nu_hash);
- TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
- nu_lru);
- if (nuidp->nu_flag & NU_NAM)
- mbuf_freem(nuidp->nu_nam);
- kauth_cred_rele(nuidp->nu_cr);
- }
- nuidp->nu_flag = 0;
-
- if (nsd->nsd_cr.cr_ngroups > NGROUPS)
- nsd->nsd_cr.cr_ngroups = NGROUPS;
-
- nfsrv_setcred(&nsd->nsd_cr, &temp_cred);
- nuidp->nu_cr = kauth_cred_create(&temp_cred);
-
- if (!nuidp->nu_cr) {
- FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID);
- slp->ns_numuids--;
- return (ENOMEM);
- }
- nuidp->nu_timestamp = nsd->nsd_timestamp;
- microtime(&now);
- nuidp->nu_expire = now.tv_sec + nsd->nsd_ttl;
- /*
- * and save the session key in nu_key.
- */
- bcopy(nsd->nsd_key, nuidp->nu_key,
- sizeof (nsd->nsd_key));
- if (nfsd->nfsd_nd->nd_nam2) {
- struct sockaddr_in *saddr;
-
- saddr = mbuf_data(nfsd->nfsd_nd->nd_nam2);
- switch (saddr->sin_family) {
- case AF_INET:
- nuidp->nu_flag |= NU_INETADDR;
- nuidp->nu_inetaddr =
- saddr->sin_addr.s_addr;
- break;
- case AF_ISO:
- default:
- nuidp->nu_flag |= NU_NAM;
- error = mbuf_copym(nfsd->nfsd_nd->nd_nam2, 0,
- MBUF_COPYALL, MBUF_WAITOK,
- &nuidp->nu_nam);
- if (error) {
- kauth_cred_rele(nuidp->nu_cr);
- FREE_ZONE(nuidp, sizeof(struct nfsuid), M_NFSUID);
- slp->ns_numuids--;
- return (error);
- }
- break;
- };
- }
- TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
- nu_lru);
- LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
- nuidp, nu_hash);
- nfsrv_setcred(nuidp->nu_cr,
- nfsd->nfsd_nd->nd_cr);
- nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
- }
- }
- }
- if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
- nfsd->nfsd_flag |= NFSD_AUTHFAIL;
- error = nfssvc_nfsd(nsd, uap->argp, p);
+ error = nfssvc_nfsd();
} else if (uap->flag & NFSSVC_EXPORT) {
- error = nfssvc_export(uap->argp, p);
+ error = nfssvc_export(uap->argp);
} else {
error = EINVAL;
}
-#endif /* NFS_NOSERVER */
if (error == EINTR || error == ERESTART)
error = 0;
return (error);
}
-/*
- * NFSKERB client helper daemon.
- * Gets authorization strings for "kerb" mounts.
- */
-static int
-nfskerb_clientd(
- struct nfsmount *nmp,
- struct nfsd_cargs *ncd,
- int flag,
- user_addr_t argp,
- proc_t p)
-{
- struct nfsuid *nuidp, *nnuidp;
- int error = 0;
- struct nfsreq *rp;
- struct timeval now;
-
- /*
- * First initialize some variables
- */
- microtime(&now);
-
- /*
- * If an authorization string is being passed in, get it.
- */
- if ((flag & NFSSVC_GOTAUTH) && (nmp->nm_state & NFSSTA_MOUNTED) &&
- ((nmp->nm_state & NFSSTA_WAITAUTH) == 0)) {
- if (nmp->nm_state & NFSSTA_HASAUTH)
- panic("cld kerb");
- if ((flag & NFSSVC_AUTHINFAIL) == 0) {
- if (ncd->ncd_authlen <= nmp->nm_authlen &&
- ncd->ncd_verflen <= nmp->nm_verflen &&
- !copyin(CAST_USER_ADDR_T(ncd->ncd_authstr),nmp->nm_authstr,ncd->ncd_authlen)&&
- !copyin(CAST_USER_ADDR_T(ncd->ncd_verfstr),nmp->nm_verfstr,ncd->ncd_verflen)){
- nmp->nm_authtype = ncd->ncd_authtype;
- nmp->nm_authlen = ncd->ncd_authlen;
- nmp->nm_verflen = ncd->ncd_verflen;
-#if NFSKERB
- nmp->nm_key = ncd->ncd_key;
-#endif
- } else
- nmp->nm_state |= NFSSTA_AUTHERR;
- } else
- nmp->nm_state |= NFSSTA_AUTHERR;
- nmp->nm_state |= NFSSTA_HASAUTH;
- wakeup((caddr_t)&nmp->nm_authlen);
- } else {
- nmp->nm_state |= NFSSTA_WAITAUTH;
- }
-
- /*
- * Loop every second updating queue until there is a termination sig.
- */
- while (nmp->nm_state & NFSSTA_MOUNTED) {
- /* Get an authorization string, if required. */
- if ((nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH)) == 0) {
- ncd->ncd_authuid = nmp->nm_authuid;
- if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs)))
- nmp->nm_state |= NFSSTA_WAITAUTH;
- else
- return (ENEEDAUTH);
- }
- /* Wait a bit (no pun) and do it again. */
- if ((nmp->nm_state & NFSSTA_MOUNTED) &&
- (nmp->nm_state & (NFSSTA_WAITAUTH | NFSSTA_HASAUTH))) {
- error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH,
- "nfskrbtimr", hz / 3);
- if (error == EINTR || error == ERESTART)
- dounmount(nmp->nm_mountp, 0, p);
- }
- }
-
- /*
- * Finally, we can free up the mount structure.
- */
- for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) {
- nnuidp = nuidp->nu_lru.tqe_next;
- LIST_REMOVE(nuidp, nu_hash);
- TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
- kauth_cred_rele(nuidp->nu_cr);
- FREE_ZONE((caddr_t)nuidp, sizeof (struct nfsuid), M_NFSUID);
- }
- /*
- * Loop through outstanding request list and remove dangling
- * references to defunct nfsmount struct
- */
- for (rp = nfs_reqq.tqh_first; rp; rp = rp->r_chain.tqe_next)
- if (rp->r_nmp == nmp)
- rp->r_nmp = (struct nfsmount *)0;
- /* Need to wake up any rcvlock waiters so they notice the unmount. */
- if (nmp->nm_state & NFSSTA_WANTRCV) {
- nmp->nm_state &= ~NFSSTA_WANTRCV;
- wakeup(&nmp->nm_state);
- }
- FREE_ZONE((caddr_t)nmp, sizeof (struct nfsmount), M_NFSMNT);
- if (error == EWOULDBLOCK)
- error = 0;
- return (error);
-}
-
-#ifndef NFS_NOSERVER
/*
* Adds a socket to the list for servicing by nfsds.
*/
-static int
-nfssvc_addsock(
- socket_t so,
- mbuf_t mynam,
- __unused proc_t p)
+int
+nfssvc_addsock(socket_t so, mbuf_t mynam)
{
- int siz;
- struct nfssvc_sock *slp;
- struct nfssvc_sock *tslp = NULL;
- int error, sodomain, sotype, soprotocol, on = 1;
+ struct nfsrv_sock *slp;
+ int error = 0, sodomain, sotype, soprotocol, on = 1;
+ int first;
struct timeval timeo;
/* make sure mbuf constants are set up */
- if (!nfs_mbuf_mlen)
+ if (!nfs_mbuf_mhlen)
nfs_mbuf_init();
sock_gettype(so, &sodomain, &sotype, &soprotocol);
- /*
- * Add it to the list, as required.
- */
- if (soprotocol == IPPROTO_UDP) {
- tslp = nfs_udpsock;
- if (!tslp || (tslp->ns_flag & SLP_VALID)) {
- mbuf_freem(mynam);
- return (EPERM);
- }
-#if ISO
- } else if (soprotocol == ISOPROTO_CLTP) {
- tslp = nfs_cltpsock;
- if (!tslp || (tslp->ns_flag & SLP_VALID)) {
- mbuf_freem(mynam);
- return (EPERM);
- }
-#endif /* ISO */
+ /* There should be only one UDP socket for each of IPv4 and IPv6 */
+ if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+ mbuf_freem(mynam);
+ return (EEXIST);
}
- /* reserve buffer space for 2 maximally-sized packets */
- siz = NFS_MAXPACKET;
- if (sotype == SOCK_STREAM)
- siz += sizeof (u_long);
- siz *= 2;
- if (siz > NFS_MAXSOCKBUF)
- siz = NFS_MAXSOCKBUF;
- if ((error = sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &siz, sizeof(siz))) ||
- (error = sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &siz, sizeof(siz)))) {
+ if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
mbuf_freem(mynam);
- return (error);
+ return (EEXIST);
}
- /*
- * Set protocol specific options { for now TCP only } and
- * reserve some space. For datagram sockets, this can get called
- * repeatedly for the same socket, but that isn't harmful.
- */
+ /* Set protocol options and reserve some space (for UDP). */
if (sotype == SOCK_STREAM) {
+ error = nfsrv_check_exports_allow_address(mynam);
+ if (error)
+ return (error);
sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
}
- if (sodomain == AF_INET && soprotocol == IPPROTO_TCP) {
+ if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+ if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
+ int reserve = NFS_UDPSOCKBUF;
+ error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDBUF, &reserve, sizeof(reserve));
+ error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVBUF, &reserve, sizeof(reserve));
+ if (error) {
+ log(LOG_INFO, "nfssvc_addsock: UDP socket buffer setting error(s) %d\n", error);
+ error = 0;
+ }
}
-
sock_nointerrupt(so, 0);
+ /*
+ * Set socket send/receive timeouts.
+ * Receive timeout shouldn't matter, but setting the send timeout
+ * will make sure that an unresponsive client can't hang the server.
+ */
timeo.tv_usec = 0;
- timeo.tv_sec = 0;
- error = sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
- error = sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+ timeo.tv_sec = 1;
+ error |= sock_setsockopt(so, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+ timeo.tv_sec = 30;
+ error |= sock_setsockopt(so, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+ if (error) {
+ log(LOG_INFO, "nfssvc_addsock: socket timeout setting error(s) %d\n", error);
+ error = 0;
+ }
- if (tslp) {
- slp = tslp;
- lck_mtx_lock(nfsd_mutex);
- } else {
- MALLOC(slp, struct nfssvc_sock *, sizeof(struct nfssvc_sock),
- M_NFSSVC, M_WAITOK);
- if (!slp) {
- mbuf_freem(mynam);
- return (ENOMEM);
- }
- bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
- lck_rw_init(&slp->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
- lck_mtx_init(&slp->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
- TAILQ_INIT(&slp->ns_uidlruhead);
- lck_mtx_lock(nfsd_mutex);
- TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
+ MALLOC(slp, struct nfsrv_sock *, sizeof(struct nfsrv_sock), M_NFSSVC, M_WAITOK);
+ if (!slp) {
+ mbuf_freem(mynam);
+ return (ENOMEM);
}
+ bzero((caddr_t)slp, sizeof (struct nfsrv_sock));
+ lck_rw_init(&slp->ns_rwlock, nfsrv_slp_rwlock_group, LCK_ATTR_NULL);
+ lck_mtx_init(&slp->ns_wgmutex, nfsrv_slp_mutex_group, LCK_ATTR_NULL);
- sock_retain(so); /* grab a retain count on the socket */
- slp->ns_so = so;
- slp->ns_sotype = sotype;
- slp->ns_nam = mynam;
+ lck_mtx_lock(nfsd_mutex);
+
+ if (soprotocol == IPPROTO_UDP) {
+ if (sodomain == AF_INET) {
+ /* There should be only one UDP/IPv4 socket */
+ if (nfsrv_udpsock) {
+ lck_mtx_unlock(nfsd_mutex);
+ nfsrv_slpfree(slp);
+ mbuf_freem(mynam);
+ return (EEXIST);
+ }
+ nfsrv_udpsock = slp;
+ }
+ if (sodomain == AF_INET6) {
+ /* There should be only one UDP/IPv6 socket */
+ if (nfsrv_udp6sock) {
+ lck_mtx_unlock(nfsd_mutex);
+ nfsrv_slpfree(slp);
+ mbuf_freem(mynam);
+ return (EEXIST);
+ }
+ nfsrv_udp6sock = slp;
+ }
+ }
+
+ /* add the socket to the list */
+ first = TAILQ_EMPTY(&nfsrv_socklist);
+ TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+ if (soprotocol == IPPROTO_TCP) {
+ nfsrv_sock_tcp_cnt++;
+ if (nfsrv_sock_idle_timeout < 0)
+ nfsrv_sock_idle_timeout = 0;
+ if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
+ nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
+ /*
+ * Possibly start or stop the idle timer. We only start the idle timer when
+ * we have more than 2 * nfsd_thread_max connections. If the idle timer is
+ * on then we may need to turn it off based on the nfsrv_sock_idle_timeout or
+ * the number of connections.
+ */
+ if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
+ if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+ if (nfsrv_idlesock_timer_on) {
+ thread_call_cancel(nfsrv_idlesock_timer_call);
+ nfsrv_idlesock_timer_on = 0;
+ }
+ } else {
+ struct nfsrv_sock *old_slp;
+ struct timeval now;
+ time_t time_to_wait = nfsrv_sock_idle_timeout;
+ /*
+ * Get the oldest tcp socket and calculate the
+ * earliest time for the next idle timer to fire
+ * based on the possibly updated nfsrv_sock_idle_timeout
+ */
+ TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
+ if (old_slp->ns_sotype == SOCK_STREAM) {
+ microuptime(&now);
+ time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
+ if (time_to_wait < 1)
+ time_to_wait = 1;
+ break;
+ }
+ }
+ /*
+ * If we have a timer scheduled, but it's going to fire too late,
+ * turn it off.
+ */
+ if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
+ thread_call_cancel(nfsrv_idlesock_timer_call);
+ nfsrv_idlesock_timer_on = 0;
+ }
+ /* Schedule the idle thread if it isn't already */
+ if (!nfsrv_idlesock_timer_on) {
+ nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+ nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+ }
+ }
+ }
+ }
+
+ sock_retain(so); /* grab a retain count on the socket */
+ slp->ns_so = so;
+ slp->ns_sotype = sotype;
+ slp->ns_nam = mynam;
+
+ /* set up the socket up-call */
+ nfsrv_uc_addsock(slp, first);
- socket_lock(so, 1);
- so->so_upcallarg = (caddr_t)slp;
- so->so_upcall = nfsrv_rcv;
- so->so_rcv.sb_flags |= SB_UPCALL; /* required for freebsd merge */
- socket_unlock(so, 1);
+ /* mark that the socket is not in the nfsrv_sockwg list */
+ slp->ns_wgq.tqe_next = SLPNOLIST;
slp->ns_flag = SLP_VALID | SLP_NEEDQ;
}
/*
- * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
- * until it is killed by a signal.
+ * nfssvc_nfsd()
+ *
+ * nfsd theory of operation:
+ *
+ * The first nfsd thread stays in user mode accepting new TCP connections
+ * which are then added via the "addsock" call. The rest of the nfsd threads
+ * simply call into the kernel and remain there in a loop handling NFS
+ * requests until killed by a signal.
+ *
+ * There's a list of nfsd threads (nfsd_head).
+ * There's an nfsd queue that contains only those nfsds that are
+ * waiting for work to do (nfsd_queue).
+ *
+ * There's a list of all NFS sockets (nfsrv_socklist) and two queues for
+ * managing the work on the sockets:
+ * nfsrv_sockwait - sockets w/new data waiting to be worked on
+ * nfsrv_sockwork - sockets being worked on which may have more work to do
+ * nfsrv_sockwg -- sockets which have pending write gather data
+ * When a socket receives data, if it is not currently queued, it
+ * will be placed at the end of the "wait" queue.
+ * Whenever a socket needs servicing we make sure it is queued and
+ * wake up a waiting nfsd (if there is one).
+ *
+ * nfsds will service at most 8 requests from the same socket before
+ * defecting to work on another socket.
+ * nfsds will defect immediately if there are any sockets in the "wait" queue
+ * nfsds looking for a socket to work on check the "wait" queue first and
+ * then check the "work" queue.
+ * When an nfsd starts working on a socket, it removes it from the head of
+ * the queue it's currently on and moves it to the end of the "work" queue.
+ * When nfsds are checking the queues for work, any sockets found not to
+ * have any work are simply dropped from the queue.
+ *
*/
-static int
-nfssvc_nfsd(nsd, argp, p)
- struct nfsd_srvargs *nsd;
- user_addr_t argp;
- proc_t p;
+int
+nfssvc_nfsd(void)
{
- mbuf_t m, mreq;
- struct nfssvc_sock *slp;
- struct nfsd *nfsd = nsd->nsd_nfsd;
+ mbuf_t m, mrep;
+ struct nfsrv_sock *slp;
+ struct nfsd *nfsd;
struct nfsrv_descript *nd = NULL;
int error = 0, cacherep, writes_todo;
- int siz, procrastinate;
+ int siz, procrastinate, opcnt = 0;
u_quad_t cur_usec;
struct timeval now;
- boolean_t funnel_state;
+ struct vfs_context context;
+ struct timespec to;
#ifndef nolint
cacherep = RC_DOIT;
writes_todo = 0;
#endif
- if (nfsd == (struct nfsd *)0) {
- MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
- if (!nfsd)
- return (ENOMEM);
- nsd->nsd_nfsd = nfsd;
- bzero((caddr_t)nfsd, sizeof (struct nfsd));
- nfsd->nfsd_procp = p;
- lck_mtx_lock(nfsd_mutex);
- TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
- nfs_numnfsd++;
- lck_mtx_unlock(nfsd_mutex);
- }
- funnel_state = thread_funnel_set(kernel_flock, FALSE);
+ MALLOC(nfsd, struct nfsd *, sizeof(struct nfsd), M_NFSD, M_WAITOK);
+ if (!nfsd)
+ return (ENOMEM);
+ bzero(nfsd, sizeof(struct nfsd));
+ lck_mtx_lock(nfsd_mutex);
+ if (nfsd_thread_count++ == 0)
+ nfsrv_initcache(); /* Init the server request cache */
+
+ TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
+ lck_mtx_unlock(nfsd_mutex);
+
+ context.vc_thread = current_thread();
+
+ /* Set time out so that nfsd threads can wake up and see if they are still needed. */
+ to.tv_sec = 5;
+ to.tv_nsec = 0;
/*
* Loop getting rpc requests until SIGKILL.
*/
for (;;) {
- if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
+ if (nfsd_thread_max <= 0) {
+ /* NFS server shutting down, get out ASAP */
+ error = EINTR;
+ slp = nfsd->nfsd_slp;
+ } else if (nfsd->nfsd_flag & NFSD_REQINPROG) {
+ /* already have some work to do */
+ error = 0;
+ slp = nfsd->nfsd_slp;
+ } else {
+ /* need to find work to do */
+ error = 0;
lck_mtx_lock(nfsd_mutex);
- while ((nfsd->nfsd_slp == NULL) && !(nfsd_head_flag & NFSD_CHECKSLP)) {
+ while (!nfsd->nfsd_slp && TAILQ_EMPTY(&nfsrv_sockwait) && TAILQ_EMPTY(&nfsrv_sockwork)) {
+ if (nfsd_thread_count > nfsd_thread_max) {
+ /*
+ * If we have no socket and there are more
+ * nfsd threads than configured, let's exit.
+ */
+ error = 0;
+ goto done;
+ }
nfsd->nfsd_flag |= NFSD_WAITING;
- nfsd_waiting++;
- error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", 0);
- nfsd_waiting--;
+ TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
+ error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
if (error) {
- lck_mtx_unlock(nfsd_mutex);
+ if (nfsd->nfsd_flag & NFSD_WAITING) {
+ TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
+ nfsd->nfsd_flag &= ~NFSD_WAITING;
+ }
+ if (error == EWOULDBLOCK)
+ continue;
goto done;
}
}
- if ((nfsd->nfsd_slp == NULL) && (nfsd_head_flag & NFSD_CHECKSLP)) {
- TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
- lck_rw_lock_shared(&slp->ns_rwlock);
- if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
- == (SLP_VALID | SLP_DOREC)) {
- if (lck_rw_lock_shared_to_exclusive(&slp->ns_rwlock)) {
- /* upgrade failed and we lost the lock; take exclusive and recheck */
- lck_rw_lock_exclusive(&slp->ns_rwlock);
- if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
- != (SLP_VALID | SLP_DOREC)) {
- /* flags no longer set, so skip this socket */
- lck_rw_done(&slp->ns_rwlock);
- continue;
- }
- }
- slp->ns_flag &= ~SLP_DOREC;
- slp->ns_sref++;
- nfsd->nfsd_slp = slp;
- lck_rw_done(&slp->ns_rwlock);
- break;
- }
- lck_rw_done(&slp->ns_rwlock);
+ slp = nfsd->nfsd_slp;
+ if (!slp && !TAILQ_EMPTY(&nfsrv_sockwait)) {
+ /* look for a socket to work on in the wait queue */
+ while ((slp = TAILQ_FIRST(&nfsrv_sockwait))) {
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ /* remove from the head of the queue */
+ TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
+ slp->ns_flag &= ~SLP_WAITQ;
+ if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
+ break;
+ /* nothing to do, so skip this socket */
+ lck_rw_done(&slp->ns_rwlock);
}
- if (slp == 0)
- nfsd_head_flag &= ~NFSD_CHECKSLP;
+ }
+ if (!slp && !TAILQ_EMPTY(&nfsrv_sockwork)) {
+ /* look for a socket to work on in the work queue */
+ while ((slp = TAILQ_FIRST(&nfsrv_sockwork))) {
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ /* remove from the head of the queue */
+ TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
+ slp->ns_flag &= ~SLP_WORKQ;
+ if ((slp->ns_flag & SLP_VALID) && (slp->ns_flag & SLP_WORKTODO))
+ break;
+ /* nothing to do, so skip this socket */
+ lck_rw_done(&slp->ns_rwlock);
+ }
+ }
+ if (!nfsd->nfsd_slp && slp) {
+ /* we found a socket to work on, grab a reference */
+ slp->ns_sref++;
+ microuptime(&now);
+ slp->ns_timestamp = now.tv_sec;
+ /* We keep the socket list in least recently used order for reaping idle sockets */
+ TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+ TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+ nfsd->nfsd_slp = slp;
+ opcnt = 0;
+ /* and put it at the back of the work queue */
+ TAILQ_INSERT_TAIL(&nfsrv_sockwork, slp, ns_svcq);
+ slp->ns_flag |= SLP_WORKQ;
+ lck_rw_done(&slp->ns_rwlock);
}
lck_mtx_unlock(nfsd_mutex);
- if ((slp = nfsd->nfsd_slp) == NULL)
+ if (!slp)
continue;
lck_rw_lock_exclusive(&slp->ns_rwlock);
if (slp->ns_flag & SLP_VALID) {
if (slp->ns_flag & SLP_DISCONN)
nfsrv_zapsock(slp);
error = nfsrv_dorec(slp, nfsd, &nd);
- microuptime(&now);
- cur_usec = (u_quad_t)now.tv_sec * 1000000 +
- (u_quad_t)now.tv_usec;
- if (error && slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) {
- error = 0;
- cacherep = RC_DOIT;
- writes_todo = 1;
- } else
- writes_todo = 0;
+ if (error == EINVAL) { // RPCSEC_GSS drop
+ if (slp->ns_sotype == SOCK_STREAM)
+ nfsrv_zapsock(slp); // drop connection
+ }
+ writes_todo = 0;
+ if (error && (slp->ns_wgtime || (slp->ns_flag & SLP_DOWRITES))) {
+ microuptime(&now);
+ cur_usec = (u_quad_t)now.tv_sec * 1000000 +
+ (u_quad_t)now.tv_usec;
+ if (slp->ns_wgtime <= cur_usec) {
+ error = 0;
+ cacherep = RC_DOIT;
+ writes_todo = 1;
+ }
+ slp->ns_flag &= ~SLP_DOWRITES;
+ }
nfsd->nfsd_flag |= NFSD_REQINPROG;
}
lck_rw_done(&slp->ns_rwlock);
- } else {
- error = 0;
- slp = nfsd->nfsd_slp;
}
- if (error || (slp->ns_flag & SLP_VALID) == 0) {
+ if (error || (slp && !(slp->ns_flag & SLP_VALID))) {
if (nd) {
+ nfsm_chain_cleanup(&nd->nd_nmreq);
if (nd->nd_nam2)
mbuf_freem(nd->nd_nam2);
- if (nd->nd_cr)
- kauth_cred_rele(nd->nd_cr);
- FREE_ZONE((caddr_t)nd,
- sizeof *nd, M_NFSRVDESC);
+ if (IS_VALID_CRED(nd->nd_cr))
+ kauth_cred_unref(&nd->nd_cr);
+ if (nd->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nd->nd_gss_context);
+ FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nd = NULL;
}
nfsd->nfsd_slp = NULL;
nfsd->nfsd_flag &= ~NFSD_REQINPROG;
- nfsrv_slpderef(slp);
+ if (slp)
+ nfsrv_slpderef(slp);
+ if (nfsd_thread_max <= 0)
+ break;
continue;
}
if (nd) {
else
nd->nd_nam = slp->ns_nam;
- /*
- * Check to see if authorization is needed.
- */
- if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
- nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
- nsd->nsd_haddr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr;
- nsd->nsd_authlen = nfsd->nfsd_authlen;
- nsd->nsd_verflen = nfsd->nfsd_verflen;
- if (!copyout(nfsd->nfsd_authstr,CAST_USER_ADDR_T(nsd->nsd_authstr),
- nfsd->nfsd_authlen) &&
- !copyout(nfsd->nfsd_verfstr, CAST_USER_ADDR_T(nsd->nsd_verfstr),
- nfsd->nfsd_verflen) &&
- !copyout((caddr_t)nsd, argp, sizeof (*nsd))) {
- thread_funnel_set(kernel_flock, funnel_state);
- return (ENEEDAUTH);
- }
- cacherep = RC_DROPIT;
- } else
- cacherep = nfsrv_getcache(nd, slp, &mreq);
-
- if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
- nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
- nd->nd_procnum = NFSPROC_NOOP;
- nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
- cacherep = RC_DOIT;
- } else if (nfs_privport) {
- /* Check if source port is privileged */
- u_short port;
- struct sockaddr *nam = mbuf_data(nd->nd_nam);
- struct sockaddr_in *sin;
-
- sin = (struct sockaddr_in *)nam;
- port = ntohs(sin->sin_port);
- if (port >= IPPORT_RESERVED &&
- nd->nd_procnum != NFSPROC_NULL) {
- char strbuf[MAX_IPv4_STR_LEN];
+ cacherep = nfsrv_getcache(nd, slp, &mrep);
+
+ if (nfsrv_require_resv_port) {
+ /* Check if source port is a reserved port */
+ in_port_t port = 0;
+ struct sockaddr *saddr = mbuf_data(nd->nd_nam);
+
+ if (saddr->sa_family == AF_INET)
+ port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+ else if (saddr->sa_family == AF_INET6)
+ port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+ if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
nd->nd_procnum = NFSPROC_NOOP;
nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
cacherep = RC_DOIT;
- printf("NFS request from unprivileged port (%s:%d)\n",
- inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
- port);
}
}
}
/*
- * Loop to get all the write rpc relies that have been
+ * Loop to get all the write RPC replies that have been
* gathered together.
*/
do {
switch (cacherep) {
case RC_DOIT:
- if (nd && (nd->nd_flag & ND_NFSV3))
- procrastinate = nfsrvw_procrastinate_v3;
+ if (nd && (nd->nd_vers == NFS_VER3))
+ procrastinate = nfsrv_wg_delay_v3;
else
- procrastinate = nfsrvw_procrastinate;
- lck_rw_lock_shared(&nfs_export_rwlock);
+ procrastinate = nfsrv_wg_delay;
+ lck_rw_lock_shared(&nfsrv_export_rwlock);
+ context.vc_ucred = NULL;
if (writes_todo || ((nd->nd_procnum == NFSPROC_WRITE) && (procrastinate > 0)))
- error = nfsrv_writegather(&nd, slp, nfsd->nfsd_procp, &mreq);
+ error = nfsrv_writegather(&nd, slp, &context, &mrep);
else
- error = (*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, nfsd->nfsd_procp, &mreq);
- lck_rw_done(&nfs_export_rwlock);
- if (mreq == NULL)
+ error = (*(nfsrv_procs[nd->nd_procnum]))(nd, slp, &context, &mrep);
+ lck_rw_done(&nfsrv_export_rwlock);
+ if (mrep == NULL) {
+ /*
+ * If this is a stream socket and we are not going
+ * to send a reply we better close the connection
+ * so the client doesn't hang.
+ */
+ if (error && slp->ns_sotype == SOCK_STREAM) {
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ nfsrv_zapsock(slp);
+ lck_rw_done(&slp->ns_rwlock);
+ printf("NFS server: NULL reply from proc = %d error = %d\n",
+ nd->nd_procnum, error);
+ }
break;
+
+ }
if (error) {
- OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs);
- nfsrv_updatecache(nd, FALSE, mreq);
+ OSAddAtomic64(1, &nfsstats.srv_errs);
+ nfsrv_updatecache(nd, FALSE, mrep);
if (nd->nd_nam2) {
mbuf_freem(nd->nd_nam2);
nd->nd_nam2 = NULL;
}
break;
}
- OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]);
- nfsrv_updatecache(nd, TRUE, mreq);
- nd->nd_mrep = NULL;
+ OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
+ nfsrv_updatecache(nd, TRUE, mrep);
+ /* FALLTHRU */
+
case RC_REPLY:
- m = mreq;
+ if (nd->nd_gss_mb != NULL) { // It's RPCSEC_GSS
+ /*
+ * Need to checksum or encrypt the reply
+ */
+ error = nfs_gss_svc_protect_reply(nd, mrep);
+ if (error) {
+ mbuf_freem(mrep);
+ break;
+ }
+ }
+
+ /*
+ * Get the total size of the reply
+ */
+ m = mrep;
siz = 0;
while (m) {
siz += mbuf_len(m);
printf("mbuf siz=%d\n",siz);
panic("Bad nfs svc reply");
}
- m = mreq;
+ m = mrep;
mbuf_pkthdr_setlen(m, siz);
error = mbuf_pkthdr_setrcvif(m, NULL);
if (error)
if (slp->ns_sotype == SOCK_STREAM) {
error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
if (!error)
- *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz);
+ *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
}
if (!error) {
if (slp->ns_flag & SLP_VALID) {
- error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL);
+ error = nfsrv_send(slp, nd->nd_nam2, m);
} else {
error = EPIPE;
mbuf_freem(m);
} else {
mbuf_freem(m);
}
- mreq = NULL;
- if (nfsrtton)
- nfsd_rt(slp->ns_sotype, nd, cacherep);
+ mrep = NULL;
if (nd->nd_nam2) {
mbuf_freem(nd->nd_nam2);
nd->nd_nam2 = NULL;
}
- if (nd->nd_mrep) {
- mbuf_freem(nd->nd_mrep);
- nd->nd_mrep = NULL;
- }
if (error == EPIPE) {
lck_rw_lock_exclusive(&slp->ns_rwlock);
nfsrv_zapsock(slp);
lck_rw_done(&slp->ns_rwlock);
}
if (error == EINTR || error == ERESTART) {
- if (nd->nd_cr)
- kauth_cred_rele(nd->nd_cr);
- FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
+ nfsm_chain_cleanup(&nd->nd_nmreq);
+ if (IS_VALID_CRED(nd->nd_cr))
+ kauth_cred_unref(&nd->nd_cr);
+ if (nd->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nd->nd_gss_context);
+ FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nfsrv_slpderef(slp);
+ lck_mtx_lock(nfsd_mutex);
goto done;
}
break;
case RC_DROPIT:
- if (nfsrtton)
- nfsd_rt(slp->ns_sotype, nd, cacherep);
- mbuf_freem(nd->nd_mrep);
mbuf_freem(nd->nd_nam2);
- nd->nd_mrep = nd->nd_nam2 = NULL;
+ nd->nd_nam2 = NULL;
break;
};
+ opcnt++;
if (nd) {
- if (nd->nd_mrep)
- mbuf_freem(nd->nd_mrep);
+ nfsm_chain_cleanup(&nd->nd_nmreq);
if (nd->nd_nam2)
mbuf_freem(nd->nd_nam2);
- if (nd->nd_cr)
- kauth_cred_rele(nd->nd_cr);
- FREE_ZONE((caddr_t)nd, sizeof *nd, M_NFSRVDESC);
+ if (IS_VALID_CRED(nd->nd_cr))
+ kauth_cred_unref(&nd->nd_cr);
+ if (nd->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nd->nd_gss_context);
+ FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nd = NULL;
}
* Check to see if there are outstanding writes that
* need to be serviced.
*/
- microuptime(&now);
- cur_usec = (u_quad_t)now.tv_sec * 1000000 +
- (u_quad_t)now.tv_usec;
- if (slp->ns_wgtime && (slp->ns_wgtime <= cur_usec)) {
- cacherep = RC_DOIT;
- writes_todo = 1;
- } else {
- writes_todo = 0;
+ writes_todo = 0;
+ if (slp->ns_wgtime) {
+ microuptime(&now);
+ cur_usec = (u_quad_t)now.tv_sec * 1000000 +
+ (u_quad_t)now.tv_usec;
+ if (slp->ns_wgtime <= cur_usec) {
+ cacherep = RC_DOIT;
+ writes_todo = 1;
+ }
}
} while (writes_todo);
- lck_rw_lock_exclusive(&slp->ns_rwlock);
- if (nfsrv_dorec(slp, nfsd, &nd)) {
+
+ nd = NULL;
+ if (TAILQ_EMPTY(&nfsrv_sockwait) && (opcnt < 8)) {
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ error = nfsrv_dorec(slp, nfsd, &nd);
+ if (error == EINVAL) { // RPCSEC_GSS drop
+ if (slp->ns_sotype == SOCK_STREAM)
+ nfsrv_zapsock(slp); // drop connection
+ }
lck_rw_done(&slp->ns_rwlock);
+ }
+ if (!nd) {
+ /* drop our reference on the socket */
nfsd->nfsd_flag &= ~NFSD_REQINPROG;
nfsd->nfsd_slp = NULL;
nfsrv_slpderef(slp);
- } else {
- lck_rw_done(&slp->ns_rwlock);
}
}
-done:
- thread_funnel_set(kernel_flock, funnel_state);
lck_mtx_lock(nfsd_mutex);
+done:
TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
FREE(nfsd, M_NFSD);
- nsd->nsd_nfsd = (struct nfsd *)0;
- if (--nfs_numnfsd == 0)
- nfsrv_init(TRUE); /* Reinitialize everything */
+ if (--nfsd_thread_count == 0)
+ nfsrv_cleanup();
lck_mtx_unlock(nfsd_mutex);
return (error);
}
-static int
-nfssvc_export(user_addr_t argp, proc_t p)
+int
+nfssvc_export(user_addr_t argp)
{
int error = 0, is_64bit;
struct user_nfs_export_args unxa;
- struct vfs_context context;
+ vfs_context_t ctx = vfs_context_current();
- context.vc_proc = p;
- context.vc_ucred = kauth_cred_get();
- is_64bit = IS_64BIT_PROCESS(p);
+ is_64bit = IS_64BIT_PROCESS(vfs_context_proc(ctx));
/* copy in pointers to path and export args */
if (is_64bit) {
if (error)
return (error);
- error = nfsrv_export(&unxa, &context);
+ error = nfsrv_export(&unxa, ctx);
return (error);
}
-#endif /* NFS_NOSERVER */
-
-int nfs_defect = 0;
-/* XXX CSM 11/25/97 Upgrade sysctl.h someday */
-#ifdef notyet
-SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
-#endif
-
-int
-nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
-{
- struct lockd_ans la;
- int error;
-
- if (uap->flag == NFSCLNT_LOCKDWAIT) {
- return (nfslockdwait(p));
- }
- if (uap->flag == NFSCLNT_LOCKDANS) {
- error = copyin(uap->argp, &la, sizeof(la));
- return (error != 0 ? error : nfslockdans(p, &la));
- }
- if (uap->flag == NFSCLNT_LOCKDFD)
- return (nfslockdfd(p, CAST_DOWN(int, uap->argp)));
- return EINVAL;
-}
-
-
-static int nfssvc_iod_continue(int);
-
/*
- * Asynchronous I/O daemons for client nfs.
- * They do read-ahead and write-behind operations on the block I/O cache.
- * Never returns unless it fails or gets killed.
- */
-static int
-nfssvc_iod(__unused proc_t p)
-{
- register int i, myiod;
- struct uthread *ut;
-
- /*
- * Assign my position or return error if too many already running
- */
- myiod = -1;
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
- if (nfs_asyncdaemon[i] == 0) {
- nfs_asyncdaemon[i]++;
- myiod = i;
- break;
- }
- if (myiod == -1)
- return (EBUSY);
- nfs_numasync++;
-
- /* stuff myiod into uthread to get off local stack for continuation */
-
- ut = (struct uthread *)get_bsdthread_info(current_thread());
- ut->uu_state.uu_nfs_myiod = myiod; /* squirrel away for continuation */
-
- nfssvc_iod_continue(0);
- /* NOTREACHED */
- return (0);
-}
-
-/*
- * Continuation for Asynchronous I/O daemons for client nfs.
- */
-static int
-nfssvc_iod_continue(int error)
-{
- register struct nfsbuf *bp;
- register int i, myiod;
- struct nfsmount *nmp;
- struct uthread *ut;
- proc_t p;
-
- /*
- * real myiod is stored in uthread, recover it
- */
- ut = (struct uthread *)get_bsdthread_info(current_thread());
- myiod = ut->uu_state.uu_nfs_myiod;
- p = current_proc(); // XXX
-
- /*
- * Just loop around doin our stuff until SIGKILL
- * - actually we don't loop with continuations...
- */
- lck_mtx_lock(nfs_iod_mutex);
- for (;;) {
- while (((nmp = nfs_iodmount[myiod]) == NULL
- || nmp->nm_bufq.tqh_first == NULL)
- && error == 0 && nfs_ioddelwri == 0) {
- if (nmp)
- nmp->nm_bufqiods--;
- nfs_iodwant[myiod] = p; // XXX this doesn't need to be a proc_t
- nfs_iodmount[myiod] = NULL;
- error = msleep0((caddr_t)&nfs_iodwant[myiod], nfs_iod_mutex,
- PWAIT | PCATCH | PDROP, "nfsidl", 0, nfssvc_iod_continue);
- lck_mtx_lock(nfs_iod_mutex);
- }
- if (error) {
- nfs_asyncdaemon[myiod] = 0;
- if (nmp) nmp->nm_bufqiods--;
- nfs_iodwant[myiod] = NULL;
- nfs_iodmount[myiod] = NULL;
- lck_mtx_unlock(nfs_iod_mutex);
- nfs_numasync--;
- if (error == EINTR || error == ERESTART)
- error = 0;
- unix_syscall_return(error);
- }
- if (nmp != NULL) {
- while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
- /* Take one off the front of the list */
- TAILQ_REMOVE(&nmp->nm_bufq, bp, nb_free);
- bp->nb_free.tqe_next = NFSNOLIST;
- nmp->nm_bufqlen--;
- if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
- nmp->nm_bufqwant = FALSE;
- lck_mtx_unlock(nfs_iod_mutex);
- wakeup(&nmp->nm_bufq);
- } else {
- lck_mtx_unlock(nfs_iod_mutex);
- }
-
- SET(bp->nb_flags, NB_IOD);
- if (ISSET(bp->nb_flags, NB_READ))
- nfs_doio(bp, bp->nb_rcred, NULL);
- else
- nfs_doio(bp, bp->nb_wcred, NULL);
-
- lck_mtx_lock(nfs_iod_mutex);
- /*
- * If there are more than one iod on this mount, then defect
- * so that the iods can be shared out fairly between the mounts
- */
- if (nfs_defect && nmp->nm_bufqiods > 1) {
- nfs_iodmount[myiod] = NULL;
- nmp->nm_bufqiods--;
- break;
- }
- }
- }
- lck_mtx_unlock(nfs_iod_mutex);
-
- if (nfs_ioddelwri) {
- i = 0;
- nfs_ioddelwri = 0;
- lck_mtx_lock(nfs_buf_mutex);
- while (i < 8 && (bp = TAILQ_FIRST(&nfsbufdelwri)) != NULL) {
- struct nfsnode *np = VTONFS(bp->nb_vp);
- nfs_buf_remfree(bp);
- nfs_buf_refget(bp);
- while ((error = nfs_buf_acquire(bp, 0, 0, 0)) == EAGAIN);
- nfs_buf_refrele(bp);
- if (error)
- break;
- if (!bp->nb_vp) {
- /* buffer is no longer valid */
- nfs_buf_drop(bp);
- continue;
- }
- if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
- /* put buffer at end of delwri list */
- TAILQ_INSERT_TAIL(&nfsbufdelwri, bp, nb_free);
- nfsbufdelwricnt++;
- nfs_buf_drop(bp);
- lck_mtx_unlock(nfs_buf_mutex);
- nfs_flushcommits(np->n_vnode, NULL, 1);
- } else {
- SET(bp->nb_flags, (NB_ASYNC | NB_IOD));
- lck_mtx_unlock(nfs_buf_mutex);
- nfs_buf_write(bp);
- }
- i++;
- lck_mtx_lock(nfs_buf_mutex);
- }
- lck_mtx_unlock(nfs_buf_mutex);
- }
-
- lck_mtx_lock(nfs_iod_mutex);
- }
-}
-
-/*
- * Shut down a socket associated with an nfssvc_sock structure.
+ * Shut down a socket associated with an nfsrv_sock structure.
* Should be called with the send lock set, if required.
* The trick here is to increment the sref at the start, so that the nfsds
* will stop using it and clear ns_flag at the end so that it will not be
* reassigned during cleanup.
*/
-static void
-nfsrv_zapsock(struct nfssvc_sock *slp)
+void
+nfsrv_zapsock(struct nfsrv_sock *slp)
{
socket_t so;
if (so == NULL)
return;
- /*
- * Attempt to deter future upcalls, but leave the
- * upcall info in place to avoid a race with the
- * networking code.
- */
- socket_lock(so, 1);
- so->so_rcv.sb_flags &= ~SB_UPCALL;
- socket_unlock(so, 1);
-
+ sock_setupcall(so, NULL, NULL);
sock_shutdown(so, SHUT_RDWR);
-}
-
-/*
- * Get an authorization string for the uid by having the mount_nfs sitting
- * on this mount point porpous out of the kernel and do it.
- */
-int
-nfs_getauth(nmp, rep, cred, auth_str, auth_len, verf_str, verf_len, key)
- register struct nfsmount *nmp;
- struct nfsreq *rep;
- kauth_cred_t cred;
- char **auth_str;
- int *auth_len;
- char *verf_str;
- int *verf_len;
- NFSKERBKEY_T key; /* return session key */
-{
- int error = 0;
-
- while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) {
- nmp->nm_state |= NFSSTA_WANTAUTH;
- (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
- "nfsauth1", 2 * hz);
- error = nfs_sigintr(nmp, rep, rep->r_procp);
- if (error) {
- nmp->nm_state &= ~NFSSTA_WANTAUTH;
- return (error);
- }
- }
- nmp->nm_state &= ~NFSSTA_WANTAUTH;
- MALLOC(*auth_str, char *, RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
- if (!*auth_str)
- return (ENOMEM);
- nmp->nm_authstr = *auth_str;
- nmp->nm_authlen = RPCAUTH_MAXSIZ;
- nmp->nm_verfstr = verf_str;
- nmp->nm_verflen = *verf_len;
- nmp->nm_authuid = kauth_cred_getuid(cred);
- nmp->nm_state &= ~NFSSTA_WAITAUTH;
- wakeup((caddr_t)&nmp->nm_authstr);
/*
- * And wait for mount_nfs to do its stuff.
+ * Remove from the up-call queue
*/
- while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) {
- (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
- "nfsauth2", 2 * hz);
- error = nfs_sigintr(nmp, rep, rep->r_procp);
- }
- if (nmp->nm_state & NFSSTA_AUTHERR) {
- nmp->nm_state &= ~NFSSTA_AUTHERR;
- error = EAUTH;
- }
- if (error)
- FREE(*auth_str, M_TEMP);
- else {
- *auth_len = nmp->nm_authlen;
- *verf_len = nmp->nm_verflen;
- bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
- }
- nmp->nm_state &= ~NFSSTA_HASAUTH;
- nmp->nm_state |= NFSSTA_WAITAUTH;
- if (nmp->nm_state & NFSSTA_WANTAUTH) {
- nmp->nm_state &= ~NFSSTA_WANTAUTH;
- wakeup((caddr_t)&nmp->nm_authtype);
- }
- return (error);
-}
-
-/*
- * Get a nickname authenticator and verifier.
- */
-int
-nfs_getnickauth(
- struct nfsmount *nmp,
- kauth_cred_t cred,
- char **auth_str,
- int *auth_len,
- char *verf_str,
- __unused int verf_len)
-{
- register struct nfsuid *nuidp;
- register u_long *nickp, *verfp;
- struct timeval ktvin, ktvout, now;
-
-#if DIAGNOSTIC
- if (verf_len < (4 * NFSX_UNSIGNED))
- panic("nfs_getnickauth verf too small");
-#endif
- for (nuidp = NMUIDHASH(nmp, kauth_cred_getuid(cred))->lh_first;
- nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
- if (kauth_cred_getuid(nuidp->nu_cr) == kauth_cred_getuid(cred))
- break;
- }
- microtime(&now);
- if (!nuidp || nuidp->nu_expire < now.tv_sec)
- return (EACCES);
-
- MALLOC(nickp, u_long *, 2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
- if (!nickp)
- return (ENOMEM);
-
- /*
- * Move to the end of the lru list (end of lru == most recently used).
- */
- TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
- TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
-
- *nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
- *nickp = txdr_unsigned(nuidp->nu_nickname);
- *auth_str = (char *)nickp;
- *auth_len = 2 * NFSX_UNSIGNED;
-
- /*
- * Now we must encrypt the verifier and package it up.
- */
- verfp = (u_long *)verf_str;
- *verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
- microtime(&now);
- if (now.tv_sec > nuidp->nu_timestamp.tv_sec ||
- (now.tv_sec == nuidp->nu_timestamp.tv_sec &&
- now.tv_usec > nuidp->nu_timestamp.tv_usec))
- nuidp->nu_timestamp = now;
- else
- nuidp->nu_timestamp.tv_usec++;
- ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
- ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
-
- /*
- * Now encrypt the timestamp verifier in ecb mode using the session
- * key.
- */
-#if NFSKERB
- XXX
-#endif
-
- *verfp++ = ktvout.tv_sec;
- *verfp++ = ktvout.tv_usec;
- *verfp = 0;
- return (0);
-}
-
-/*
- * Save the current nickname in a hash list entry on the mount point.
- */
-int
-nfs_savenickauth(nmp, cred, len, key, mdp, dposp, mrep)
- register struct nfsmount *nmp;
- kauth_cred_t cred;
- int len;
- NFSKERBKEY_T key;
- mbuf_t *mdp;
- char **dposp;
- mbuf_t mrep;
-{
- register struct nfsuid *nuidp;
- register u_long *tl;
- register long t1;
- mbuf_t md = *mdp;
- struct timeval ktvin, ktvout, now;
- u_long nick;
- char *dpos = *dposp, *cp2;
- int deltasec, error = 0;
-
- if (len == (3 * NFSX_UNSIGNED)) {
- nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
- ktvin.tv_sec = *tl++;
- ktvin.tv_usec = *tl++;
- nick = fxdr_unsigned(u_long, *tl);
-
- /*
- * Decrypt the timestamp in ecb mode.
- */
-#if NFSKERB
- XXX
-#endif
- ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
- ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
- microtime(&now);
- deltasec = now.tv_sec - ktvout.tv_sec;
- if (deltasec < 0)
- deltasec = -deltasec;
- /*
- * If ok, add it to the hash list for the mount point.
- */
- if (deltasec <= NFS_KERBCLOCKSKEW) {
- if (nmp->nm_numuids < nuidhash_max) {
- nmp->nm_numuids++;
- MALLOC_ZONE(nuidp, struct nfsuid *,
- sizeof (struct nfsuid),
- M_NFSUID, M_WAITOK);
- } else {
- nuidp = NULL;
- }
- if (!nuidp) {
- nuidp = nmp->nm_uidlruhead.tqh_first;
- if (!nuidp) {
- error = ENOMEM;
- goto nfsmout;
- }
- LIST_REMOVE(nuidp, nu_hash);
- TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
- kauth_cred_rele(nuidp->nu_cr);
- }
- nuidp->nu_flag = 0;
- kauth_cred_ref(cred);
- nuidp->nu_cr = cred;
- nuidp->nu_expire = now.tv_sec + NFS_KERBTTL;
- nuidp->nu_timestamp = ktvout;
- nuidp->nu_nickname = nick;
- bcopy(key, nuidp->nu_key, sizeof (key));
- TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
- LIST_INSERT_HEAD(NMUIDHASH(nmp, kauth_cred_getuid(cred)),
- nuidp, nu_hash);
- }
- } else
- nfsm_adv(nfsm_rndup(len));
-nfsmout:
- *mdp = md;
- *dposp = dpos;
- return (error);
+ nfsrv_uc_dequeue(slp);
}
-#ifndef NFS_NOSERVER
-
/*
* cleanup and release a server socket structure.
+ *
+ * Frees the buffered mbuf chains (ns_raw, ns_rec, ns_frag), releases every
+ * request descriptor still linked on ns_tq together with what it holds
+ * (request chain, nd_mrep and nd_nam2 mbufs, credential, GSS context),
+ * destroys ns_rwlock and ns_wgmutex, and finally frees slp itself, so
+ * slp must not be touched after this returns.
*/
void
-nfsrv_slpfree(struct nfssvc_sock *slp)
+nfsrv_slpfree(struct nfsrv_sock *slp)
{
- struct nfsuid *nuidp, *nnuidp;
struct nfsrv_descript *nwp, *nnwp;
if (slp->ns_so) {
mbuf_freem(slp->ns_raw);
if (slp->ns_rec)
mbuf_freem(slp->ns_rec);
- slp->ns_nam = slp->ns_raw = slp->ns_rec = NULL;
-
- for (nuidp = slp->ns_uidlruhead.tqh_first; nuidp != 0;
- nuidp = nnuidp) {
- nnuidp = nuidp->nu_lru.tqe_next;
- LIST_REMOVE(nuidp, nu_hash);
- TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
- if (nuidp->nu_flag & NU_NAM)
- mbuf_freem(nuidp->nu_nam);
- kauth_cred_rele(nuidp->nu_cr);
- FREE_ZONE((caddr_t)nuidp,
- sizeof (struct nfsuid), M_NFSUID);
- }
+ if (slp->ns_frag)
+ mbuf_freem(slp->ns_frag);
+ slp->ns_nam = slp->ns_raw = slp->ns_rec = slp->ns_frag = NULL;
+ slp->ns_reccnt = 0;
for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
nnwp = nwp->nd_tq.le_next;
LIST_REMOVE(nwp, nd_tq);
- if (nwp->nd_cr)
- kauth_cred_rele(nwp->nd_cr);
- FREE_ZONE((caddr_t)nwp, sizeof *nwp, M_NFSRVDESC);
+ nfsm_chain_cleanup(&nwp->nd_nmreq);
+ if (nwp->nd_mrep)
+ mbuf_freem(nwp->nd_mrep);
+ if (nwp->nd_nam2)
+ mbuf_freem(nwp->nd_nam2);
+ if (IS_VALID_CRED(nwp->nd_cr))
+ kauth_cred_unref(&nwp->nd_cr);
+ if (nwp->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
+ FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
}
LIST_INIT(&slp->ns_tq);
- lck_rw_destroy(&slp->ns_rwlock, nfs_slp_rwlock_group);
- lck_mtx_destroy(&slp->ns_wgmutex, nfs_slp_mutex_group);
+ lck_rw_destroy(&slp->ns_rwlock, nfsrv_slp_rwlock_group);
+ lck_mtx_destroy(&slp->ns_wgmutex, nfsrv_slp_mutex_group);
FREE(slp, M_NFSSVC);
}
* Derefence a server socket structure. If it has no more references and
* is no longer valid, you can throw it away.
*/
-void
-nfsrv_slpderef(struct nfssvc_sock *slp)
+static void
+nfsrv_slpderef_locked(struct nfsrv_sock *slp)
{
- struct timeval now;
-
- lck_mtx_lock(nfsd_mutex);
lck_rw_lock_exclusive(&slp->ns_rwlock);
slp->ns_sref--;
+
if (slp->ns_sref || (slp->ns_flag & SLP_VALID)) {
+ if ((slp->ns_flag & SLP_QUEUED) && !(slp->ns_flag & SLP_WORKTODO)) {
+ /*
+ * remove socket from whichever service queue it is on (wait or
+ * work) since there's no work
+ */
+ if (slp->ns_flag & SLP_WAITQ)
+ TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
+ else
+ TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
+ slp->ns_flag &= ~SLP_QUEUED;
+ }
lck_rw_done(&slp->ns_rwlock);
- lck_mtx_unlock(nfsd_mutex);
return;
}
- /* queue the socket up for deletion */
- microuptime(&now);
- slp->ns_timestamp = now.tv_sec;
- TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
- TAILQ_INSERT_TAIL(&nfssvc_deadsockhead, slp, ns_chain);
+ /*
+ * This socket is no longer valid and has no references left, so
+ * we'll get rid of it: unqueue it, take it off the socket lists
+ * and free it.
+ */
+
+ if (slp->ns_flag & SLP_QUEUED) {
+ if (slp->ns_flag & SLP_WAITQ)
+ TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
+ else
+ TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
+ slp->ns_flag &= ~SLP_QUEUED;
+ }
lck_rw_done(&slp->ns_rwlock);
- if (slp == nfs_udpsock)
- nfs_udpsock = NULL;
-#if ISO
- else if (slp == nfs_cltpsock)
- nfs_cltpsock = NULL;
-#endif
- lck_mtx_unlock(nfsd_mutex);
+
+ TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+ if (slp->ns_sotype == SOCK_STREAM)
+ nfsrv_sock_tcp_cnt--;
+
+ /*
+ * now remove from the write gather socket list; a tqe_next of
+ * SLPNOLIST means the socket is not on that list
+ */
+ if (slp->ns_wgq.tqe_next != SLPNOLIST) {
+ TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
+ slp->ns_wgq.tqe_next = SLPNOLIST;
+ }
+ nfsrv_slpfree(slp);
}
+/*
+ * Dereference a server socket structure: takes nfsd_mutex around
+ * nfsrv_slpderef_locked(), which drops one reference on slp and may
+ * free it, so slp must not be used after this call.
+ */
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)
+{
+ lck_mtx_lock(nfsd_mutex);
+ nfsrv_slpderef_locked(slp);
+ lck_mtx_unlock(nfsd_mutex);
+}
/*
- * Initialize the data structures for the server.
- * Handshake with any new nfsds starting up to avoid any chance of
- * corruption.
+ * Check periodically for idle sockets if needed and
+ * zap them.
+ *
+ * The timer turns itself off (nfsrv_idlesock_timer_on = 0) when the idle
+ * timeout is zero or below NFSD_MIN_IDLE_TIMEOUT, or when the TCP socket
+ * count is at most 2 * nfsd_thread_max. Otherwise it walks
+ * nfsrv_socklist under nfsd_mutex, zaps unreferenced non-UDP sockets
+ * whose time stamp is at least nfsrv_sock_idle_timeout seconds old, and
+ * re-arms itself. Both parameters are unused.
*/
void
-nfsrv_init(terminating)
- int terminating;
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
- struct nfssvc_sock *slp, *nslp;
+ struct nfsrv_sock *slp, *tslp;
struct timeval now;
+ time_t time_to_wait = nfsrv_sock_idle_timeout;
- if (terminating) {
- microuptime(&now);
- for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != 0; slp = nslp) {
- nslp = TAILQ_NEXT(slp, ns_chain);
- if (slp->ns_flag & SLP_VALID) {
- lck_rw_lock_exclusive(&slp->ns_rwlock);
- nfsrv_zapsock(slp);
- lck_rw_done(&slp->ns_rwlock);
- }
- /* queue the socket up for deletion */
- slp->ns_timestamp = now.tv_sec;
- TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
- TAILQ_INSERT_TAIL(&nfssvc_deadsockhead, slp, ns_chain);
- if (slp == nfs_udpsock)
- nfs_udpsock = NULL;
-#if ISO
- else if (slp == nfs_cltpsock)
- nfs_cltpsock = NULL;
-#endif
- }
- nfsrv_cleancache(); /* And clear out server cache */
-/* XXX Revisit when enabling WebNFS */
-#ifdef WEBNFS_ENABLED
- } else
- nfs_pub.np_valid = 0;
-#else
- }
-#endif
+ microuptime(&now);
+ lck_mtx_lock(nfsd_mutex);
- if (!terminating) {
- TAILQ_INIT(&nfssvc_sockhead);
- TAILQ_INIT(&nfssvc_deadsockhead);
- TAILQ_INIT(&nfsd_head);
- nfsd_head_flag &= ~NFSD_CHECKSLP;
+ /* Turn off the timer if we're supposed to and get out */
+ if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
+ nfsrv_sock_idle_timeout = 0;
+ if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+ nfsrv_idlesock_timer_on = 0;
+ lck_mtx_unlock(nfsd_mutex);
+ return;
}
- MALLOC(nfs_udpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock),
- M_NFSSVC, M_WAITOK);
- if (nfs_udpsock) {
- bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
- lck_rw_init(&nfs_udpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
- lck_mtx_init(&nfs_udpsock->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
- TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
- TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
- } else {
- printf("nfsrv_init() failed to allocate UDP socket\n");
+ TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ /* Skip udp and referenced sockets */
+ if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+ lck_rw_done(&slp->ns_rwlock);
+ continue;
+ }
+ /*
+ * If this is the first non-referenced socket that hasn't idled out,
+ * use its time stamp to calculate the earliest time in the future
+ * to start the next invocation of the timer. Since the nfsrv_socklist
+ * is sorted oldest access to newest, once we find the first one,
+ * we're done and break out of the loop. We also stop here once the
+ * TCP socket count is down to 2 * nfsd_thread_max or fewer.
+ */
+ if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
+ nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+ time_to_wait -= now.tv_sec - slp->ns_timestamp;
+ if (time_to_wait < 1)
+ time_to_wait = 1;
+ lck_rw_done(&slp->ns_rwlock);
+ break;
+ }
+ /*
+ * Bump the ref count. nfsrv_slpderef_locked below will destroy
+ * the socket, since nfsrv_zapsock has closed it.
+ */
+ slp->ns_sref++;
+ nfsrv_zapsock(slp);
+ lck_rw_done(&slp->ns_rwlock);
+ nfsrv_slpderef_locked(slp);
}
-#if ISO
- MALLOC(nfs_cltpsock, struct nfssvc_sock *, sizeof(struct nfssvc_sock),
- M_NFSSVC, M_WAITOK);
- if (nfs_cltpsock) {
- bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
- lck_rw_init(&nfs_cltpsock->ns_rwlock, nfs_slp_rwlock_group, nfs_slp_lock_attr);
- lck_mtx_init(&nfs_cltpsock->ns_wgmutex, nfs_slp_mutex_group, nfs_slp_lock_attr);
- TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
- TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
- } else {
- printf("nfsrv_init() failed to allocate CLTP socket\n");
- }
-#endif
+ /* Start ourself back up */
+ nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+ /* Remember when the next timer will fire for nfssvc_addsock. */
+ nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+ lck_mtx_unlock(nfsd_mutex);
}
/*
- * Add entries to the server monitor log.
+ * Clean up the data structures for the server.
+ *
+ * Zaps every socket on nfsrv_socklist that is still valid and drops a
+ * reference on each so that it can be freed, flushes the pending file
+ * write fsevents (CONFIG_FSE only), then stops the socket up-call
+ * threads, removes RPCSEC_GSS contexts, clears the server cache and
+ * resets the UDP socket pointers.
+ *
+ * NOTE(review): nfsd_mutex is not taken here although
+ * nfsrv_slpderef_locked() is called - presumably the caller already
+ * holds it; confirm at the call sites.
*/
-static void
-nfsd_rt(sotype, nd, cacherep)
- int sotype;
- register struct nfsrv_descript *nd;
- int cacherep;
+void
+nfsrv_cleanup(void)
{
- register struct drt *rt;
+ struct nfsrv_sock *slp, *nslp;
- struct timeval now;
+#if CONFIG_FSE
+ struct nfsrv_fmod *fp, *nfp;
+ int i;
+#endif
- rt = &nfsdrt.drt[nfsdrt.pos];
- if (cacherep == RC_DOIT)
- rt->flag = 0;
- else if (cacherep == RC_REPLY)
- rt->flag = DRT_CACHEREPLY;
- else
- rt->flag = DRT_CACHEDROP;
- if (sotype == SOCK_STREAM)
- rt->flag |= DRT_TCP;
- else if (nd->nd_flag & ND_NFSV3)
- rt->flag |= DRT_NFSV3;
- rt->proc = nd->nd_procnum;
- if (((struct sockaddr *)mbuf_data(nd->nd_nam))->sa_family == AF_INET)
- rt->ipadr = ((struct sockaddr_in *)mbuf_data(nd->nd_nam))->sin_addr.s_addr;
- else
- rt->ipadr = INADDR_ANY;
- microuptime(&now);
- rt->resptime = ((now.tv_sec - nd->nd_starttime.tv_sec) * 1000000) +
- (now.tv_usec - nd->nd_starttime.tv_usec);
- microtime(&rt->tstamp); // XXX unused
- nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+ /*
+ * Hold a temporary reference on each socket while zapping it, then
+ * drop it again; nfsrv_slpderef_locked may free the socket, so the
+ * safe iterator fetches the next entry before the body runs.
+ */
+ TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, nslp) {
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ slp->ns_sref++;
+ if (slp->ns_flag & SLP_VALID)
+ nfsrv_zapsock(slp);
+ lck_rw_done(&slp->ns_rwlock);
+ nfsrv_slpderef_locked(slp);
+ }
+
+#if CONFIG_FSE
+ /*
+ * Flush pending file write fsevents
+ */
+ lck_mtx_lock(nfsrv_fmod_mutex);
+ for (i = 0; i < NFSRVFMODHASHSZ; i++) {
+ for (fp = LIST_FIRST(&nfsrv_fmod_hashtbl[i]); fp; fp = nfp) {
+ /*
+ * Fire off the content modified fsevent for each
+ * entry, remove it from the list, and free it.
+ */
+ if (nfsrv_fsevents_enabled) {
+ fp->fm_context.vc_thread = current_thread();
+ add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
+ FSE_ARG_VNODE, fp->fm_vp,
+ FSE_ARG_DONE);
+ }
+ vnode_put(fp->fm_vp);
+ kauth_cred_unref(&fp->fm_context.vc_ucred);
+ /* grab the successor before fp is unlinked and freed */
+ nfp = LIST_NEXT(fp, fm_link);
+ LIST_REMOVE(fp, fm_link);
+ FREE(fp, M_TEMP);
+ }
+ }
+ nfsrv_fmod_pending = 0;
+ lck_mtx_unlock(nfsrv_fmod_mutex);
+#endif
+
+ nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
+
+ nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
+
+ nfsrv_cleancache(); /* And clear out server cache */
+
+ nfsrv_udpsock = NULL;
+ nfsrv_udp6sock = NULL;
}
+
#endif /* NFS_NOSERVER */