/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <kern/thread_call.h>
#include <kern/task.h>
-#include <bsm/audit_kernel.h>
+#include <security/audit/audit.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
extern int nfsrv_wg_delay_v3;
static int nfsrv_require_resv_port = 0;
-static int nfsrv_deadsock_timer_on = 0;
-
-static int nfssvc_addsock(socket_t, mbuf_t);
-static int nfssvc_nfsd(void);
-static int nfssvc_export(user_addr_t);
-
-static void nfsrv_zapsock(struct nfsrv_sock *slp);
-static void nfsrv_slpderef(struct nfsrv_sock *);
-static void nfsrv_slpfree(struct nfsrv_sock *);
+static time_t nfsrv_idlesock_timer_on = 0;	/* 0 = idle timer off; otherwise the uptime second at which it is next due to fire */
+static int nfsrv_sock_tcp_cnt = 0;	/* count of TCP (stream) sockets currently on nfsrv_socklist */
+#define NFSD_MIN_IDLE_TIMEOUT 30	/* smallest nonzero idle timeout accepted, in seconds */
+static int nfsrv_sock_idle_timeout = 3600; /* One hour */
+
+int nfssvc_export(user_addr_t argp);
+int nfssvc_nfsd(void);
+int nfssvc_addsock(socket_t, mbuf_t);
+void nfsrv_zapsock(struct nfsrv_sock *);
+void nfsrv_slpderef(struct nfsrv_sock *);
+void nfsrv_slpfree(struct nfsrv_sock *);
#endif /* NFSSERVER */
#if NFSCLIENT
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW, &nfs_iosize, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfs_access_cache_timeout, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW, &nfs_allow_async, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW, &nfs_statfs_rate_limit, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nfsiod_thread_max, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
#endif /* NFSCLIENT */
#if NFSSERVER
SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW, &nfsrv_reqcache_size, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
+#if CONFIG_FSE
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
+#endif
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
+#ifdef NFS_UC_Q_DEBUG
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
+#endif
#endif /* NFSSERVER */
#if NFSCLIENT
+/*
+ * Map the name in map->ntm_name to a numeric id in map->ntm_id.
+ *
+ * The name is first handed to nfs4_id2guid() to obtain map->ntm_guid; a
+ * nonzero result from that step is returned immediately.  The GUID is then
+ * converted to a gid when map->ntm_grpflag is set and to a uid otherwise,
+ * and the result of that conversion is returned.
+ */
+static int
+mapname2id(struct nfs_testmapid *map)
+{
+	int status = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
+
+	if (status != 0)
+		return (status);
+
+	/* user lookup unless the caller asked for a group */
+	if (!map->ntm_grpflag)
+		return (kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id));
+
+	return (kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id));
+}
+
+/*
+ * Map the numeric id in map->ntm_id to a name in map->ntm_name.
+ *
+ * The id is converted to map->ntm_guid (treated as a gid when
+ * map->ntm_grpflag is set, as a uid otherwise); a nonzero result is returned
+ * immediately.  The GUID is then passed to nfs4_guid2id() together with the
+ * name buffer and its size (presumably to fill in the name string -- confirm
+ * against nfs4_guid2id()), and that call's result is returned.
+ */
+static int
+mapid2name(struct nfs_testmapid *map)
+{
+	int namelen = sizeof(map->ntm_name);
+	int status;
+
+	status = map->ntm_grpflag
+	    ? kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid)
+	    : kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
+	if (status != 0)
+		return (status);
+
+	return (nfs4_guid2id(&map->ntm_guid, map->ntm_name, &namelen, map->ntm_grpflag));
+}
+
+
+/*
+ * Service an NFSCLNT_TESTIDMAP request.
+ *
+ * Copies a struct nfs_testmapid in from argp, performs either the
+ * name-to-id mapping (ntm_name2id set) or the id-to-name mapping, and copies
+ * the structure back out to argp.  The copyout is attempted even when the
+ * mapping fails.
+ *
+ * Returns the error from proc_suser() or copyin() if either fails; otherwise
+ * the mapping error if nonzero, else the copyout() result.
+ */
+static int
+nfsclnt_testidmap(proc_t p, user_addr_t argp)
+{
+	struct nfs_testmapid mapid;
+	int error, coerror;
+
+	/* Only the superuser may make this call. */
+	error = proc_suser(p);
+	if (error)
+		return (error);
+
+	error = copyin(argp, &mapid, sizeof(mapid));
+	if (error)
+		return (error);
+
+	/*
+	 * ntm_name arrives from user space and need not be NUL-terminated.
+	 * Force a terminator so that the name can never be read past the end
+	 * of the buffer when it is used as a string.
+	 */
+	mapid.ntm_name[sizeof(mapid.ntm_name) - 1] = '\0';
+
+	if (mapid.ntm_name2id)
+		error = mapname2id(&mapid);
+	else
+		error = mapid2name(&mapid);
+
+	/* Hand back whatever was filled in, even if the mapping failed. */
+	coerror = copyout(&mapid, argp, sizeof(mapid));
+
+	return (error ? error : coerror);
+}
+
int
nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
{
struct lockd_ans la;
int error;
- if (uap->flag == NFSCLNT_LOCKDANS) {
+ switch (uap->flag) {	/* dispatch on the client operation requested in uap->flag */
+ case NFSCLNT_LOCKDANS:	/* copy in a struct lockd_ans and pass it to nfslockdans() */
error = copyin(uap->argp, &la, sizeof(la));
- return (error != 0 ? error : nfslockdans(p, &la));
+ if (!error)
+ error = nfslockdans(p, &la);
+ break;
+ case NFSCLNT_LOCKDNOTIFY:	/* nfslockdnotify() is given the user address directly */
+ error = nfslockdnotify(p, uap->argp);
+ break;
+ case NFSCLNT_TESTIDMAP:	/* id mapping test; handled by nfsclnt_testidmap() */
+ error = nfsclnt_testidmap(p, uap->argp);
+ break;
+ default:	/* any other flag value is rejected */
+ error = EINVAL;
}
- return EINVAL;
+ return (error);	/* result of the selected operation, or EINVAL */
}
+
/*
* Asynchronous I/O threads for client NFS.
* They do read-ahead and write-behind operations on the block I/O cache.
* Async requests will pull the next struct nfsiod from the head of the free list,
* put it on the work queue, and wake whatever thread is waiting on that struct.
*/
-static int nfsiod_continue(int);
/*
* nfsiod thread exit routine
* Must be called with nfsiod_mutex held so that the
* decision to terminate is atomic with the termination.
*/
-static void
+void
nfsiod_terminate(struct nfsiod *niod)
{
nfsiod_thread_count--;
}
/* nfsiod thread startup routine */
-static void
+void
nfsiod_thread(void)
{
struct nfsiod *niod;
if (!niod) {
lck_mtx_lock(nfsiod_mutex);
nfsiod_thread_count--;
+ wakeup(current_thread());
lck_mtx_unlock(nfsiod_mutex);
thread_terminate(current_thread());
/*NOTREACHED*/
+/*
+ * Start one nfsiod thread.
+ *
+ * Returns EBUSY when the thread limit has already been reached or when the
+ * new thread cannot be created; returns 0 after the thread has been created
+ * and this caller has waited on it.  nfsiod_mutex is taken here and is
+ * released on every return path (the wait passes PDROP to msleep(), so the
+ * mutex is not held afterwards).
+ */
int
nfsiod_start(void)
{
- thread_t thd;
+ thread_t thd = THREAD_NULL;
lck_mtx_lock(nfsiod_mutex);
if ((nfsiod_thread_count >= NFSIOD_MAX) && (nfsiod_thread_count > 0)) {
lck_mtx_unlock(nfsiod_mutex);
return (EBUSY);
}
nfsiod_thread_count++;
- thd = kernel_thread(kernel_task, nfsiod_thread);
+ if (kernel_thread_start((thread_continue_t)nfsiod_thread, NULL, &thd) != KERN_SUCCESS) {
+ /* no thread was created: undo the count bump so the slot is not lost */
+ nfsiod_thread_count--;
+ lck_mtx_unlock(nfsiod_mutex);
+ return (EBUSY);
+ }
/* wait for the thread to complete startup */
msleep(thd, nfsiod_mutex, PWAIT | PDROP, "nfsiodw", NULL);
+ /* drop the thread reference returned through thd by kernel_thread_start() */
+ thread_deallocate(thd);
return (0);
}
*
* Grab an nfsiod struct to work on, do some work, then drop it
*/
-static int
+int
nfsiod_continue(int error)
{
struct nfsiod *niod;
niod = TAILQ_FIRST(&nfsiodwork);
if (!niod) {
/* there's no work queued up */
- if (error != EWOULDBLOCK)
- printf("nfsiod: error %d work %p\n", error, niod);
/* remove an old nfsiod struct and terminate */
if ((niod = TAILQ_LAST(&nfsiodfree, nfsiodlist)))
TAILQ_REMOVE(&nfsiodfree, niod, niod_link);
worktodo:
while ((nmp = niod->niod_nmp)) {
+ if (nmp == NULL){
+ niod->niod_nmp = NULL;
+ break;
+ }
+
/*
* Service this mount's async I/O queue.
*
/* grab the current contents of the queue */
TAILQ_INIT(&iodq);
TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
+ /* Mark each iod request as being managed by an iod */
+ TAILQ_FOREACH(req, &iodq, r_achain) {
+ lck_mtx_lock(&req->r_mtx);
+ assert(!(req->r_flags & R_IOD));
+ req->r_flags |= R_IOD;
+ lck_mtx_unlock(&req->r_mtx);
+ }
lck_mtx_unlock(nfsiod_mutex);
/* process the queue */
lck_mtx_lock(nfsiod_mutex);
morework = !TAILQ_EMPTY(&nmp->nm_iodq);
if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
- /* we're going to stop working on this mount */
- if (morework) /* mount still needs more work so queue it up */
+ /*
+ * We're going to stop working on this mount, but if the
+ * mount still needs more work, queue it up.
+ */
+ if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST)
TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
nmp->nm_niod = NULL;
niod->niod_nmp = NULL;
if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
+ niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
}
if (niod->niod_nmp)
goto worktodo;
{
vnode_t vp;
struct nfs_filehandle nfh;
- int error;
+ int error, fhlen, fidlen;
struct nameidata nd;
char path[MAXPATHLEN], *ptr;
- u_int pathlen;
+ size_t pathlen;
struct nfs_exportfs *nxfs;
struct nfs_export *nx;
if (error)
return (error);
- error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
+ error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
+ if (!error)
+ error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
if (error)
return (error);
+ /* limit fh size to length specified (or v3 size by default) */
+ if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
+ fhlen = NFSV3_MAX_FH_SIZE;
+ fidlen = fhlen - sizeof(struct nfs_exphandle);
if (!nfsrv_is_initialized())
return (EINVAL);
- NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+ NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
error = namei(&nd);
if (error)
nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
nfh.nfh_xh.nxh_flags = 0;
nfh.nfh_xh.nxh_reserved = 0;
- nfh.nfh_len = NFSV3_MAX_FID_SIZE;
+ nfh.nfh_len = fidlen;
error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
- if (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)
+ if (nfh.nfh_len > (uint32_t)fidlen)
error = EOVERFLOW;
nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
nfh.nfh_len += sizeof(nfh.nfh_xh);
vnode_put(vp);
if (error)
return (error);
- error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
+ error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
return (error);
}
-extern struct fileops vnops;
+extern const struct fileops vnops;
/*
* syscall for the rpc.lockd to use to translate a NFS file handle into
int
fhopen( proc_t p,
struct fhopen_args *uap,
- register_t *retval)
+ int32_t *retval)
{
vnode_t vp;
struct nfs_filehandle nfh;
if ((error = VNOP_OPEN(vp, fmode, ctx)))
goto bad;
- if ((error = vnode_ref_ext(vp, fmode)))
+ if ((error = vnode_ref_ext(vp, fmode, 0)))
goto bad;
/*
fp = nfp;
fp->f_fglob->fg_flag = fmode & FMASK;
- fp->f_fglob->fg_type = DTYPE_VNODE;
fp->f_fglob->fg_ops = &vnops;
fp->f_fglob->fg_data = (caddr_t)vp;
type = F_FLOCK;
if ((fmode & FNONBLOCK) == 0)
type |= F_WAIT;
- if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
+ if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
struct vfs_context context = *vfs_context_current();
/* Modify local copy (to not damage thread copy) */
context.vc_ucred = fp->f_fglob->fg_cred;
AUDIT_ARG(cmd, uap->flag);
/*
- * Must be super user
+ * Must be super user for most operations (export ops checked later).
*/
- error = proc_suser(p);
- if (error)
+ if ((uap->flag != NFSSVC_EXPORT) && ((error = proc_suser(p))))
return (error);
#if CONFIG_MACF
error = mac_system_check_nfsd(kauth_cred_get());
/*
* Adds a socket to the list for servicing by nfsds.
*/
-static int
+int
nfssvc_addsock(socket_t so, mbuf_t mynam)
{
struct nfsrv_sock *slp;
int error = 0, sodomain, sotype, soprotocol, on = 1;
+ int first;
struct timeval timeo;
/* make sure mbuf constants are set up */
sock_gettype(so, &sodomain, &sotype, &soprotocol);
- /* There should be only one UDP socket */
- if ((soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+ /* There should be only one UDP socket for each of IPv4 and IPv6 */
+ if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+ mbuf_freem(mynam);
+ return (EEXIST);
+ }
+ if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
mbuf_freem(mynam);
return (EEXIST);
}
/* Set protocol options and reserve some space (for UDP). */
- if (sotype == SOCK_STREAM)
+ if (sotype == SOCK_STREAM) {
+ error = nfsrv_check_exports_allow_address(mynam);
+ if (error)
+ return (error);
sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+ }
if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
lck_mtx_lock(nfsd_mutex);
if (soprotocol == IPPROTO_UDP) {
- /* There should be only one UDP socket */
- if (nfsrv_udpsock) {
- lck_mtx_unlock(nfsd_mutex);
- nfsrv_slpfree(slp);
- mbuf_freem(mynam);
- return (EEXIST);
+ if (sodomain == AF_INET) {
+ /* There should be only one UDP/IPv4 socket */
+ if (nfsrv_udpsock) {
+ lck_mtx_unlock(nfsd_mutex);
+ nfsrv_slpfree(slp);
+ mbuf_freem(mynam);
+ return (EEXIST);
+ }
+ nfsrv_udpsock = slp;
+ }
+ if (sodomain == AF_INET6) {
+ /* There should be only one UDP/IPv6 socket */
+ if (nfsrv_udp6sock) {
+ lck_mtx_unlock(nfsd_mutex);
+ nfsrv_slpfree(slp);
+ mbuf_freem(mynam);
+ return (EEXIST);
+ }
+ nfsrv_udp6sock = slp;
}
- nfsrv_udpsock = slp;
}
/* add the socket to the list */
+ first = TAILQ_EMPTY(&nfsrv_socklist);
TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+ if (soprotocol == IPPROTO_TCP) {
+ nfsrv_sock_tcp_cnt++;
+ if (nfsrv_sock_idle_timeout < 0)
+ nfsrv_sock_idle_timeout = 0;
+ if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
+ nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
+ /*
+ * Possibly start or stop the idle timer. We only start the idle timer when
+ * we have more than 2 * nfsd_thread_max connections. If the idle timer is
+ * on then we may need to turn it off based on nfsrv_sock_idle_timeout or
+ * the number of connections.
+ */
+ if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
+ if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+ if (nfsrv_idlesock_timer_on) {
+ thread_call_cancel(nfsrv_idlesock_timer_call);
+ nfsrv_idlesock_timer_on = 0;
+ }
+ } else {
+ struct nfsrv_sock *old_slp;
+ struct timeval now;
+ time_t time_to_wait = nfsrv_sock_idle_timeout;
+ /*
+ * Get the oldest tcp socket and calculate the
+ * earliest time for the next idle timer to fire
+ * based on the possibly updated nfsrv_sock_idle_timeout
+ */
+ TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
+ if (old_slp->ns_sotype == SOCK_STREAM) {
+ microuptime(&now);
+ time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
+ if (time_to_wait < 1)
+ time_to_wait = 1;
+ break;
+ }
+ }
+ /*
+ * If we have a timer scheduled, but it's going to fire too late,
+ * turn it off.
+ */
+ if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
+ thread_call_cancel(nfsrv_idlesock_timer_call);
+ nfsrv_idlesock_timer_on = 0;
+ }
+ /* Schedule the idle thread if it isn't already */
+ if (!nfsrv_idlesock_timer_on) {
+ nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+ nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+ }
+ }
+ }
+ }
sock_retain(so); /* grab a retain count on the socket */
slp->ns_so = so;
slp->ns_sotype = sotype;
slp->ns_nam = mynam;
- /* set up the socket upcall */
- socket_lock(so, 1);
- so->so_upcallarg = (caddr_t)slp;
- so->so_upcall = nfsrv_rcv;
- so->so_rcv.sb_flags |= SB_UPCALL;
- socket_unlock(so, 1);
- /* just playin' it safe */
- sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+ /* set up the socket up-call */
+ nfsrv_uc_addsock(slp, first);
/* mark that the socket is not in the nfsrv_sockwg list */
slp->ns_wgq.tqe_next = SLPNOLIST;
-
+
slp->ns_flag = SLP_VALID | SLP_NEEDQ;
nfsrv_wakenfsd(slp);
* have any work are simply dropped from the queue.
*
*/
-static int
+int
nfssvc_nfsd(void)
{
mbuf_t m, mrep;
u_quad_t cur_usec;
struct timeval now;
struct vfs_context context;
+ struct timespec to;
#ifndef nolint
cacherep = RC_DOIT;
lck_mtx_lock(nfsd_mutex);
if (nfsd_thread_count++ == 0)
nfsrv_initcache(); /* Init the server request cache */
+
TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
lck_mtx_unlock(nfsd_mutex);
context.vc_thread = current_thread();
+ /* Set a timeout so that nfsd threads can wake up and see if they are still needed. */
+ to.tv_sec = 5;
+ to.tv_nsec = 0;
+
/*
* Loop getting rpc requests until SIGKILL.
*/
}
nfsd->nfsd_flag |= NFSD_WAITING;
TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
- error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
+ error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
if (error) {
if (nfsd->nfsd_flag & NFSD_WAITING) {
TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
nfsd->nfsd_flag &= ~NFSD_WAITING;
}
+ if (error == EWOULDBLOCK)
+ continue;
goto done;
}
}
if (!nfsd->nfsd_slp && slp) {
/* we found a socket to work on, grab a reference */
slp->ns_sref++;
+ microuptime(&now);
+ slp->ns_timestamp = now.tv_sec;
+ /* We keep the socket list in least recently used order for reaping idle sockets */
+ TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+ TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
nfsd->nfsd_slp = slp;
opcnt = 0;
/* and put it at the back of the work queue */
mbuf_freem(nd->nd_nam2);
if (IS_VALID_CRED(nd->nd_cr))
kauth_cred_unref(&nd->nd_cr);
+ if (nd->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nd->nd_gss_context);
FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nd = NULL;
}
if (nfsrv_require_resv_port) {
/* Check if source port is a reserved port */
- u_short port;
- struct sockaddr *nam = mbuf_data(nd->nd_nam);
- struct sockaddr_in *sin;
-
- sin = (struct sockaddr_in *)nam;
- port = ntohs(sin->sin_port);
- if (port >= IPPORT_RESERVED &&
- nd->nd_procnum != NFSPROC_NULL) {
- char strbuf[MAX_IPv4_STR_LEN];
+ in_port_t port = 0;
+ struct sockaddr *saddr = mbuf_data(nd->nd_nam);
+
+ if (saddr->sa_family == AF_INET)
+ port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+ else if (saddr->sa_family == AF_INET6)
+ port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+ if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
nd->nd_procnum = NFSPROC_NOOP;
nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
cacherep = RC_DOIT;
- printf("NFS request from unprivileged port (%s:%d)\n",
- inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
- port);
}
}
}
if (error) {
- OSAddAtomic(1, (SInt32*)&nfsstats.srv_errs);
+ OSAddAtomic64(1, &nfsstats.srv_errs);
nfsrv_updatecache(nd, FALSE, mrep);
if (nd->nd_nam2) {
mbuf_freem(nd->nd_nam2);
}
break;
}
- OSAddAtomic(1, (SInt32*)&nfsstats.srvrpccnt[nd->nd_procnum]);
+ OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
nfsrv_updatecache(nd, TRUE, mrep);
/* FALLTHRU */
if (slp->ns_sotype == SOCK_STREAM) {
error = mbuf_prepend(&m, NFSX_UNSIGNED, MBUF_WAITOK);
if (!error)
- *(u_long*)mbuf_data(m) = htonl(0x80000000 | siz);
+ *(u_int32_t*)mbuf_data(m) = htonl(0x80000000 | siz);
}
if (!error) {
if (slp->ns_flag & SLP_VALID) {
nfsm_chain_cleanup(&nd->nd_nmreq);
if (IS_VALID_CRED(nd->nd_cr))
kauth_cred_unref(&nd->nd_cr);
+ if (nd->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nd->nd_gss_context);
FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nfsrv_slpderef(slp);
lck_mtx_lock(nfsd_mutex);
mbuf_freem(nd->nd_nam2);
if (IS_VALID_CRED(nd->nd_cr))
kauth_cred_unref(&nd->nd_cr);
+ if (nd->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nd->nd_gss_context);
FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
nd = NULL;
}
return (error);
}
-static int
+int
nfssvc_export(user_addr_t argp)
{
int error = 0, is_64bit;
* will stop using it and clear ns_flag at the end so that it will not be
* reassigned during cleanup.
*/
-static void
+void
nfsrv_zapsock(struct nfsrv_sock *slp)
{
socket_t so;
if (so == NULL)
return;
+ sock_setupcall(so, NULL, NULL);
+ sock_shutdown(so, SHUT_RDWR);
+
/*
- * Attempt to deter future upcalls, but leave the
- * upcall info in place to avoid a race with the
- * networking code.
+ * Remove from the up-call queue
*/
- socket_lock(so, 1);
- so->so_rcv.sb_flags &= ~SB_UPCALL;
- socket_unlock(so, 1);
-
- sock_shutdown(so, SHUT_RDWR);
+ nfsrv_uc_dequeue(slp);
}
/*
* cleanup and release a server socket structure.
*/
-static void
+void
nfsrv_slpfree(struct nfsrv_sock *slp)
{
struct nfsrv_descript *nwp, *nnwp;
mbuf_freem(nwp->nd_nam2);
if (IS_VALID_CRED(nwp->nd_cr))
kauth_cred_unref(&nwp->nd_cr);
+ if (nwp->nd_gss_context)
+ nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
}
LIST_INIT(&slp->ns_tq);
* Derefence a server socket structure. If it has no more references and
* is no longer valid, you can throw it away.
*/
-void
-nfsrv_slpderef(struct nfsrv_sock *slp)
+static void
+nfsrv_slpderef_locked(struct nfsrv_sock *slp)
{
- struct timeval now;
-
- lck_mtx_lock(nfsd_mutex);
lck_rw_lock_exclusive(&slp->ns_rwlock);
slp->ns_sref--;
slp->ns_flag &= ~SLP_QUEUED;
}
lck_rw_done(&slp->ns_rwlock);
- lck_mtx_unlock(nfsd_mutex);
return;
}
TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
slp->ns_flag &= ~SLP_QUEUED;
}
+ lck_rw_done(&slp->ns_rwlock);
- /*
- * Queue the socket up for deletion
- * and start the timer to delete it
- * after it has been in limbo for
- * a while.
- */
- microuptime(&now);
- slp->ns_timestamp = now.tv_sec;
TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
- TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
- if (!nfsrv_deadsock_timer_on) {
- nfsrv_deadsock_timer_on = 1;
- nfs_interval_timer_start(nfsrv_deadsock_timer_call,
- NFSRV_DEADSOCKDELAY * 1000);
- }
+ if (slp->ns_sotype == SOCK_STREAM)
+ nfsrv_sock_tcp_cnt--;
- lck_rw_done(&slp->ns_rwlock);
/* now remove from the write gather socket list */
if (slp->ns_wgq.tqe_next != SLPNOLIST) {
TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
slp->ns_wgq.tqe_next = SLPNOLIST;
}
+ nfsrv_slpfree(slp);
+}
+
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)	/* locking wrapper around nfsrv_slpderef_locked() */
+{
+ lck_mtx_lock(nfsd_mutex);	/* nfsrv_slpderef_locked() no longer takes nfsd_mutex itself */
+ nfsrv_slpderef_locked(slp);	/* slp may have been freed by this call; do not touch it afterwards */
lck_mtx_unlock(nfsd_mutex);
}
/*
- * Check periodically for dead sockets pending delete.
- * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
- * seconds then we assume it's safe to free.
+ * Check periodically for idle sockets if needed and
+ * zap them.
+ *
+ * nfsd_mutex is taken for the duration of the scan.  The timer is switched
+ * off (nfsrv_idlesock_timer_on cleared, no re-arm) when
+ * nfsrv_sock_idle_timeout is zero or below NFSD_MIN_IDLE_TIMEOUT, or when
+ * nfsrv_sock_tcp_cnt is at or below 2 * nfsd_thread_max.  Otherwise
+ * nfsrv_socklist is walked, unreferenced non-UDP sockets whose timestamp is
+ * at least nfsrv_sock_idle_timeout old are zapped and dereferenced, and the
+ * timer is re-armed with nfsrv_idlesock_timer_on set to now + time_to_wait.
 */
void
-nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
{
- struct nfsrv_sock *slp;
+ struct nfsrv_sock *slp, *tslp;
struct timeval now;
- time_t time_to_wait;
+ time_t time_to_wait = nfsrv_sock_idle_timeout;
microuptime(&now);
lck_mtx_lock(nfsd_mutex);
- while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
- if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
- break;
- TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
- nfsrv_slpfree(slp);
- }
- if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
- nfsrv_deadsock_timer_on = 0;
+ /* Turn off the timer if we're supposed to and get out */
+ if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)	/* a value below the minimum is treated as "disabled" here */
+ nfsrv_sock_idle_timeout = 0;
+ if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+ nfsrv_idlesock_timer_on = 0;
lck_mtx_unlock(nfsd_mutex);
return;
}
- time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
- if (time_to_wait < 1)
- time_to_wait = 1;
- lck_mtx_unlock(nfsd_mutex);
+ TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {	/* _SAFE: slp may be removed from the list inside the loop */
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ /* Skip udp and referenced sockets */
+ if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+ lck_rw_done(&slp->ns_rwlock);
+ continue;
+ }
+ /*
+ * If this is the first non-referenced socket that hasn't idled out,
+ * use its time stamp to calculate the earliest time in the future
+ * to start the next invocation of the timer. Since the nfsrv_socklist
+ * is sorted from oldest access to newest, once we find the first one
+ * we're done and break out of the loop.
+ */
+ if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
+ nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+ time_to_wait -= now.tv_sec - slp->ns_timestamp;	/* time left before this socket reaches the idle timeout */
+ if (time_to_wait < 1)
+ time_to_wait = 1;
+ lck_rw_done(&slp->ns_rwlock);
+ break;
+ }
+ /*
+ * Bump the ref count. nfsrv_slpderef_locked below will destroy
+ * the socket, since nfsrv_zapsock has closed it.
+ */
+ slp->ns_sref++;
+ nfsrv_zapsock(slp);
+ lck_rw_done(&slp->ns_rwlock);
+ nfsrv_slpderef_locked(slp);
+ }
- nfs_interval_timer_start(nfsrv_deadsock_timer_call,
- time_to_wait * 1000);
+ /* Start ourself back up */
+ nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+ /* Remember when the next timer will fire for nfssvc_addsock. */
+ nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+ lck_mtx_unlock(nfsd_mutex);
}
/*
{
struct nfsrv_sock *slp, *nslp;
struct timeval now;
+#if CONFIG_FSE
struct nfsrv_fmod *fp, *nfp;
int i;
+#endif
microuptime(&now);
for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
nslp = TAILQ_NEXT(slp, ns_chain);
- if (slp->ns_flag & SLP_VALID) {
- lck_rw_lock_exclusive(&slp->ns_rwlock);
+ lck_rw_lock_exclusive(&slp->ns_rwlock);
+ slp->ns_sref++;
+ if (slp->ns_flag & SLP_VALID)
nfsrv_zapsock(slp);
- lck_rw_done(&slp->ns_rwlock);
- }
- if (slp->ns_flag & SLP_QUEUED) {
- if (slp->ns_flag & SLP_WAITQ)
- TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
- else
- TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
- slp->ns_flag &= ~SLP_QUEUED;
- }
- if (slp->ns_wgq.tqe_next != SLPNOLIST) {
- TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
- slp->ns_wgq.tqe_next = SLPNOLIST;
- }
- /* queue the socket up for deletion */
- slp->ns_timestamp = now.tv_sec;
- TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
- TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
- if (!nfsrv_deadsock_timer_on) {
- nfsrv_deadsock_timer_on = 1;
- nfs_interval_timer_start(nfsrv_deadsock_timer_call,
- NFSRV_DEADSOCKDELAY * 1000);
- }
+ lck_rw_done(&slp->ns_rwlock);
+ nfsrv_slpderef_locked(slp);
}
-
+#
+#if CONFIG_FSE
/*
* Flush pending file write fsevents
*/
* Fire off the content modified fsevent for each
* entry, remove it from the list, and free it.
*/
-#if CONFIG_FSE
- if (nfsrv_fsevents_enabled)
+ if (nfsrv_fsevents_enabled) {
+ fp->fm_context.vc_thread = current_thread();
add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
FSE_ARG_VNODE, fp->fm_vp,
FSE_ARG_DONE);
-#endif
+ }
vnode_put(fp->fm_vp);
kauth_cred_unref(&fp->fm_context.vc_ucred);
nfp = LIST_NEXT(fp, fm_link);
}
nfsrv_fmod_pending = 0;
lck_mtx_unlock(nfsrv_fmod_mutex);
+#endif
+ nfsrv_uc_cleanup(); /* Stop nfs socket up-call threads */
+
nfs_gss_svc_cleanup(); /* Remove any RPCSEC_GSS contexts */
nfsrv_cleancache(); /* And clear out server cache */
nfsrv_udpsock = NULL;
+ nfsrv_udp6sock = NULL;
}
#endif /* NFS_NOSERVER */