diff --git a/bsd/nfs/nfs_syscalls.c b/bsd/nfs/nfs_syscalls.c
index c28eac76c..1b082e748 100644
--- a/bsd/nfs/nfs_syscalls.c
+++ b/bsd/nfs/nfs_syscalls.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -131,7 +131,10 @@ extern int nfsrv_wg_delay;
 extern int nfsrv_wg_delay_v3;
 
 static int nfsrv_require_resv_port = 0;
-static int nfsrv_deadsock_timer_on = 0;
+static time_t nfsrv_idlesock_timer_on = 0;
+static int nfsrv_sock_tcp_cnt = 0;
+#define NFSD_MIN_IDLE_TIMEOUT 30
+static int nfsrv_sock_idle_timeout = 3600; /* One hour */
 
 int nfssvc_export(user_addr_t argp);
 int nfssvc_nfsd(void);
@@ -150,54 +153,145 @@ SYSCTL_NODE(_vfs_generic, OID_AUTO, nfs, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs hing
 #if NFSCLIENT
 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, client, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs client hinge");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW, &nfs_iosize, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfs_access_cache_timeout, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW, &nfs_allow_async, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW, &nfs_statfs_rate_limit, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW, &nfsiod_thread_max, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD, &nfsiod_thread_count, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD, &nfs_lockd_mounts, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW, &nfs_max_async_writes, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW, &nfs_single_des, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, initialdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_initial_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nextdowndelay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_tprintf_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, iosize, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_iosize, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_cache_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_cache_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, allow_async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_allow_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, statfs_rate_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_statfs_rate_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsiod_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, nfsiod_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsiod_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, lockd_mounts, CTLFLAG_RD | CTLFLAG_LOCKED, &nfs_lockd_mounts, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, max_async_writes, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_max_async_writes, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, single_des, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_single_des, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_delete, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_delete, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_dotzfs, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_dotzfs, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, access_for_getattr, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_access_for_getattr, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, idmap_ctrl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_idmap_ctrl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, callback_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_callback_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, is_mobile, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_is_mobile, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, squishy_flags, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_squishy_flags, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_client, OID_AUTO, debug_ctl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_debug_ctl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, readlink_nocache, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_readlink_nocache, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_client, OID_AUTO, root_steals_gss_context, CTLFLAG_RW | CTLFLAG_LOCKED, &nfs_root_steals_ctx, 0, "");
 #endif /* NFSCLIENT */
 
 #if NFSSERVER
 SYSCTL_NODE(_vfs_generic_nfs, OID_AUTO, server, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "nfs server hinge");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW, &nfsrv_wg_delay, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW, &nfsrv_wg_delay_v3, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW, &nfsrv_require_resv_port, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW, &nfsrv_async, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW, &nfsrv_export_hash_size, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW, &nfsrv_reqcache_size, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW, &nfsrv_sock_max_rec_queue_length, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW, &nfsrv_user_stat_enabled, 0, "");
-SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW, &nfsrv_gss_context_ttl, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, wg_delay_v3, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_wg_delay_v3, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, require_resv_port, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_require_resv_port, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, async, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_async, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, export_hash_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_export_hash_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, reqcache_size, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_reqcache_size, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, request_queue_length, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_max_rec_queue_length, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, user_stats, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_user_stat_enabled, 0, "");
+SYSCTL_UINT(_vfs_generic_nfs_server, OID_AUTO, gss_context_ttl, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_gss_context_ttl, 0, "");
 #if CONFIG_FSE
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW, &nfsrv_fsevents_enabled, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, fsevents, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_fsevents_enabled, 0, "");
+#endif
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsd_thread_max, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsd_thread_count, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_sock_idle_timeout, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_sock_idle_timeout, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_tcp_connections, CTLFLAG_RD | CTLFLAG_LOCKED, &nfsrv_sock_tcp_cnt, 0, "");
+#ifdef NFS_UC_Q_DEBUG
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, use_upcall_svc, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_use_proxy, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_limit, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_limit, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_max_seen, CTLFLAG_RW | CTLFLAG_LOCKED, &nfsrv_uc_queue_max_seen, 0, "");
+SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, upcall_queue_count, CTLFLAG_RD | CTLFLAG_LOCKED, __DECONST(int *, &nfsrv_uc_queue_count), 0, "");
 #endif
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_max, CTLFLAG_RW, &nfsd_thread_max, 0, "");
-SYSCTL_INT(_vfs_generic_nfs_server, OID_AUTO, nfsd_thread_count, CTLFLAG_RD, &nfsd_thread_count, 0, "");
 #endif /* NFSSERVER */
 
 #if NFSCLIENT
 
+static int
+mapname2id(struct nfs_testmapid *map)
+{
+    int error;
+
+    error = nfs4_id2guid(map->ntm_name, &map->ntm_guid, map->ntm_grpflag);
+    if (error)
+        return (error);
+
+    if (map->ntm_grpflag)
+        error = kauth_cred_guid2gid(&map->ntm_guid, (gid_t *)&map->ntm_id);
+    else
+        error = kauth_cred_guid2uid(&map->ntm_guid, (uid_t *)&map->ntm_id);
+
+    return (error);
+}
+
+static int
+mapid2name(struct nfs_testmapid *map)
+{
+    int error;
+    int len = sizeof(map->ntm_name);
+
+    if (map->ntm_grpflag)
+        error = kauth_cred_gid2guid((gid_t)map->ntm_id, &map->ntm_guid);
+    else
+        error = kauth_cred_uid2guid((uid_t)map->ntm_id, &map->ntm_guid);
+
+    if (error)
+        return (error);
+
+    error = nfs4_guid2id(&map->ntm_guid, map->ntm_name, &len, map->ntm_grpflag);
+
+    return (error);
+
+}
+
+
+static int
+nfsclnt_testidmap(proc_t p, user_addr_t argp)
+{
+    struct nfs_testmapid mapid;
+    int error, coerror;
+
+    /* Let root make this call. */
+    error = proc_suser(p);
+    if (error)
+        return (error);
+
+    error = copyin(argp, &mapid, sizeof(mapid));
+    if (error)
+        return (error);
+    if (mapid.ntm_name2id)
+        error = mapname2id(&mapid);
+    else
+        error = mapid2name(&mapid);
+
+    coerror = copyout(&mapid, argp, sizeof(mapid));
+
+    return (error ? error : coerror);
+}
+
 int
 nfsclnt(proc_t p, struct nfsclnt_args *uap, __unused int *retval)
 {
     struct lockd_ans la;
     int error;
 
-    if (uap->flag == NFSCLNT_LOCKDANS) {
+    switch (uap->flag) {
+    case NFSCLNT_LOCKDANS:
         error = copyin(uap->argp, &la, sizeof(la));
-        return (error != 0 ? error : nfslockdans(p, &la));
+        if (!error)
+            error = nfslockdans(p, &la);
+        break;
+    case NFSCLNT_LOCKDNOTIFY:
+        error = nfslockdnotify(p, uap->argp);
+        break;
+    case NFSCLNT_TESTIDMAP:
+        error = nfsclnt_testidmap(p, uap->argp);
+        break;
+    default:
+        error = EINVAL;
     }
-    return EINVAL;
+    return (error);
 }
 
+
 /*
  * Asynchronous I/O threads for client NFS.
  * They do read-ahead and write-behind operations on the block I/O cache.
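For context: the server knobs added above (nfsd_sock_idle_timeout, nfsd_tcp_connections, and the upcall queue counters) are ordinary sysctls, so they can be inspected and tuned from user space. A minimal sketch, assuming the OID strings follow the vfs.generic.nfs.server node declared above; the write requires root and the chosen value is purely illustrative:

/* Illustrative sketch: query and tune the new nfsd socket knobs. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
    int idle_timeout, tcp_conns;
    size_t len = sizeof(idle_timeout);

    if (sysctlbyname("vfs.generic.nfs.server.nfsd_sock_idle_timeout",
        &idle_timeout, &len, NULL, 0) == 0)
        printf("idle timeout: %d seconds\n", idle_timeout);

    len = sizeof(tcp_conns);
    if (sysctlbyname("vfs.generic.nfs.server.nfsd_tcp_connections",
        &tcp_conns, &len, NULL, 0) == 0)
        printf("tcp connections: %d\n", tcp_conns);

    /* Lower the idle timeout to five minutes (CTLFLAG_RW, root only). */
    idle_timeout = 300;
    if (sysctlbyname("vfs.generic.nfs.server.nfsd_sock_idle_timeout",
        NULL, NULL, &idle_timeout, sizeof(idle_timeout)) != 0)
        perror("sysctlbyname");
    return (0);
}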
@@ -314,6 +408,11 @@ nfsiod_continue(int error)
 
 worktodo:
     while ((nmp = niod->niod_nmp)) {
+        if (nmp == NULL){
+            niod->niod_nmp = NULL;
+            break;
+        }
+
         /*
         * Service this mount's async I/O queue.
         *
@@ -327,6 +426,13 @@ worktodo:
         /* grab the current contents of the queue */
         TAILQ_INIT(&iodq);
         TAILQ_CONCAT(&iodq, &nmp->nm_iodq, r_achain);
+        /* Mark each iod request as being managed by an iod */
+        TAILQ_FOREACH(req, &iodq, r_achain) {
+            lck_mtx_lock(&req->r_mtx);
+            assert(!(req->r_flags & R_IOD));
+            req->r_flags |= R_IOD;
+            lck_mtx_unlock(&req->r_mtx);
+        }
         lck_mtx_unlock(nfsiod_mutex);
 
         /* process the queue */
@@ -340,8 +446,11 @@ worktodo:
         lck_mtx_lock(nfsiod_mutex);
         morework = !TAILQ_EMPTY(&nmp->nm_iodq);
         if (!morework || !TAILQ_EMPTY(&nfsiodmounts)) {
-            /* we're going to stop working on this mount */
-            if (morework) /* mount still needs more work so queue it up */
+            /*
+             * we're going to stop working on this mount, but if the
+             * mount still needs more work, queue it up
+             */
+            if (morework && nmp->nm_iodlink.tqe_next == NFSNOLIST)
                 TAILQ_INSERT_TAIL(&nfsiodmounts, nmp, nm_iodlink);
             nmp->nm_niod = NULL;
             niod->niod_nmp = NULL;
@@ -352,6 +461,7 @@
     if (!niod->niod_nmp && !TAILQ_EMPTY(&nfsiodmounts)) {
         niod->niod_nmp = TAILQ_FIRST(&nfsiodmounts);
         TAILQ_REMOVE(&nfsiodmounts, niod->niod_nmp, nm_iodlink);
+        niod->niod_nmp->nm_iodlink.tqe_next = NFSNOLIST;
     }
     if (niod->niod_nmp)
         goto worktodo;
@@ -389,10 +499,10 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
 {
     vnode_t vp;
     struct nfs_filehandle nfh;
-    int error;
+    int error, fhlen, fidlen;
     struct nameidata nd;
     char path[MAXPATHLEN], *ptr;
-    u_int pathlen;
+    size_t pathlen;
     struct nfs_exportfs *nxfs;
     struct nfs_export *nx;
 
@@ -403,14 +513,20 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
     if (error)
         return (error);
 
-    error = copyinstr(uap->fname, path, MAXPATHLEN, (size_t *)&pathlen);
+    error = copyinstr(uap->fname, path, MAXPATHLEN, &pathlen);
+    if (!error)
+        error = copyin(uap->fhp, &fhlen, sizeof(fhlen));
     if (error)
         return (error);
+    /* limit fh size to length specified (or v3 size by default) */
+    if ((fhlen != NFSV2_MAX_FH_SIZE) && (fhlen != NFSV3_MAX_FH_SIZE))
+        fhlen = NFSV3_MAX_FH_SIZE;
+    fidlen = fhlen - sizeof(struct nfs_exphandle);
 
     if (!nfsrv_is_initialized())
         return (EINVAL);
 
-    NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
+    NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
         UIO_SYSSPACE, CAST_USER_ADDR_T(path), vfs_context_current());
     error = namei(&nd);
     if (error)
@@ -452,9 +568,9 @@ getfh(proc_t p, struct getfh_args *uap, __unused int *retval)
     nfh.nfh_xh.nxh_expid = htonl(nx->nx_id);
     nfh.nfh_xh.nxh_flags = 0;
     nfh.nfh_xh.nxh_reserved = 0;
-    nfh.nfh_len = NFSV3_MAX_FID_SIZE;
+    nfh.nfh_len = fidlen;
     error = VFS_VPTOFH(vp, (int*)&nfh.nfh_len, &nfh.nfh_fid[0], NULL);
-    if (nfh.nfh_len > (int)NFSV3_MAX_FID_SIZE)
+    if (nfh.nfh_len > (uint32_t)fidlen)
         error = EOVERFLOW;
     nfh.nfh_xh.nxh_fidlen = nfh.nfh_len;
     nfh.nfh_len += sizeof(nfh.nfh_xh);
@@ -465,11 +581,11 @@ out:
     vnode_put(vp);
     if (error)
         return (error);
-    error = copyout((caddr_t)&nfh, uap->fhp, sizeof(nfh));
+    error = copyout((caddr_t)&nfh, uap->fhp, sizeof(fhandle_t));
     return (error);
 }
 
-extern struct fileops vnops;
+extern const struct fileops vnops;
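The reworked getfh() above now reads the caller's buffer first to decide between a v2- and v3-sized handle. A hedged sketch of the resulting calling convention; it assumes the private getfh(2) wrapper is available, that fhandle_t is laid out as in <sys/mount.h> (length field first), and it hardcodes 64 in place of NFSV3_MAX_FH_SIZE:

/* Illustrative sketch only: pre-seed the desired handle length, then
 * call getfh(2). The syscall requires root and is a private interface
 * used by the NFS tools. */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int getfh(const char *path, fhandle_t *fhp);    /* private syscall wrapper (assumed) */

int
fetch_v3_handle(const char *path, fhandle_t *fhp)
{
    fhp->fh_len = 64;    /* NFSV3_MAX_FH_SIZE: ask for a v3-sized handle */
    if (getfh(path, fhp) < 0) {
        perror("getfh");
        return (-1);
    }
    printf("%s: %u byte file handle\n", path, (unsigned)fhp->fh_len);
    return (0);
}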
 
 /*
  * syscall for the rpc.lockd to use to translate a NFS file handle into
@@ -553,6 +669,11 @@ fhopen( proc_t p,
         goto bad;
     }
 
+#if CONFIG_MACF
+    if ((error = mac_vnode_check_open(ctx, vp, fmode)))
+        goto bad;
+#endif
+
     /* compute action to be authorized */
     action = 0;
     if (fmode & FREAD)
@@ -564,7 +685,7 @@ fhopen( proc_t p,
     if ((error = VNOP_OPEN(vp, fmode, ctx)))
         goto bad;
 
-    if ((error = vnode_ref_ext(vp, fmode)))
+    if ((error = vnode_ref_ext(vp, fmode, 0)))
         goto bad;
 
     /*
@@ -579,7 +700,6 @@ fhopen( proc_t p,
 
     fp = nfp;
     fp->f_fglob->fg_flag = fmode & FMASK;
-    fp->f_fglob->fg_type = DTYPE_VNODE;
     fp->f_fglob->fg_ops = &vnops;
     fp->f_fglob->fg_data = (caddr_t)vp;
 
@@ -595,7 +715,7 @@ fhopen( proc_t p,
         type = F_FLOCK;
         if ((fmode & FNONBLOCK) == 0)
             type |= F_WAIT;
-        if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx))) {
+        if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL))) {
             struct vfs_context context = *vfs_context_current();
             /* Modify local copy (to not damage thread copy) */
             context.vc_ucred = fp->f_fglob->fg_cred;
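The F_FLOCK/F_WAIT setup above is the same advisory-lock path that open(2) takes for O_SHLOCK/O_EXLOCK, with FNONBLOCK deciding whether the kernel waits for the lock. For comparison, the ordinary file-descriptor route through the same VNOP_ADVLOCK logic:

/* Ordinary open(2) path exercising the same advisory-lock setup:
 * O_EXLOCK requests an exclusive flock-style lock at open time, and
 * O_NONBLOCK makes a contended lock fail with EWOULDBLOCK instead of
 * sleeping (the kernel's F_WAIT flag stays clear). */
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int
open_locked(const char *path)
{
    int fd = open(path, O_RDWR | O_EXLOCK | O_NONBLOCK);
    if (fd < 0 && errno == EWOULDBLOCK)
        fprintf(stderr, "%s is locked by someone else\n", path);
    return (fd);    /* the lock is dropped automatically on close() */
}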
@@ -706,6 +826,7 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 {
     struct nfsrv_sock *slp;
     int error = 0, sodomain, sotype, soprotocol, on = 1;
+    int first;
     struct timeval timeo;
 
     /* make sure mbuf constants are set up */
@@ -714,15 +835,23 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 
     sock_gettype(so, &sodomain, &sotype, &soprotocol);
 
-    /* There should be only one UDP socket */
-    if ((soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+    /* There should be only one UDP socket for each of IPv4 and IPv6 */
+    if ((sodomain == AF_INET) && (soprotocol == IPPROTO_UDP) && nfsrv_udpsock) {
+        mbuf_freem(mynam);
+        return (EEXIST);
+    }
+    if ((sodomain == AF_INET6) && (soprotocol == IPPROTO_UDP) && nfsrv_udp6sock) {
         mbuf_freem(mynam);
         return (EEXIST);
     }
 
     /* Set protocol options and reserve some space (for UDP). */
-    if (sotype == SOCK_STREAM)
+    if (sotype == SOCK_STREAM) {
+        error = nfsrv_check_exports_allow_address(mynam);
+        if (error)
+            return (error);
         sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+    }
     if ((sodomain == AF_INET) && (soprotocol == IPPROTO_TCP))
         sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
     if (sotype == SOCK_DGRAM) { /* set socket buffer sizes for UDP */
@@ -763,36 +892,95 @@ nfssvc_addsock(socket_t so, mbuf_t mynam)
 
     lck_mtx_lock(nfsd_mutex);
     if (soprotocol == IPPROTO_UDP) {
-        /* There should be only one UDP socket */
-        if (nfsrv_udpsock) {
-            lck_mtx_unlock(nfsd_mutex);
-            nfsrv_slpfree(slp);
-            mbuf_freem(mynam);
-            return (EEXIST);
+        if (sodomain == AF_INET) {
+            /* There should be only one UDP/IPv4 socket */
+            if (nfsrv_udpsock) {
+                lck_mtx_unlock(nfsd_mutex);
+                nfsrv_slpfree(slp);
+                mbuf_freem(mynam);
+                return (EEXIST);
+            }
+            nfsrv_udpsock = slp;
+        }
+        if (sodomain == AF_INET6) {
+            /* There should be only one UDP/IPv6 socket */
+            if (nfsrv_udp6sock) {
+                lck_mtx_unlock(nfsd_mutex);
+                nfsrv_slpfree(slp);
+                mbuf_freem(mynam);
+                return (EEXIST);
+            }
+            nfsrv_udp6sock = slp;
         }
-        nfsrv_udpsock = slp;
     }
 
     /* add the socket to the list */
+    first = TAILQ_EMPTY(&nfsrv_socklist);
     TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
+    if (soprotocol == IPPROTO_TCP) {
+        nfsrv_sock_tcp_cnt++;
+        if (nfsrv_sock_idle_timeout < 0)
+            nfsrv_sock_idle_timeout = 0;
+        if (nfsrv_sock_idle_timeout && (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT))
+            nfsrv_sock_idle_timeout = NFSD_MIN_IDLE_TIMEOUT;
+        /*
+         * Possibly start or stop the idle timer. We only start the idle timer when
+         * we have more than 2 * nfsd_thread_max connections. If the idle timer is
+         * on then we may need to turn it off based on the nfsrv_sock_idle_timeout or
+         * the number of connections.
+         */
+        if ((nfsrv_sock_tcp_cnt > 2 * nfsd_thread_max) || nfsrv_idlesock_timer_on) {
+            if (nfsrv_sock_idle_timeout == 0 || nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+                if (nfsrv_idlesock_timer_on) {
+                    thread_call_cancel(nfsrv_idlesock_timer_call);
+                    nfsrv_idlesock_timer_on = 0;
+                }
+            } else {
+                struct nfsrv_sock *old_slp;
+                struct timeval now;
+                time_t time_to_wait = nfsrv_sock_idle_timeout;
+                /*
+                 * Get the oldest tcp socket and calculate the
+                 * earliest time for the next idle timer to fire
+                 * based on the possibly updated nfsrv_sock_idle_timeout
+                 */
+                TAILQ_FOREACH(old_slp, &nfsrv_socklist, ns_chain) {
+                    if (old_slp->ns_sotype == SOCK_STREAM) {
+                        microuptime(&now);
+                        time_to_wait -= now.tv_sec - old_slp->ns_timestamp;
+                        if (time_to_wait < 1)
+                            time_to_wait = 1;
+                        break;
+                    }
+                }
+                /*
+                 * If we have a timer scheduled, but it's going to fire too late,
+                 * turn it off.
+                 */
+                if (nfsrv_idlesock_timer_on > now.tv_sec + time_to_wait) {
+                    thread_call_cancel(nfsrv_idlesock_timer_call);
+                    nfsrv_idlesock_timer_on = 0;
+                }
+                /* Schedule the idle thread if it isn't already */
+                if (!nfsrv_idlesock_timer_on) {
+                    nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+                    nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+                }
+            }
+        }
+    }
 
     sock_retain(so); /* grab a retain count on the socket */
     slp->ns_so = so;
     slp->ns_sotype = sotype;
     slp->ns_nam = mynam;
 
-    /* set up the socket upcall */
-    socket_lock(so, 1);
-    so->so_upcallarg = (caddr_t)slp;
-    so->so_upcall = nfsrv_rcv;
-    so->so_rcv.sb_flags |= SB_UPCALL;
-    socket_unlock(so, 1);
-    /* just playin' it safe */
-    sock_setsockopt(so, SOL_SOCKET, SO_UPCALLCLOSEWAIT, &on, sizeof(on));
+    /* set up the socket up-call */
+    nfsrv_uc_addsock(slp, first);
 
     /* mark that the socket is not in the nfsrv_sockwg list */
     slp->ns_wgq.tqe_next = SLPNOLIST;
-
+
     slp->ns_flag = SLP_VALID | SLP_NEEDQ;
 
     nfsrv_wakenfsd(slp);
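The idle-timer arming policy above is easier to follow with the locking, list walk, and thread_call plumbing stripped away. A simplified model of the same decision; the names are local to the sketch, not kernel interfaces:

/* Simplified model of the idle-timer arming policy above.
 * Returns the delay (in seconds) to arm the timer for, or 0 to leave it off. */
#include <time.h>

#define MIN_IDLE_TIMEOUT 30

static time_t
idle_timer_delay(int tcp_count, int thread_max, time_t idle_timeout,
    time_t oldest_timestamp, time_t now)
{
    /* Sanitize the tunable, exactly as the kernel code clamps it. */
    if (idle_timeout < 0)
        idle_timeout = 0;
    if (idle_timeout && idle_timeout < MIN_IDLE_TIMEOUT)
        idle_timeout = MIN_IDLE_TIMEOUT;

    /* Reaping only makes sense with a timeout and "too many" connections. */
    if (idle_timeout == 0 || tcp_count <= 2 * thread_max)
        return (0);

    /* Fire when the oldest (least recently used) socket would expire. */
    time_t wait = idle_timeout - (now - oldest_timestamp);
    return (wait < 1 ? 1 : wait);
}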
@@ -848,6 +1036,7 @@ nfssvc_nfsd(void)
     u_quad_t cur_usec;
     struct timeval now;
     struct vfs_context context;
+    struct timespec to;
 
 #ifndef nolint
     cacherep = RC_DOIT;
@@ -861,11 +1050,16 @@ nfssvc_nfsd(void)
     lck_mtx_lock(nfsd_mutex);
     if (nfsd_thread_count++ == 0)
         nfsrv_initcache();        /* Init the server request cache */
+
     TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
     lck_mtx_unlock(nfsd_mutex);
 
     context.vc_thread = current_thread();
 
+    /* Set a timeout so that nfsd threads can wake up and see if they are still needed. */
+    to.tv_sec = 5;
+    to.tv_nsec = 0;
+
     /*
      * Loop getting rpc requests until SIGKILL.
      */
@@ -893,12 +1087,14 @@ nfssvc_nfsd(void)
             }
             nfsd->nfsd_flag |= NFSD_WAITING;
             TAILQ_INSERT_HEAD(&nfsd_queue, nfsd, nfsd_queue);
-            error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", NULL);
+            error = msleep(nfsd, nfsd_mutex, PSOCK | PCATCH, "nfsd", &to);
             if (error) {
                 if (nfsd->nfsd_flag & NFSD_WAITING) {
                     TAILQ_REMOVE(&nfsd_queue, nfsd, nfsd_queue);
                     nfsd->nfsd_flag &= ~NFSD_WAITING;
                 }
+                if (error == EWOULDBLOCK)
+                    continue;
                 goto done;
             }
         }
@@ -932,6 +1128,11 @@ nfssvc_nfsd(void)
             if (!nfsd->nfsd_slp && slp) {
                 /* we found a socket to work on, grab a reference */
                 slp->ns_sref++;
+                microuptime(&now);
+                slp->ns_timestamp = now.tv_sec;
+                /* We keep the socket list in least recently used order for reaping idle sockets */
+                TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
+                TAILQ_INSERT_TAIL(&nfsrv_socklist, slp, ns_chain);
                 nfsd->nfsd_slp = slp;
                 opcnt = 0;
                 /* and put it at the back of the work queue */
@@ -978,6 +1179,8 @@ nfssvc_nfsd(void)
                     mbuf_freem(nd->nd_nam2);
                 if (IS_VALID_CRED(nd->nd_cr))
                     kauth_cred_unref(&nd->nd_cr);
+                if (nd->nd_gss_context)
+                    nfs_gss_svc_ctx_deref(nd->nd_gss_context);
                 FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
                 nd = NULL;
             }
@@ -1000,21 +1203,17 @@ nfssvc_nfsd(void)
 
             if (nfsrv_require_resv_port) {
                 /* Check if source port is a reserved port */
-                u_short port;
-                struct sockaddr *nam = mbuf_data(nd->nd_nam);
-                struct sockaddr_in *sin;
-
-                sin = (struct sockaddr_in *)nam;
-                port = ntohs(sin->sin_port);
-                if (port >= IPPORT_RESERVED &&
-                    nd->nd_procnum != NFSPROC_NULL) {
-                    char strbuf[MAX_IPv4_STR_LEN];
+                in_port_t port = 0;
+                struct sockaddr *saddr = mbuf_data(nd->nd_nam);
+
+                if (saddr->sa_family == AF_INET)
+                    port = ntohs(((struct sockaddr_in*)saddr)->sin_port);
+                else if (saddr->sa_family == AF_INET6)
+                    port = ntohs(((struct sockaddr_in6*)saddr)->sin6_port);
+                if ((port >= IPPORT_RESERVED) && (nd->nd_procnum != NFSPROC_NULL)) {
                     nd->nd_procnum = NFSPROC_NOOP;
                     nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
                     cacherep = RC_DOIT;
-                    printf("NFS request from unprivileged port (%s:%d)\n",
-                        inet_ntop(AF_INET, &sin->sin_addr, strbuf, sizeof(strbuf)),
-                        port);
                 }
             }
 
@@ -1055,7 +1254,7 @@ nfssvc_nfsd(void)
                 }
 
                 if (error) {
-                    OSAddAtomic(1, &nfsstats.srv_errs);
+                    OSAddAtomic64(1, &nfsstats.srv_errs);
                     nfsrv_updatecache(nd, FALSE, mrep);
                     if (nd->nd_nam2) {
                         mbuf_freem(nd->nd_nam2);
@@ -1063,7 +1262,7 @@ nfssvc_nfsd(void)
                     }
                     break;
                 }
-                OSAddAtomic(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
+                OSAddAtomic64(1, &nfsstats.srvrpccnt[nd->nd_procnum]);
                 nfsrv_updatecache(nd, TRUE, mrep);
                 /* FALLTHRU */
 
@@ -1130,6 +1329,8 @@ nfssvc_nfsd(void)
             nfsm_chain_cleanup(&nd->nd_nmreq);
             if (IS_VALID_CRED(nd->nd_cr))
                 kauth_cred_unref(&nd->nd_cr);
+            if (nd->nd_gss_context)
+                nfs_gss_svc_ctx_deref(nd->nd_gss_context);
             FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
             nfsrv_slpderef(slp);
             lck_mtx_lock(nfsd_mutex);
@@ -1148,6 +1349,8 @@ nfssvc_nfsd(void)
             mbuf_freem(nd->nd_nam2);
         if (IS_VALID_CRED(nd->nd_cr))
             kauth_cred_unref(&nd->nd_cr);
+        if (nd->nd_gss_context)
+            nfs_gss_svc_ctx_deref(nd->nd_gss_context);
         FREE_ZONE(nd, sizeof(*nd), M_NFSRVDESC);
         nd = NULL;
     }
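The reserved-port check above is now address-family agnostic: it pulls the source port out of either an IPv4 or an IPv6 sockaddr before comparing it against IPPORT_RESERVED. The same extraction, as a standalone user-space helper:

/* Extract the source port from an IPv4 or IPv6 sockaddr, as the
 * reserved-port check above now does; returns 0 for other families.
 * A client binding below IPPORT_RESERVED (1024) requires root, which
 * is the property the nfsrv_require_resv_port check relies on. */
#include <sys/socket.h>
#include <netinet/in.h>

static in_port_t
sockaddr_port(const struct sockaddr *sa)
{
    switch (sa->sa_family) {
    case AF_INET:
        return (ntohs(((const struct sockaddr_in *)sa)->sin_port));
    case AF_INET6:
        return (ntohs(((const struct sockaddr_in6 *)sa)->sin6_port));
    default:
        return (0);
    }
}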
@@ -1249,16 +1452,13 @@ nfsrv_zapsock(struct nfsrv_sock *slp)
     if (so == NULL)
         return;
 
+    sock_setupcall(so, NULL, NULL);
+    sock_shutdown(so, SHUT_RDWR);
+
     /*
-     * Attempt to deter future upcalls, but leave the
-     * upcall info in place to avoid a race with the
-     * networking code.
+     * Remove from the up-call queue
      */
-    socket_lock(so, 1);
-    so->so_rcv.sb_flags &= ~SB_UPCALL;
-    socket_unlock(so, 1);
-
-    sock_shutdown(so, SHUT_RDWR);
+    nfsrv_uc_dequeue(slp);
 }
 
 /*
@@ -1294,6 +1494,8 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
             mbuf_freem(nwp->nd_nam2);
         if (IS_VALID_CRED(nwp->nd_cr))
             kauth_cred_unref(&nwp->nd_cr);
+        if (nwp->nd_gss_context)
+            nfs_gss_svc_ctx_deref(nwp->nd_gss_context);
         FREE_ZONE(nwp, sizeof(*nwp), M_NFSRVDESC);
     }
     LIST_INIT(&slp->ns_tq);
@@ -1307,12 +1509,9 @@ nfsrv_slpfree(struct nfsrv_sock *slp)
  * Dereference a server socket structure. If it has no more references and
  * is no longer valid, you can throw it away.
  */
-void
-nfsrv_slpderef(struct nfsrv_sock *slp)
+static void
+nfsrv_slpderef_locked(struct nfsrv_sock *slp)
 {
-    struct timeval now;
-
-    lck_mtx_lock(nfsd_mutex);
     lck_rw_lock_exclusive(&slp->ns_rwlock);
     slp->ns_sref--;
 
@@ -1326,7 +1525,6 @@ nfsrv_slpderef(struct nfsrv_sock *slp)
             slp->ns_flag &= ~SLP_QUEUED;
         }
         lck_rw_done(&slp->ns_rwlock);
-        lck_mtx_unlock(nfsd_mutex);
         return;
     }
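Splitting nfsrv_slpderef() into a _locked core plus a mutex-taking wrapper (completed in the next hunk) lets callers that already hold nfsd_mutex, such as the idle-socket reaper below, drop references without re-locking. The general shape of that pattern, sketched here with pthreads purely for illustration; the names are hypothetical:

/* Generic shape of the _locked/wrapper split used above: callers that
 * already hold the lock use the _locked variant directly. */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t table_mutex = PTHREAD_MUTEX_INITIALIZER;

struct entry {
    int refcount;
    /* ... payload ... */
};

static void
entry_release_locked(struct entry *e)    /* caller holds table_mutex */
{
    if (--e->refcount == 0) {
        /* unlink from any lists protected by table_mutex, then free */
        free(e);
    }
}

static void
entry_release(struct entry *e)           /* convenience wrapper */
{
    pthread_mutex_lock(&table_mutex);
    entry_release_locked(e);
    pthread_mutex_unlock(&table_mutex);
}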
@@ -1339,66 +1537,88 @@
             TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
         slp->ns_flag &= ~SLP_QUEUED;
     }
+    lck_rw_done(&slp->ns_rwlock);
 
-    /*
-     * Queue the socket up for deletion
-     * and start the timer to delete it
-     * after it has been in limbo for
-     * a while.
-     */
-    microuptime(&now);
-    slp->ns_timestamp = now.tv_sec;
     TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
-    TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
-    if (!nfsrv_deadsock_timer_on) {
-        nfsrv_deadsock_timer_on = 1;
-        nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-            NFSRV_DEADSOCKDELAY * 1000);
-    }
+    if (slp->ns_sotype == SOCK_STREAM)
+        nfsrv_sock_tcp_cnt--;
 
-    lck_rw_done(&slp->ns_rwlock);
     /* now remove from the write gather socket list */
     if (slp->ns_wgq.tqe_next != SLPNOLIST) {
         TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
         slp->ns_wgq.tqe_next = SLPNOLIST;
     }
+    nfsrv_slpfree(slp);
+}
+
+void
+nfsrv_slpderef(struct nfsrv_sock *slp)
+{
+    lck_mtx_lock(nfsd_mutex);
+    nfsrv_slpderef_locked(slp);
     lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
- * Check periodically for dead sockets pending delete.
- * If a socket has been dead for more than NFSRV_DEADSOCKDELAY
- * seconds then we assume it's safe to free.
+ * Check periodically for idle sockets if needed and
+ * zap them.
  */
 void
-nfsrv_deadsock_timer(__unused void *param0, __unused void *param1)
+nfsrv_idlesock_timer(__unused void *param0, __unused void *param1)
 {
-    struct nfsrv_sock *slp;
+    struct nfsrv_sock *slp, *tslp;
     struct timeval now;
-    time_t time_to_wait;
+    time_t time_to_wait = nfsrv_sock_idle_timeout;
 
     microuptime(&now);
     lck_mtx_lock(nfsd_mutex);
 
-    while ((slp = TAILQ_FIRST(&nfsrv_deadsocklist))) {
-        if ((slp->ns_timestamp + NFSRV_DEADSOCKDELAY) > now.tv_sec)
-            break;
-        TAILQ_REMOVE(&nfsrv_deadsocklist, slp, ns_chain);
-        nfsrv_slpfree(slp);
-    }
-    if (TAILQ_EMPTY(&nfsrv_deadsocklist)) {
-        nfsrv_deadsock_timer_on = 0;
+    /* Turn off the timer if we're supposed to, and get out */
+    if (nfsrv_sock_idle_timeout < NFSD_MIN_IDLE_TIMEOUT)
+        nfsrv_sock_idle_timeout = 0;
+    if ((nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) || (nfsrv_sock_idle_timeout == 0)) {
+        nfsrv_idlesock_timer_on = 0;
         lck_mtx_unlock(nfsd_mutex);
         return;
     }
-    time_to_wait = (slp->ns_timestamp + NFSRV_DEADSOCKDELAY) - now.tv_sec;
-    if (time_to_wait < 1)
-        time_to_wait = 1;
-
-    lck_mtx_unlock(nfsd_mutex);
 
+    TAILQ_FOREACH_SAFE(slp, &nfsrv_socklist, ns_chain, tslp) {
+        lck_rw_lock_exclusive(&slp->ns_rwlock);
+        /* Skip udp and referenced sockets */
+        if (slp->ns_sotype == SOCK_DGRAM || slp->ns_sref) {
+            lck_rw_done(&slp->ns_rwlock);
+            continue;
+        }
+        /*
+         * If this is the first non-referenced socket that hasn't idled out,
+         * use its time stamp to calculate the earliest time in the future
+         * to start the next invocation of the timer. Since the nfsrv_socklist
+         * is sorted from oldest access to newest, once we find the first one
+         * we're done, and we break out of the loop.
+         */
+        if (((slp->ns_timestamp + nfsrv_sock_idle_timeout) > now.tv_sec) ||
+            nfsrv_sock_tcp_cnt <= 2 * nfsd_thread_max) {
+            time_to_wait -= now.tv_sec - slp->ns_timestamp;
+            if (time_to_wait < 1)
+                time_to_wait = 1;
+            lck_rw_done(&slp->ns_rwlock);
+            break;
+        }
+        /*
+         * Bump the ref count. nfsrv_slpderef below will destroy
+         * the socket, since nfsrv_zapsock has closed it.
+         */
+        slp->ns_sref++;
+        nfsrv_zapsock(slp);
+        lck_rw_done(&slp->ns_rwlock);
+        nfsrv_slpderef_locked(slp);
+    }
 
-    nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-        time_to_wait * 1000);
+    /* Start ourself back up */
+    nfs_interval_timer_start(nfsrv_idlesock_timer_call, time_to_wait * 1000);
+    /* Remember when the next timer will fire for nfssvc_addsock. */
+    nfsrv_idlesock_timer_on = now.tv_sec + time_to_wait;
+    lck_mtx_unlock(nfsd_mutex);
 }
 
 /*
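The reap loop above leans on TAILQ_FOREACH_SAFE so the current element can be unlinked and freed mid-walk, and on the list's LRU ordering to stop early at the first socket that has not yet idled out. A self-contained sketch of the same idiom:

/* Self-contained demonstration of the TAILQ_FOREACH_SAFE removal idiom
 * used by the reaper above: `tmp` preserves the successor so the current
 * element may be unlinked and freed during the walk. */
#include <sys/queue.h>
#include <stdlib.h>
#include <time.h>

struct conn {
    time_t last_used;
    TAILQ_ENTRY(conn) link;
};
TAILQ_HEAD(connlist, conn);

static void
reap_idle(struct connlist *list, time_t now, time_t timeout)
{
    struct conn *c, *tmp;

    TAILQ_FOREACH_SAFE(c, list, link, tmp) {
        if (c->last_used + timeout > now)
            break;    /* list is LRU-ordered: everything after is newer */
        TAILQ_REMOVE(list, c, link);
        free(c);
    }
}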
@@ -1417,33 +1637,14 @@ nfsrv_cleanup(void)
     microuptime(&now);
     for (slp = TAILQ_FIRST(&nfsrv_socklist); slp != 0; slp = nslp) {
         nslp = TAILQ_NEXT(slp, ns_chain);
-        if (slp->ns_flag & SLP_VALID) {
-            lck_rw_lock_exclusive(&slp->ns_rwlock);
+        lck_rw_lock_exclusive(&slp->ns_rwlock);
+        slp->ns_sref++;
+        if (slp->ns_flag & SLP_VALID)
             nfsrv_zapsock(slp);
-            lck_rw_done(&slp->ns_rwlock);
-        }
-        if (slp->ns_flag & SLP_QUEUED) {
-            if (slp->ns_flag & SLP_WAITQ)
-                TAILQ_REMOVE(&nfsrv_sockwait, slp, ns_svcq);
-            else
-                TAILQ_REMOVE(&nfsrv_sockwork, slp, ns_svcq);
-            slp->ns_flag &= ~SLP_QUEUED;
-        }
-        if (slp->ns_wgq.tqe_next != SLPNOLIST) {
-            TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq);
-            slp->ns_wgq.tqe_next = SLPNOLIST;
-        }
-        /* queue the socket up for deletion */
-        slp->ns_timestamp = now.tv_sec;
-        TAILQ_REMOVE(&nfsrv_socklist, slp, ns_chain);
-        TAILQ_INSERT_TAIL(&nfsrv_deadsocklist, slp, ns_chain);
-        if (!nfsrv_deadsock_timer_on) {
-            nfsrv_deadsock_timer_on = 1;
-            nfs_interval_timer_start(nfsrv_deadsock_timer_call,
-                NFSRV_DEADSOCKDELAY * 1000);
-        }
+        lck_rw_done(&slp->ns_rwlock);
+        nfsrv_slpderef_locked(slp);
     }
-
+#
 #if CONFIG_FSE
     /*
      * Flush pending file write fsevents
@@ -1455,10 +1656,12 @@ nfsrv_cleanup(void)
         * Fire off the content modified fsevent for each
         * entry, remove it from the list, and free it.
         */
-        if (nfsrv_fsevents_enabled)
+        if (nfsrv_fsevents_enabled) {
+            fp->fm_context.vc_thread = current_thread();
             add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context,
                     FSE_ARG_VNODE, fp->fm_vp, FSE_ARG_DONE);
+        }
         vnode_put(fp->fm_vp);
         kauth_cred_unref(&fp->fm_context.vc_ucred);
         nfp = LIST_NEXT(fp, fm_link);
@@ -1470,11 +1673,14 @@ nfsrv_cleanup(void)
     lck_mtx_unlock(nfsrv_fmod_mutex);
 #endif
 
+    nfsrv_uc_cleanup();    /* Stop nfs socket up-call threads */
+
     nfs_gss_svc_cleanup();    /* Remove any RPCSEC_GSS contexts */
 
     nfsrv_cleancache();    /* And clear out server cache */
 
     nfsrv_udpsock = NULL;
+    nfsrv_udp6sock = NULL;
 }
 
 #endif /* NFS_NOSERVER */