X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/e5568f75972dfc723778653c11cb6b4dc825716a..15129b1c8dbb3650c63b70adb1cad9af601c6c17:/bsd/nfs/nfs_serv.c diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index 271a85b6f..8cc717b8e 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2011 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ /* @@ -59,1321 +65,1716 @@ * FreeBSD-Id: nfs_serv.c,v 1.52 1997/10/28 15:59:05 bde Exp $ */ -/* - * nfs version 2 and 3 server calls to vnode ops - * - these routines generally have 3 phases - * 1 - break down and validate rpc request in mbuf list - * 2 - do the vnode ops for the request - * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) - * 3 - build the rpc reply in an mbuf list - * nb: - * - do not mix the phases, since the nfsm_?? macros can return failures - * on a bad rpc or similar and do not do any vrele() or vput()'s - * - * - the nfsm_reply() macro generates an nfs rpc reply with the nfs - * error number iff error != 0 whereas - * returning an error from the server function implies a fatal error - * such as a badly constructed rpc request that should be dropped without - * a reply. - * For Version 3, nfsm_reply() does not return for the error case, since - * most version 3 rpcs return more than the status for error cases. - */ - #include #include #include -#include +#include #include #include #include -#include +#include #include #include -#include +#include #include #include #include -#include #include +#include +#include +#include +#include +#include #include #include -#include + +#include #include #include #include #include #include -#include - -nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, - NFFIFO, NFNON }; -#ifndef NFS_NOSERVER -nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, - NFCHR, NFNON }; -/* Global vars */ -extern u_long nfs_xdrneg1; -extern u_long nfs_false, nfs_true; -extern enum vtype nv3tov_type[8]; -extern struct nfsstats nfsstats; - -int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000; -int nfsrvw_procrastinate_v3 = 0; - -int nfs_async = 0; -#ifdef notyet -/* XXX CSM 11/25/97 Upgrade sysctl.h someday */ -SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); +#include +#include + +#if NFSSERVER + +/* + * NFS server globals + */ + +int nfsd_thread_count = 0; +int nfsd_thread_max = 0; +lck_grp_t *nfsd_lck_grp; +lck_mtx_t *nfsd_mutex; +struct nfsd_head nfsd_head, nfsd_queue; + +lck_grp_t *nfsrv_slp_rwlock_group; +lck_grp_t *nfsrv_slp_mutex_group; +struct nfsrv_sockhead nfsrv_socklist, nfsrv_deadsocklist, nfsrv_sockwg, + nfsrv_sockwait, nfsrv_sockwork; +struct nfsrv_sock *nfsrv_udpsock = NULL; +struct nfsrv_sock *nfsrv_udp6sock = NULL; + +/* NFS exports */ +struct nfsrv_expfs_list nfsrv_exports; +struct nfsrv_export_hashhead *nfsrv_export_hashtbl = NULL; +int nfsrv_export_hash_size = NFSRVEXPHASHSZ; +u_long nfsrv_export_hash; +lck_grp_t *nfsrv_export_rwlock_group; +lck_rw_t nfsrv_export_rwlock; + +#if CONFIG_FSE +/* NFS server file modification event generator */ +struct nfsrv_fmod_hashhead *nfsrv_fmod_hashtbl; +u_long nfsrv_fmod_hash; +lck_grp_t *nfsrv_fmod_grp; +lck_mtx_t *nfsrv_fmod_mutex; +static int nfsrv_fmod_timer_on = 0; +int nfsrv_fsevents_enabled = 1; +#endif + +/* NFS server timers */ +#if CONFIG_FSE +thread_call_t nfsrv_fmod_timer_call; #endif +thread_call_t nfsrv_deadsock_timer_call; +thread_call_t nfsrv_wg_timer_call; +int nfsrv_wg_timer_on; -static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, - struct proc *, int)); -static void nfsrvw_coalesce __P((struct nfsrv_descript *, - struct nfsrv_descript *)); +/* globals for the active user list */ +uint32_t nfsrv_user_stat_enabled = 1; +uint32_t nfsrv_user_stat_node_count = 0; +uint32_t nfsrv_user_stat_max_idle_sec = NFSRV_USER_STAT_DEF_IDLE_SEC; +uint32_t nfsrv_user_stat_max_nodes = NFSRV_USER_STAT_DEF_MAX_NODES; +lck_grp_t *nfsrv_active_user_mutex_group; + +int nfsrv_wg_delay = NFSRV_WGATHERDELAY * 1000; +int nfsrv_wg_delay_v3 = 0; + +int nfsrv_async = 0; + +int nfsrv_authorize(vnode_t,vnode_t,kauth_action_t,vfs_context_t,struct nfs_export_options*,int); +int nfsrv_wg_coalesce(struct nfsrv_descript *, struct nfsrv_descript *); +void nfsrv_modified(vnode_t, vfs_context_t); + +extern void IOSleep(int); +extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path); /* - * nfs v3 access service + * Initialize the data structures for the server. */ + +#define NFSRV_NOT_INITIALIZED 0 +#define NFSRV_INITIALIZING 1 +#define NFSRV_INITIALIZED 2 +static volatile UInt32 nfsrv_initted = NFSRV_NOT_INITIALIZED; + int -nfsrv3_access(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_is_initialized(void) +{ + return (nfsrv_initted == NFSRV_INITIALIZED); +} + +void +nfsrv_init(void) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, getret; - char *cp2; - struct mbuf *mb, *mreq, *mb2; - struct vattr vattr, *vap = &vattr; - u_long testmode, nfsmode; - u_quad_t frev; - -#ifndef nolint - cache = 0; + /* make sure we init only once */ + if (!OSCompareAndSwap(NFSRV_NOT_INITIALIZED, NFSRV_INITIALIZING, &nfsrv_initted)) { + /* wait until initialization is complete */ + while (!nfsrv_is_initialized()) + IOSleep(500); + return; + } + + if (sizeof (struct nfsrv_sock) > NFS_SVCALLOC) + printf("struct nfsrv_sock bloated (> %dbytes)\n",NFS_SVCALLOC); + + /* init nfsd mutex */ + nfsd_lck_grp = lck_grp_alloc_init("nfsd", LCK_GRP_ATTR_NULL); + nfsd_mutex = lck_mtx_alloc_init(nfsd_lck_grp, LCK_ATTR_NULL); + + /* init slp rwlock */ + nfsrv_slp_rwlock_group = lck_grp_alloc_init("nfsrv-slp-rwlock", LCK_GRP_ATTR_NULL); + nfsrv_slp_mutex_group = lck_grp_alloc_init("nfsrv-slp-mutex", LCK_GRP_ATTR_NULL); + + /* init export data structures */ + LIST_INIT(&nfsrv_exports); + nfsrv_export_rwlock_group = lck_grp_alloc_init("nfsrv-export-rwlock", LCK_GRP_ATTR_NULL); + lck_rw_init(&nfsrv_export_rwlock, nfsrv_export_rwlock_group, LCK_ATTR_NULL); + + /* init active user list mutex structures */ + nfsrv_active_user_mutex_group = lck_grp_alloc_init("nfs-active-user-mutex", LCK_GRP_ATTR_NULL); + + /* init nfs server request cache mutex */ + nfsrv_reqcache_lck_grp = lck_grp_alloc_init("nfsrv_reqcache", LCK_GRP_ATTR_NULL); + nfsrv_reqcache_mutex = lck_mtx_alloc_init(nfsrv_reqcache_lck_grp, LCK_ATTR_NULL); + +#if CONFIG_FSE + /* init NFS server file modified event generation */ + nfsrv_fmod_hashtbl = hashinit(NFSRVFMODHASHSZ, M_TEMP, &nfsrv_fmod_hash); + nfsrv_fmod_grp = lck_grp_alloc_init("nfsrv_fmod", LCK_GRP_ATTR_NULL); + nfsrv_fmod_mutex = lck_mtx_alloc_init(nfsrv_fmod_grp, LCK_ATTR_NULL); #endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(1, (struct vattr *)0); - return (0); + + /* initialize NFS server timer callouts */ +#if CONFIG_FSE + nfsrv_fmod_timer_call = thread_call_allocate(nfsrv_fmod_timer, NULL); +#endif + nfsrv_deadsock_timer_call = thread_call_allocate(nfsrv_deadsock_timer, NULL); + nfsrv_wg_timer_call = thread_call_allocate(nfsrv_wg_timer, NULL); + + /* Init server data structures */ + TAILQ_INIT(&nfsrv_socklist); + TAILQ_INIT(&nfsrv_sockwait); + TAILQ_INIT(&nfsrv_sockwork); + TAILQ_INIT(&nfsrv_deadsocklist); + TAILQ_INIT(&nfsrv_sockwg); + TAILQ_INIT(&nfsd_head); + TAILQ_INIT(&nfsd_queue); + nfsrv_udpsock = NULL; + nfsrv_udp6sock = NULL; + + /* Setup the up-call handling */ + nfsrv_uc_init(); + + /* initialization complete */ + nfsrv_initted = NFSRV_INITIALIZED; +} + + +/* + * + * NFS version 2 and 3 server request processing functions + * + * These functions take the following parameters: + * + * struct nfsrv_descript *nd - the NFS request descriptor + * struct nfsrv_sock *slp - the NFS socket the request came in on + * vfs_context_t ctx - VFS context + * mbuf_t *mrepp - pointer to hold the reply mbuf list + * + * These routines generally have 3 phases: + * + * 1 - break down and validate the RPC request in the mbuf chain + * provided in nd->nd_nmreq. + * 2 - perform the vnode operations for the request + * (many are very similar to syscalls in vfs_syscalls.c and + * should therefore be kept in sync with those implementations) + * 3 - build the RPC reply in an mbuf chain (nmrep) and return the mbuf chain + * + */ + +/* + * nfs v3 access service + */ +int +nfsrv_access( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) +{ + struct nfsm_chain *nmreq, nmrep; + vnode_t vp; + int error, attrerr; + struct vnode_attr vattr; + struct nfs_filehandle nfh; + u_int32_t nfsmode; + kauth_action_t testaction; + struct nfs_export *nx; + struct nfs_export_options *nxo; + + error = 0; + attrerr = ENOENT; + nfsmode = 0; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + *mrepp = NULL; + vp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, nfsmode); + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + /* + * Each NFS mode bit is tested separately. + * + * XXX this code is nominally correct, but returns a pessimistic + * rather than optimistic result. It will be necessary to add + * an NFS-specific interface to the vnode_authorize code to + * obtain good performance in the optimistic mode. + */ + if (nfsmode & NFS_ACCESS_READ) { + testaction = vnode_isdir(vp) ? KAUTH_VNODE_LIST_DIRECTORY : KAUTH_VNODE_READ_DATA; + if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0)) + nfsmode &= ~NFS_ACCESS_READ; + } + if ((nfsmode & NFS_ACCESS_LOOKUP) && + (!vnode_isdir(vp) || + nfsrv_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx, nxo, 0))) + nfsmode &= ~NFS_ACCESS_LOOKUP; + if (nfsmode & NFS_ACCESS_MODIFY) { + if (vnode_isdir(vp)) { + testaction = + KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY | + KAUTH_VNODE_DELETE_CHILD; + } else { + testaction = + KAUTH_VNODE_WRITE_DATA; + } + if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0)) + nfsmode &= ~NFS_ACCESS_MODIFY; + } + if (nfsmode & NFS_ACCESS_EXTEND) { + if (vnode_isdir(vp)) { + testaction = + KAUTH_VNODE_ADD_FILE | + KAUTH_VNODE_ADD_SUBDIRECTORY; + } else { + testaction = + KAUTH_VNODE_WRITE_DATA | + KAUTH_VNODE_APPEND_DATA; + } + if (nfsrv_authorize(vp, NULL, testaction, ctx, nxo, 0)) + nfsmode &= ~NFS_ACCESS_EXTEND; } - nfsmode = fxdr_unsigned(u_long, *tl); - if ((nfsmode & NFSV3ACCESS_READ) && - nfsrv_access(vp, VREAD, cred, rdonly, procp, 0)) - nfsmode &= ~NFSV3ACCESS_READ; - if (vp->v_type == VDIR) - testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | - NFSV3ACCESS_DELETE); - else - testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); - if ((nfsmode & testmode) && - nfsrv_access(vp, VWRITE, cred, rdonly, procp, 0)) - nfsmode &= ~testmode; - if (vp->v_type == VDIR) - testmode = NFSV3ACCESS_LOOKUP; - else - testmode = NFSV3ACCESS_EXECUTE; - if ((nfsmode & testmode) && - nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0)) - nfsmode &= ~testmode; - getret = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); - nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, vap); - nfsm_build(tl, u_long *, NFSX_UNSIGNED); - *tl = txdr_unsigned(nfsmode); - nfsm_srvdone; + + /* + * Note concerning NFS_ACCESS_DELETE: + * For hard links, the answer may be wrong if the vnode + * has multiple parents with different permissions. + * Also, some clients (e.g. MacOSX 10.3) may incorrectly + * interpret the missing/cleared DELETE bit. + * So we'll just leave the DELETE bit alone. At worst, + * we're telling the client it might be able to do + * something it really can't. + */ + + if ((nfsmode & NFS_ACCESS_EXECUTE) && + (vnode_isdir(vp) || + nfsrv_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx, nxo, 0))) + nfsmode &= ~NFS_ACCESS_EXECUTE; + + /* get postop attributes */ + nfsm_srv_vattr_init(&vattr, NFS_VER3); + attrerr = vnode_getattr(vp, &vattr, ctx); + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(NFS_VER3) + NFSX_UNSIGNED); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &vattr); + if (!nd->nd_repstat) + nfsm_chain_add_32(error, &nmrep, nfsmode); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (vp) + vnode_put(vp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* * nfs getattr service */ int -nfsrv_getattr(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_getattr( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; - struct vattr va; - register struct vattr *vap = &va; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - u_quad_t frev; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(0); - return (0); + struct nfsm_chain *nmreq, nmrep; + struct vnode_attr vattr; + vnode_t vp; + int error; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + + error = 0; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + *mrepp = NULL; + vp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + nfsm_srv_vattr_init(&vattr, nd->nd_vers); + error = vnode_getattr(vp, &vattr, ctx); + vnode_put(vp); + vp = NULL; + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_FATTR(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_if(nd->nd_repstat); + error = nfsm_chain_add_fattr(nd, &nmrep, &vattr); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (vp) + vnode_put(vp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nqsrv_getl(vp, ND_READ); - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); - nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); - if (error) - return (0); - nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); - nfsm_srvfillattr(vap, fp); - nfsm_srvdone; + return (error); } /* * nfs setattr service */ int -nfsrv_setattr(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_setattr( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, preat; - register struct vattr *vap = &va; - register struct nfsv2_sattr *sp; - register struct nfs_fattr *fp; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - u_quad_t frev; - struct timespec guard; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - VATTR_NULL(vap); - if (v3) { - nfsm_srvsattr(vap); - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - gcheck = fxdr_unsigned(int, *tl); - if (gcheck) { - nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); - fxdr_nfsv3time(tl, &guard); - } - } else { - nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - /* - * Nah nah nah nah na nah - * There is a bug in the Sun client that puts 0xffff in the mode - * field of sattr when it should put in 0xffffffff. The u_short - * doesn't sign extend. - * --> check the low order 2 bytes for 0xffff - */ - if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) - vap->va_mode = nfstov_mode(sp->sa_mode); - if (sp->sa_uid != nfs_xdrneg1) - vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); - if (sp->sa_gid != nfs_xdrneg1) - vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); - if (sp->sa_size != nfs_xdrneg1) - vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); - if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { -#ifdef notyet - fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); -#else - vap->va_atime.tv_sec = - fxdr_unsigned(long, sp->sa_atime.nfsv2_sec); - vap->va_atime.tv_nsec = 0; -#endif - } - if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) - fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); + struct nfsm_chain *nmreq, nmrep; + struct vnode_attr preattr, postattr; + struct vnode_attr vattr, *vap = &vattr; + vnode_t vp; + struct nfs_export *nx; + struct nfs_export_options *nxo; + int error, preattrerr, postattrerr, gcheck; + struct nfs_filehandle nfh; + struct timespec guard = { 0, 0 }; + kauth_action_t action; + uid_t saved_uid; - } + error = 0; + preattrerr = postattrerr = ENOENT; + gcheck = 0; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + *mrepp = NULL; + vp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + + VATTR_INIT(vap); + error = nfsm_chain_get_sattr(nd, nmreq, vap); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_get_32(error, nmreq, gcheck); + if (gcheck) + nfsm_chain_get_time(error, nmreq, nd->nd_vers, guard.tv_sec, guard.tv_nsec); + } + nfsmerr_if(error); + + /* + * Save the original credential UID in case they are + * mapped and we need to map the IDs in the attributes. + */ + saved_uid = kauth_cred_getuid(nd->nd_cr); /* * Now that we have all the fields, lets do it. */ - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); - return (0); - } - nqsrv_getl(vp, ND_WRITE); - if (v3) { - error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); - if (!error && gcheck && - (preat.va_ctime.tv_sec != guard.tv_sec || - preat.va_ctime.tv_nsec != guard.tv_nsec)) + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&preattr); + error = preattrerr = vnode_getattr(vp, &preattr, ctx); + if (!error && gcheck && VATTR_IS_SUPPORTED(&preattr, va_change_time) && + (preattr.va_change_time.tv_sec != guard.tv_sec || + preattr.va_change_time.tv_nsec != guard.tv_nsec)) error = NFSERR_NOT_SYNC; - if (error) { - vput(vp); - nfsm_reply(NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); - return (0); - } + if (!preattrerr && !VATTR_ALL_SUPPORTED(&preattr)) + preattrerr = ENOENT; + nfsmerr_if(error); } /* - * If the size is being changed write acces is required, otherwise - * just check for a read only file system. + * If the credentials were mapped, we should + * map the same values in the attributes. */ - if (vap->va_size == ((u_quad_t)((quad_t) -1))) { - if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { - error = EROFS; - goto out; - } - } else { - if (vp->v_type == VDIR) { - error = EISDIR; - goto out; - } else if ((error = nfsrv_access(vp, VWRITE, cred, rdonly, - procp, 0))) - goto out; + if ((vap->va_uid == saved_uid) && (kauth_cred_getuid(nd->nd_cr) != saved_uid)) { + int ismember; + VATTR_SET(vap, va_uid, kauth_cred_getuid(nd->nd_cr)); + if (kauth_cred_ismember_gid(nd->nd_cr, vap->va_gid, &ismember) || !ismember) + VATTR_SET(vap, va_gid, kauth_cred_getgid(nd->nd_cr)); } - error = VOP_SETATTR(vp, vap, cred, procp); - postat_ret = VOP_GETATTR(vp, vap, cred, procp); + + /* Authorize the attribute changes. */ + error = vnode_authattr(vp, vap, &action, ctx); if (!error) - error = postat_ret; -out: - vput(vp); - nfsm_reply(NFSX_WCCORFATTR(v3)); - if (v3) { - nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); - return (0); - } else { - nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + error = nfsrv_authorize(vp, NULL, action, ctx, nxo, 0); + + /* set the new attributes */ + if (!error) + error = vnode_setattr(vp, vap, ctx); + + if (!error || (nd->nd_vers == NFS_VER3)) { + nfsm_srv_vattr_init(&postattr, nd->nd_vers); + postattrerr = vnode_getattr(vp, &postattr, ctx); + if (!error) + error = postattrerr; + } + +nfsmerr: + if (vp) + vnode_put(vp); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCORFATTR(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_wcc_data(error, nd, &nmrep, + preattrerr, &preattr, postattrerr, &postattr); + else + error = nfsm_chain_add_fattr(nd, &nmrep, &postattr); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nfsm_srvdone; + return (error); } /* * nfs lookup rpc */ int -nfsrv_lookup(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_lookup( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; - struct nameidata nd, *ndp = &nd; -#ifdef notdef - struct nameidata ind; + struct nameidata ni; + vnode_t vp, dirp = NULL; + struct nfs_filehandle dnfh, nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + int error, attrerr, dirattrerr, isdotdot; + uint32_t len = 0; + uid_t saved_uid; + struct vnode_attr va, dirattr, *vap = &va; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = dirattrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + saved_uid = kauth_cred_getuid(nd->nd_cr); + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, dnfh.nfh_fhp, dnfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = LOOKUP; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LOOKUP; #endif - struct vnode *vp, *dirp; - nfsfh_t nfh; - fhandle_t *fhp; - register caddr_t cp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, cache, len, dirattr_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vattr va, dirattr, *vap = &va; - u_quad_t frev; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - - pubflag = nfs_ispublicfh(fhp); - - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), pubflag); - -/* XXX CSM 12/4/97 Revisit when enabling WebNFS */ -#ifdef notyet - if (!error && pubflag) { - if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) { - /* - * Setup call to lookup() to see if we can find - * the index file. Arguably, this doesn't belong - * in a kernel.. Ugh. - */ - ind = nd; - VOP_UNLOCK(nd.ni_vp, 0, procp); - ind.ni_pathlen = strlen(nfs_pub.np_index); - ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf = - nfs_pub.np_index; - ind.ni_startdir = nd.ni_vp; - VREF(ind.ni_startdir); - error = lookup(&ind); - if (!error) { - /* - * Found an index file. Get rid of - * the old references. - */ - if (dirp) - vrele(dirp); - dirp = nd.ni_vp; - vrele(nd.ni_startdir); - ndp = &ind; - } else - error = 0; - } - /* - * If the public filehandle was used, check that this lookup - * didn't result in a filehandle outside the publicly exported - * filesystem. - */ + ni.ni_cnd.cn_flags = LOCKLEAF; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + isdotdot = ((len == 2) && (ni.ni_cnd.cn_pnbuf[0] == '.') && (ni.ni_cnd.cn_pnbuf[1] == '.')); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &dnfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); - if (!error && ndp->ni_vp->v_mount != nfs_pub.np_mount) { - vput(nd.ni_vp); - error = EPERM; + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); } } -#endif if (dirp) { - if (v3) - dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, - procp); - vrele(dirp); + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_vattr_init(&dirattr, NFS_VER3); + dirattrerr = vnode_getattr(dirp, &dirattr, ctx); + } + vnode_put(dirp); } + nfsmerr_if(error); - if (error) { - nfsm_reply(NFSX_POSTOPATTR(v3)); - nfsm_srvpostop_attr(dirattr_ret, &dirattr); - return (0); + nameidone(&ni); + + vp = ni.ni_vp; + error = nfsrv_vptofh(nx, nd->nd_vers, (isdotdot ? &dnfh : NULL), vp, ctx, &nfh); + if (!error) { + nfsm_srv_vattr_init(vap, nd->nd_vers); + attrerr = vnode_getattr(vp, vap, ctx); } + vnode_put(vp); - nqsrv_getl(ndp->ni_startdir, ND_READ); - vrele(ndp->ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - vp = ndp->ni_vp; - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); - nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); - if (error) { - nfsm_srvpostop_attr(dirattr_ret, &dirattr); - return (0); +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) + + NFSX_POSTOPORFATTR(nd->nd_vers) + NFSX_POSTOPATTR(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + if (nd->nd_repstat) { + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_postop_attr(error, nd, &nmrep, dirattrerr, &dirattr); + goto nfsmout; + } + nfsm_chain_add_fh(error, &nmrep, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, vap); + nfsm_chain_add_postop_attr(error, nd, &nmrep, dirattrerr, &dirattr); + } else if (!error) { + error = nfsm_chain_add_fattr(nd, &nmrep, vap); } - nfsm_srvfhtom(fhp, v3); - if (v3) { - nfsm_srvpostop_attr(0, vap); - nfsm_srvpostop_attr(dirattr_ret, &dirattr); - } else { - nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nfsm_srvdone; + return (error); } /* * nfs readlink service */ int -nfsrv_readlink(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_readlink( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; - register struct iovec *ivp = iv; - register struct mbuf *mp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, i, tlen, len, getret; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; - struct vnode *vp; - struct vattr attr; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io, *uiop = &io; - u_quad_t frev; - -#ifndef nolint - mp2 = mp3 = (struct mbuf *)0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - len = 0; - i = 0; - while (len < NFS_MAXPATHLEN) { - MGET(mp, M_WAIT, MT_DATA); - MCLGET(mp, M_WAIT); - mp->m_len = NFSMSIZ(mp); - if (len == 0) - mp3 = mp2 = mp; - else { - mp2->m_next = mp; - mp2 = mp; - } - if ((len+mp->m_len) > NFS_MAXPATHLEN) { - mp->m_len = NFS_MAXPATHLEN-len; - len = NFS_MAXPATHLEN; - } else - len += mp->m_len; - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - i++; - ivp++; - } - uiop->uio_iov = iv; - uiop->uio_iovcnt = i; - uiop->uio_offset = 0; - uiop->uio_resid = len; - uiop->uio_rw = UIO_READ; - uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = (struct proc *)0; - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - m_freem(mp3); - nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvpostop_attr(1, (struct vattr *)0); - return (0); - } - if (vp->v_type != VLNK) { - if (v3) + int error, mpcnt, tlen, len, attrerr; + vnode_t vp; + struct vnode_attr vattr; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct nfsm_chain *nmreq, nmrep; + mbuf_t mpath, mp; + uio_t auio = NULL; + char uio_buf[ UIO_SIZEOF(4) ]; + char *uio_bufp = &uio_buf[0]; + int uio_buflen = UIO_SIZEOF(4); + + error = 0; + attrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + mpath = NULL; + vp = NULL; + len = NFS_MAXPATHLEN; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + + /* get mbuf list to hold symlink path */ + error = nfsm_mbuf_get_list(len, &mpath, &mpcnt); + nfsmerr_if(error); + if (mpcnt > 4) { + uio_buflen = UIO_SIZEOF(mpcnt); + MALLOC(uio_bufp, char*, uio_buflen, M_TEMP, M_WAITOK); + if (!uio_bufp) + error = ENOMEM; + nfsmerr_if(error); + } + auio = uio_createwithbuffer(mpcnt, 0, UIO_SYSSPACE, UIO_READ, uio_bufp, uio_buflen); + if (!auio) + error = ENOMEM; + nfsmerr_if(error); + + for (mp = mpath; mp; mp = mbuf_next(mp)) + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(mp)), mbuf_len(mp)); + + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + if (vnode_vtype(vp) != VLNK) { + if (nd->nd_vers == NFS_VER3) error = EINVAL; else error = ENXIO; - goto out; } - nqsrv_getl(vp, ND_READ); - error = VOP_READLINK(vp, uiop, cred); -out: - getret = VOP_GETATTR(vp, &attr, cred, procp); - vput(vp); - if (error) { - m_freem(mp3); - mp3 = NULL; + + if (!error) + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx, nxo, 0); + if (!error) + error = VNOP_READLINK(vp, auio, ctx); + if (vp) { + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_vattr_init(&vattr, NFS_VER3); + attrerr = vnode_getattr(vp, &vattr, ctx); + } + vnode_put(vp); + vp = NULL; } - nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); - if (v3) { - nfsm_srvpostop_attr(getret, &attr); - if (error) - return (0); + if (error) { + mbuf_freem(mpath); + mpath = NULL; } - if (!error) { - if (uiop->uio_resid > 0) { - len -= uiop->uio_resid; - tlen = nfsm_rndup(len); - nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); - } - nfsm_build(tl, u_long *, NFSX_UNSIGNED); - *tl = txdr_unsigned(len); - mb->m_next = mp3; + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + NFSX_UNSIGNED); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &vattr); + if (error || nd->nd_repstat) { + nfsm_chain_build_done(error, &nmrep); + goto nfsmout; + } + if (auio && (uio_resid(auio) > 0)) { + len -= uio_resid(auio); + tlen = nfsm_rndup(len); + nfsm_adj(mpath, NFS_MAXPATHLEN-tlen, tlen-len); + } + nfsm_chain_add_32(error, &nmrep, len); + nfsm_chain_build_done(error, &nmrep); + nfsmout_if(error); + error = mbuf_setnext(nmrep.nmc_mcur, mpath); + if (!error) + mpath = NULL; +nfsmout: + if (vp) + vnode_put(vp); + if (mpath) + mbuf_freem(mpath); + if (uio_bufp != &uio_buf[0]) + FREE(uio_bufp, M_TEMP); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nfsm_srvdone; + return (error); } /* * nfs read service */ int -nfsrv_read(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_read( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct iovec *iv; - struct iovec *iv2; - register struct mbuf *m; - register struct nfs_fattr *fp; - register u_long *tl; - register long t1; - register int i; - caddr_t bpos; - int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; - int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct mbuf *m2; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io, *uiop = &io; - struct vattr va, *vap = &va; + int error, attrerr, mreadcnt; + uint32_t reqlen, maxlen, count, len, tlen, left; + mbuf_t mread, m; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio = NULL; + char *uio_bufp = NULL; + struct vnode_attr vattr, *vap = &vattr; off_t off; - u_quad_t frev; - int didhold = 0; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if (v3) { - nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); - fxdr_hyper(tl, &off); - } else { - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - off = (off_t)fxdr_unsigned(u_long, *tl); - } - nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvpostop_attr(1, (struct vattr *)0); - return (0); - } - if (vp->v_type != VREG) { - if (v3) + uid_t saved_uid; + char uio_buf[ UIO_SIZEOF(0) ]; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + mread = NULL; + vp = NULL; + len = reqlen = 0; + saved_uid = kauth_cred_getuid(nd->nd_cr); + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_get_64(error, nmreq, off); + else + nfsm_chain_get_32(error, nmreq, off); + nfsm_chain_get_32(error, nmreq, reqlen); + maxlen = NFSRV_NDMAXDATA(nd); + if (reqlen > maxlen) + reqlen = maxlen; + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + if (vnode_vtype(vp) != VREG) { + if (nd->nd_vers == NFS_VER3) error = EINVAL; else - error = (vp->v_type == VDIR) ? EISDIR : EACCES; + error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES; } + if (!error) { - nqsrv_getl(vp, ND_READ); - if ((error = nfsrv_access(vp, VREAD, cred, rdonly, procp, 1))) - error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 1); + if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx, nxo, 1))) + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx, nxo, 1); } - getret = VOP_GETATTR(vp, vap, cred, procp); + nfsm_srv_vattr_init(vap, nd->nd_vers); + attrerr = vnode_getattr(vp, vap, ctx); if (!error) - error = getret; - if (error) { - vput(vp); - nfsm_reply(NFSX_POSTOPATTR(v3)); - nfsm_srvpostop_attr(getret, vap); - return (0); - } - if (off >= vap->va_size) - cnt = 0; - else if ((off + reqlen) > vap->va_size) - cnt = nfsm_rndup(vap->va_size - off); + error = attrerr; + nfsmerr_if(error); + + if ((u_quad_t)off >= vap->va_data_size) + count = 0; + else if (((u_quad_t)off + reqlen) > vap->va_data_size) + count = nfsm_rndup(vap->va_data_size - off); else - cnt = reqlen; - nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); - if (v3) { - nfsm_build(tl, u_long *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); - *tl++ = nfs_true; - fp = (struct nfs_fattr *)tl; - tl += (NFSX_V3FATTR / sizeof (u_long)); - } else { - nfsm_build(tl, u_long *, NFSX_V2FATTR + NFSX_UNSIGNED); - fp = (struct nfs_fattr *)tl; - tl += (NFSX_V2FATTR / sizeof (u_long)); - } - len = left = cnt; - if (cnt > 0) { - /* - * Generate the mbuf list with the uio_iov ref. to it. - */ - i = 0; - m = m2 = mb; - while (left > 0) { - siz = min(M_TRAILINGSPACE(m), left); - if (siz > 0) { - left -= siz; - i++; - } - if (left > 0) { - MGET(m, M_WAIT, MT_DATA); - MCLGET(m, M_WAIT); - m->m_len = 0; - m2->m_next = m; - m2 = m; - } + count = reqlen; + + len = left = count; + if (count > 0) { + /* get mbuf list to hold read data */ + error = nfsm_mbuf_get_list(count, &mread, &mreadcnt); + nfsmerr_if(error); + MALLOC(uio_bufp, char *, UIO_SIZEOF(mreadcnt), M_TEMP, M_WAITOK); + if (uio_bufp) + auio = uio_createwithbuffer(mreadcnt, off, UIO_SYSSPACE, + UIO_READ, uio_bufp, UIO_SIZEOF(mreadcnt)); + if (!uio_bufp || !auio) { + error = ENOMEM; + goto errorexit; } - MALLOC(iv, struct iovec *, i * sizeof (struct iovec), - M_TEMP, M_WAITOK); - uiop->uio_iov = iv2 = iv; - m = mb; - left = cnt; - i = 0; - while (left > 0) { - if (m == NULL) - panic("nfsrv_read iov"); - siz = min(M_TRAILINGSPACE(m), left); - if (siz > 0) { - iv->iov_base = mtod(m, caddr_t) + m->m_len; - iv->iov_len = siz; - m->m_len += siz; - left -= siz; - iv++; - i++; - } - m = m->m_next; - } - uiop->uio_iovcnt = i; - uiop->uio_offset = off; - uiop->uio_resid = cnt; - uiop->uio_rw = UIO_READ; - uiop->uio_segflg = UIO_SYSSPACE; - didhold = ubc_hold(vp); - error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); - off = uiop->uio_offset; - FREE((caddr_t)iv2, M_TEMP); - /* - * This may seem a little weird that we drop the whole - * successful read if we get an error on the getattr. - * The reason is because we've already set up the reply - * to have postop attrs and omitting these optional bits - * would require shifting all the data in the reply. - * - * It would be more correct if we would simply drop the - * postop attrs if the getattr fails. We might be able to - * do that easier if we allocated separate mbufs for the data. - */ - if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - if (!error) - error = getret; - m_freem(mreq); - vrele(vp); - nfsm_reply(NFSX_POSTOPATTR(v3)); - nfsm_srvpostop_attr(getret, vap); - return (0); - } - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); + for (m = mread; m; m = mbuf_next(m)) + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mbuf_len(m)); + error = VNOP_READ(vp, auio, IO_NODELOCKED, ctx); } else { - uiop->uio_resid = 0; - vput(vp); + auio = uio_createwithbuffer(0, 0, UIO_SYSSPACE, UIO_READ, &uio_buf[0], sizeof(uio_buf)); + if (!auio) { + error = ENOMEM; + goto errorexit; + } } - nfsm_srvfillattr(vap, fp); - len -= uiop->uio_resid; + +errorexit: + if (!error || (nd->nd_vers == NFS_VER3)) { + nfsm_srv_vattr_init(vap, nd->nd_vers); + attrerr = vnode_getattr(vp, vap, ctx); + if (!error && (nd->nd_vers == NFS_VER2)) + error = attrerr; /* NFSv2 must have attributes to return */ + } + nfsmerr_if(error); + + vnode_put(vp); + vp = NULL; + + /* trim off any data not actually read */ + len -= uio_resid(auio); tlen = nfsm_rndup(len); - if (cnt != tlen || tlen != len) - nfsm_adj(mb, cnt - tlen, tlen - len); - if (v3) { - *tl++ = txdr_unsigned(len); - if (len < reqlen) - *tl++ = nfs_true; - else - *tl++ = nfs_false; + if (count != tlen || tlen != len) + nfsm_adj(mread, count - tlen, tlen - len); + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPORFATTR(nd->nd_vers) + 3 * NFSX_UNSIGNED); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, vap); + if (error || nd->nd_repstat) { + nfsm_chain_build_done(error, &nmrep); + goto nfsmout; + } + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_32(error, &nmrep, len); + nfsm_chain_add_32(error, &nmrep, (len < reqlen) ? TRUE : FALSE); + } else { + error = nfsm_chain_add_fattr(nd, &nmrep, vap); + } + nfsm_chain_add_32(error, &nmrep, len); + nfsm_chain_build_done(error, &nmrep); + nfsmout_if(error); + error = mbuf_setnext(nmrep.nmc_mcur, mread); + if (!error) + mread = NULL; + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.bytes_read, len); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, len, 0); +nfsmout: + if (vp) + vnode_put(vp); + if (mread) + mbuf_freem(mread); + if (uio_bufp != NULL) + FREE(uio_bufp, M_TEMP); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - *tl = txdr_unsigned(len); - nfsm_srvdone; + return (error); } +#if CONFIG_FSE /* - * nfs write service + * NFS File modification reporting + * + * When the contents of a file are changed, a "content modified" + * fsevent needs to be issued. Normally this would be done at + * file close time. This is difficult for NFS because the protocol + * has no "close" operation. The client sends a stream of write + * requests that just stop. So we keep a hash table full of + * vnodes that have been written to recently, and issue a + * "content modified" fsevent only if there are no writes to + * a vnode for nfsrv_fmod_pendtime milliseconds. */ -int -nfsrv_write(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +int nfsrv_fmod_pending; /* count of vnodes being written to */ +int nfsrv_fmod_pendtime = 1000; /* msec to wait */ +int nfsrv_fmod_min_interval = 100; /* msec min interval between callbacks */ + +/* + * This function is called via the kernel's callout + * mechanism. Calls are made only when there are + * vnodes pending a fsevent creation, and no more + * frequently than every nfsrv_fmod_min_interval ms. + */ +void +nfsrv_fmod_timer(__unused void *param0, __unused void *param1) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct iovec *ivp; - register int i, cnt; - register struct mbuf *mp; - register struct nfs_fattr *fp; - struct iovec *iv; - struct vattr va, forat; - register struct vattr *vap = &va; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, len, forat_ret = 1; - int ioflags, aftat_ret = 1, retlen, zeroing, adjust; - int stable = NFSV3WRITE_FILESYNC; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io, *uiop = &io; - off_t off; - u_quad_t frev; - int didhold = 0; + struct nfsrv_fmod_hashhead *headp, firehead; + struct nfsrv_fmod *fp, *nfp, *pfp; + uint64_t timenow, next_deadline; + int interval = 0, i, fmod_fire; - if (mrep == NULL) { - *mrq = NULL; - return (0); - } - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if (v3) { - nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); - fxdr_hyper(tl, &off); - tl += 3; - stable = fxdr_unsigned(int, *tl++); - } else { - nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); - off = (off_t)fxdr_unsigned(u_long, *++tl); - tl += 2; - if (nfs_async) - stable = NFSV3WRITE_UNSTABLE; - } - retlen = len = fxdr_unsigned(long, *tl); - cnt = i = 0; + LIST_INIT(&firehead); + lck_mtx_lock(nfsrv_fmod_mutex); +again: + clock_get_uptime(&timenow); + clock_interval_to_deadline(nfsrv_fmod_pendtime, 1000 * 1000, + &next_deadline); /* - * For NFS Version 2, it is not obvious what a write of zero length - * should do, but I might as well be consistent with Version 3, - * which is to return ok so long as there are no permission problems. + * Scan all the hash chains */ - if (len > 0) { - zeroing = 1; - mp = mrep; - while (mp) { - if (mp == md) { - zeroing = 0; - adjust = dpos - mtod(mp, caddr_t); - mp->m_len -= adjust; - if (mp->m_len > 0 && adjust > 0) - NFSMADV(mp, adjust); - } - if (zeroing) - mp->m_len = 0; - else if (mp->m_len > 0) { - i += mp->m_len; - if (i > len) { - mp->m_len -= (i - len); - zeroing = 1; - } - if (mp->m_len > 0) - cnt++; + fmod_fire = 0; + for (i = 0; i < NFSRVFMODHASHSZ; i++) { + /* + * For each hash chain, look for an entry + * that has exceeded the deadline. + */ + headp = &nfsrv_fmod_hashtbl[i]; + LIST_FOREACH(fp, headp, fm_link) { + if (timenow >= fp->fm_deadline) + break; + if (fp->fm_deadline < next_deadline) + next_deadline = fp->fm_deadline; + } + + /* + * If we have an entry that's exceeded the + * deadline, then the same is true for all + * following entries in the chain, since they're + * sorted in time order. + */ + pfp = NULL; + while (fp) { + /* move each entry to the fire list */ + nfp = LIST_NEXT(fp, fm_link); + LIST_REMOVE(fp, fm_link); + fmod_fire++; + if (pfp) + LIST_INSERT_AFTER(pfp, fp, fm_link); + else + LIST_INSERT_HEAD(&firehead, fp, fm_link); + pfp = fp; + fp = nfp; } - mp = mp->m_next; - } - } - if (len > NFS_MAXDATA || len < 0 || i < len) { - error = EIO; - nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); - return (0); - } - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); - return (0); } - if (v3) - forat_ret = VOP_GETATTR(vp, &forat, cred, procp); - if (vp->v_type != VREG) { - if (v3) - error = EINVAL; - else - error = (vp->v_type == VDIR) ? EISDIR : EACCES; + + if (fmod_fire) { + lck_mtx_unlock(nfsrv_fmod_mutex); + /* + * Fire off the content modified fsevent for each + * entry and free it. + */ + LIST_FOREACH_SAFE(fp, &firehead, fm_link, nfp) { + if (nfsrv_fsevents_enabled) { + fp->fm_context.vc_thread = current_thread(); + add_fsevent(FSE_CONTENT_MODIFIED, &fp->fm_context, + FSE_ARG_VNODE, fp->fm_vp, + FSE_ARG_DONE); + } + vnode_put(fp->fm_vp); + kauth_cred_unref(&fp->fm_context.vc_ucred); + LIST_REMOVE(fp, fm_link); + FREE(fp, M_TEMP); + } + lck_mtx_lock(nfsrv_fmod_mutex); + nfsrv_fmod_pending -= fmod_fire; + goto again; } - if (!error) { - nqsrv_getl(vp, ND_WRITE); - error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1); + + /* + * If there are still pending entries, set up another + * callout to handle them later. Set the timeout deadline + * so that the callout happens when the oldest pending + * entry is ready to send its fsevent. + */ + if (nfsrv_fmod_pending > 0) { + interval = (next_deadline - timenow) / (1000 * 1000); + if (interval < nfsrv_fmod_min_interval) + interval = nfsrv_fmod_min_interval; } - if (error) { - vput(vp); - nfsm_reply(NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); + + nfsrv_fmod_timer_on = interval > 0; + if (nfsrv_fmod_timer_on) + nfs_interval_timer_start(nfsrv_fmod_timer_call, interval); + + lck_mtx_unlock(nfsrv_fmod_mutex); +} + +/* + * When a vnode has been written to, enter it in the hash + * table of vnodes pending creation of an fsevent. If the + * callout timer isn't already running, schedule a callback + * for nfsrv_fmod_pendtime msec from now. + */ +void +nfsrv_modified(vnode_t vp, vfs_context_t ctx) +{ + uint64_t deadline; + struct nfsrv_fmod *fp; + struct nfsrv_fmod_hashhead *head; + + lck_mtx_lock(nfsrv_fmod_mutex); + + /* + * Compute the time in the future when the + * content modified fsevent is to be issued. + */ + clock_interval_to_deadline(nfsrv_fmod_pendtime, 1000 * 1000, &deadline); + + /* + * Check if there's already a file content change fsevent + * pending for this vnode. If there is, update its + * timestamp and make sure it's at the front of the hash chain. + */ + head = &nfsrv_fmod_hashtbl[NFSRVFMODHASH(vp)]; + LIST_FOREACH(fp, head, fm_link) { + if (vp == fp->fm_vp) { + fp->fm_deadline = deadline; + if (fp != LIST_FIRST(head)) { + LIST_REMOVE(fp, fm_link); + LIST_INSERT_HEAD(head, fp, fm_link); + } + lck_mtx_unlock(nfsrv_fmod_mutex); + return; + } + } + + /* + * First content change fsevent for this vnode. + * Allocate a new file mod entry and add it + * on the front of the hash chain. + */ + if (vnode_get(vp) != 0) + goto done; + MALLOC(fp, struct nfsrv_fmod *, sizeof(*fp), M_TEMP, M_WAITOK); + if (fp == NULL) { + vnode_put(vp); + goto done; + } + fp->fm_vp = vp; + kauth_cred_ref(vfs_context_ucred(ctx)); + fp->fm_context = *ctx; + fp->fm_deadline = deadline; + LIST_INSERT_HEAD(head, fp, fm_link); + + /* + * If added to an empty hash table, then set the + * callout timer to go off after nfsrv_fmod_pendtime. + */ + nfsrv_fmod_pending++; + if (!nfsrv_fmod_timer_on) { + nfsrv_fmod_timer_on = 1; + nfs_interval_timer_start(nfsrv_fmod_timer_call, + nfsrv_fmod_pendtime); + } +done: + lck_mtx_unlock(nfsrv_fmod_mutex); + return; +} +#endif /* CONFIG_FSE */ + +/* + * nfs write service + */ +int +nfsrv_write( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) +{ + struct vnode_attr preattr, postattr; + int error, preattrerr, postattrerr; + int ioflags, len, retlen; + int mlen, mcount; + int stable = NFS_WRITE_FILESYNC; + mbuf_t m; + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio = NULL; + char *uio_bufp = NULL; + off_t off; + uid_t saved_uid; + struct nfsm_chain *nmreq, nmrep; + + if (nd->nd_nmreq.nmc_mhead == NULL) { + *mrepp = NULL; return (0); } + error = 0; + preattrerr = postattrerr = ENOENT; + saved_uid = kauth_cred_getuid(nd->nd_cr); + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = NULL; + len = retlen = 0; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_get_64(error, nmreq, off); + nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); + nfsm_chain_get_32(error, nmreq, stable); + } else { + nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); + nfsm_chain_get_32(error, nmreq, off); + nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); + if (nfsrv_async) + stable = NFS_WRITE_UNSTABLE; + } + nfsm_chain_get_32(error, nmreq, len); + nfsmerr_if(error); + retlen = len; + + /* + * For NFS Version 2, it is not obvious what a write of zero length + * should do, but I might as well be consistent with Version 3, + * which is to return ok so long as there are no permission problems. + */ + if (len > 0) { - MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, - M_WAITOK); - uiop->uio_iov = iv = ivp; - uiop->uio_iovcnt = cnt; - mp = mrep; - while (mp) { - if (mp->m_len > 0) { - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - ivp++; - } - mp = mp->m_next; - } + error = nfsm_chain_trim_data(nmreq, len, &mlen); + nfsmerr_if(error); + } else { + mlen = 0; + } + if ((len > NFSRV_MAXDATA) || (len < 0) || (mlen < len)) { + error = EIO; + goto nfsmerr; + } + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); - /* - * XXX - * The IO_METASYNC flag indicates that all metadata (and not just - * enough to ensure data integrity) mus be written to stable storage - * synchronously. - * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) - */ - if (stable == NFSV3WRITE_UNSTABLE) - ioflags = IO_NODELOCKED; - else if (stable == NFSV3WRITE_DATASYNC) - ioflags = (IO_SYNC | IO_NODELOCKED); - else - ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); - uiop->uio_resid = len; - uiop->uio_rw = UIO_WRITE; - uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = (struct proc *)0; - uiop->uio_offset = off; - didhold = ubc_hold(vp); - error = VOP_WRITE(vp, uiop, ioflags, cred); - nfsstats.srvvop_writes++; - FREE((caddr_t)iv, M_TEMP); - } - aftat_ret = VOP_GETATTR(vp, vap, cred, procp); - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&preattr); + preattrerr = vnode_getattr(vp, &preattr, ctx); + } + if (vnode_vtype(vp) != VREG) { + if (nd->nd_vers == NFS_VER3) + error = EINVAL; + else + error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES; + } if (!error) - error = aftat_ret; - nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + - 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); - if (v3) { - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); - if (error) - return (0); - nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(retlen); + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx, nxo, 1); + nfsmerr_if(error); + + if (len > 0) { + for (mcount=0, m=nmreq->nmc_mcur; m; m = mbuf_next(m)) + if (mbuf_len(m) > 0) + mcount++; + MALLOC(uio_bufp, char *, UIO_SIZEOF(mcount), M_TEMP, M_WAITOK); + if (uio_bufp) + auio = uio_createwithbuffer(mcount, off, UIO_SYSSPACE, UIO_WRITE, uio_bufp, UIO_SIZEOF(mcount)); + if (!uio_bufp || !auio) + error = ENOMEM; + nfsmerr_if(error); + for (m = nmreq->nmc_mcur; m; m = mbuf_next(m)) + if ((mlen = mbuf_len(m)) > 0) + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), mlen); /* - * If nfs_async is set, then pretend the write was FILESYNC. + * XXX The IO_METASYNC flag indicates that all metadata (and not just + * enough to ensure data integrity) mus be written to stable storage + * synchronously. (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) */ - if (stable == NFSV3WRITE_UNSTABLE && !nfs_async) - *tl++ = txdr_unsigned(stable); + if (stable == NFS_WRITE_UNSTABLE) + ioflags = IO_NODELOCKED; + else if (stable == NFS_WRITE_DATASYNC) + ioflags = (IO_SYNC | IO_NODELOCKED); else - *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); - /* - * Actually, there is no need to txdr these fields, - * but it may make the values more human readable, - * for debugging purposes. - */ - *tl++ = txdr_unsigned(boottime.tv_sec); - *tl = txdr_unsigned(boottime.tv_usec); + ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); + + error = VNOP_WRITE(vp, auio, ioflags, ctx); + OSAddAtomic64(1, &nfsstats.srvvop_writes); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.bytes_written, len); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, len); + +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CONTENT_MODIFIED, vp)) + nfsrv_modified(vp, ctx); +#endif + } + nfsm_srv_vattr_init(&postattr, nd->nd_vers); + postattrerr = vnode_getattr(vp, &postattr, ctx); + if (!error && (nd->nd_vers == NFS_VER2)) + error = postattrerr; /* NFSv2 must have attributes to return */ + vnode_put(vp); + vp = NULL; + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_PREOPATTR(nd->nd_vers) + + NFSX_POSTOPORFATTR(nd->nd_vers) + 2 * NFSX_UNSIGNED + + NFSX_WRITEVERF(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_wcc_data(error, nd, &nmrep, + preattrerr, &preattr, postattrerr, &postattr); + nfsmout_if(error || nd->nd_repstat); + nfsm_chain_add_32(error, &nmrep, retlen); + /* If nfsrv_async is set, then pretend the write was FILESYNC. */ + if ((stable == NFS_WRITE_UNSTABLE) && !nfsrv_async) + nfsm_chain_add_32(error, &nmrep, stable); + else + nfsm_chain_add_32(error, &nmrep, NFS_WRITE_FILESYNC); + /* write verifier */ + nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_sec); + nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_usec); } else { - nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + error = nfsm_chain_add_fattr(nd, &nmrep, &postattr); } - nfsm_srvdone; +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (vp) + vnode_put(vp); + if (uio_bufp != NULL) + FREE(uio_bufp, M_TEMP); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* * NFS write service with write gathering support. Called when - * nfsrvw_procrastinate > 0. + * nfsrv_wg_delay > 0. * See: Chet Juszczak, "Improving the Write Performance of an NFS Server", * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco, * Jan. 1994. */ + +#define NWDELAYHASH(sock, f) \ + (&(sock)->ns_wdelayhashtbl[(*((u_int32_t *)(f))) % NFS_WDELAYHASHSIZ]) +/* These macros compare nfsrv_descript structures. */ +#define NFSW_CONTIG(o, n) \ + (((o)->nd_eoff >= (n)->nd_off) && nfsrv_fhmatch(&(o)->nd_fh, &(n)->nd_fh)) +/* + * XXX The following is an incorrect comparison; it fails to take into account + * XXX scoping of MAC labels, but we currently lack KPI for credential + * XXX comparisons. + */ +#define NFSW_SAMECRED(o, n) \ + (!bcmp((caddr_t)(o)->nd_cr, (caddr_t)(n)->nd_cr, \ + sizeof (struct ucred))) + int -nfsrv_writegather(ndp, slp, procp, mrq) - struct nfsrv_descript **ndp; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_writegather( + struct nfsrv_descript **ndp, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - register struct iovec *ivp; - register struct mbuf *mp; - register struct nfsrv_descript *wp, *nfsd, *owp, *swp; - register struct nfs_fattr *fp; - register int i; - struct iovec *iov; - struct nfsrvw_delayhash *wpp; - struct ucred *cred; - struct vattr va, forat; - register u_long *tl; - register long t1; - caddr_t bpos, dpos; - int error = 0, rdonly, cache, len, forat_ret = 1; - int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; - char *cp2; - struct mbuf *mb, *mb2, *mreq, *mrep, *md; - struct vnode *vp; - struct uio io, *uiop = &io; - u_quad_t frev, cur_usec; - int didhold; + struct nfsrv_descript *nd, *wp, *owp, *swp; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct nfsrv_wg_delayhash *wpp; + uid_t saved_uid; + struct vnode_attr preattr, postattr; + int error, mlen, i, ioflags, tlen; + int preattrerr, postattrerr; + vnode_t vp; + mbuf_t m; + uio_t auio = NULL; + char *uio_bufp = NULL; + u_quad_t cur_usec; struct timeval now; + struct nfsm_chain *nmreq, nmrep; -#ifndef nolint - i = 0; - len = 0; -#endif - *mrq = NULL; + error = 0; + preattrerr = postattrerr = ENOENT; + nfsm_chain_null(&nmrep); + vp = NULL; + + *mrepp = NULL; if (*ndp) { - nfsd = *ndp; + nd = *ndp; *ndp = NULL; - mrep = nfsd->nd_mrep; - md = nfsd->nd_md; - dpos = nfsd->nd_dpos; - cred = &nfsd->nd_cr; - v3 = (nfsd->nd_flag & ND_NFSV3); - LIST_INIT(&nfsd->nd_coalesce); - nfsd->nd_mreq = NULL; - nfsd->nd_stable = NFSV3WRITE_FILESYNC; + nmreq = &nd->nd_nmreq; + LIST_INIT(&nd->nd_coalesce); + nd->nd_mrep = NULL; + nd->nd_stable = NFS_WRITE_FILESYNC; microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - nfsd->nd_time = cur_usec + - (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate); - - /* - * Now, get the write header.. - */ - nfsm_srvmtofh(&nfsd->nd_fh); - if (v3) { - nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); - fxdr_hyper(tl, &nfsd->nd_off); - tl += 3; - nfsd->nd_stable = fxdr_unsigned(int, *tl++); + nd->nd_time = cur_usec + + ((nd->nd_vers == NFS_VER3) ? nfsrv_wg_delay_v3 : nfsrv_wg_delay); + + /* Now, get the write header... */ + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nd->nd_fh.nfh_fhp, nd->nd_fh.nfh_len); + /* XXX shouldn't we be checking for invalid FHs before doing any more work? */ + nfsmerr_if(error); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_get_64(error, nmreq, nd->nd_off); + nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); + nfsm_chain_get_32(error, nmreq, nd->nd_stable); } else { - nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); - nfsd->nd_off = (off_t)fxdr_unsigned(u_long, *++tl); - tl += 2; - if (nfs_async) - nfsd->nd_stable = NFSV3WRITE_UNSTABLE; + nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); + nfsm_chain_get_32(error, nmreq, nd->nd_off); + nfsm_chain_adv(error, nmreq, NFSX_UNSIGNED); + if (nfsrv_async) + nd->nd_stable = NFS_WRITE_UNSTABLE; } - len = fxdr_unsigned(long, *tl); - nfsd->nd_len = len; - nfsd->nd_eoff = nfsd->nd_off + len; - - /* - * Trim the header out of the mbuf list and trim off any trailing - * junk so that the mbuf list has only the write data. - */ - zeroing = 1; - i = 0; - mp = mrep; - while (mp) { - if (mp == md) { - zeroing = 0; - adjust = dpos - mtod(mp, caddr_t); - mp->m_len -= adjust; - if (mp->m_len > 0 && adjust > 0) - NFSMADV(mp, adjust); - } - if (zeroing) - mp->m_len = 0; - else { - i += mp->m_len; - if (i > len) { - mp->m_len -= (i - len); - zeroing = 1; - } - } - mp = mp->m_next; + nfsm_chain_get_32(error, nmreq, nd->nd_len); + nfsmerr_if(error); + nd->nd_eoff = nd->nd_off + nd->nd_len; + + if (nd->nd_len > 0) { + error = nfsm_chain_trim_data(nmreq, nd->nd_len, &mlen); + nfsmerr_if(error); + } else { + mlen = 0; } - if (len > NFS_MAXDATA || len < 0 || i < len) { -nfsmout: - m_freem(mrep); - mrep = NULL; + + if ((nd->nd_len > NFSRV_MAXDATA) || (nd->nd_len < 0) || (mlen < nd->nd_len)) { error = EIO; - nfsm_writereply(2 * NFSX_UNSIGNED, v3); - if (v3) - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); - nfsd->nd_mreq = mreq; - nfsd->nd_mrep = NULL; - nfsd->nd_time = 0; +nfsmerr: + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers)); + if (!error) { + nd->nd_mrep = nmrep.nmc_mhead; + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_wcc_data(error, nd, &nmrep, + preattrerr, &preattr, postattrerr, &postattr); + } + nfsm_chain_build_done(error, &nmrep); + nd->nd_time = 1; } - + /* * Add this entry to the hash and time queues. */ - s = splsoftclock(); + lck_mtx_lock(&slp->ns_wgmutex); owp = NULL; wp = slp->ns_tq.lh_first; - while (wp && wp->nd_time < nfsd->nd_time) { + while (wp && wp->nd_time < nd->nd_time) { owp = wp; wp = wp->nd_tq.le_next; } - NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff)); if (owp) { - LIST_INSERT_AFTER(owp, nfsd, nd_tq); + LIST_INSERT_AFTER(owp, nd, nd_tq); } else { - LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); + LIST_INSERT_HEAD(&slp->ns_tq, nd, nd_tq); } - if (nfsd->nd_mrep) { - wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); + if (!error) { + wpp = NWDELAYHASH(slp, nd->nd_fh.nfh_fid); owp = NULL; wp = wpp->lh_first; - while (wp && - bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { + while (wp && !nfsrv_fhmatch(&nd->nd_fh, &wp->nd_fh)) { owp = wp; wp = wp->nd_hash.le_next; } - while (wp && wp->nd_off < nfsd->nd_off && - !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { + while (wp && (wp->nd_off < nd->nd_off) && + nfsrv_fhmatch(&nd->nd_fh, &wp->nd_fh)) { owp = wp; wp = wp->nd_hash.le_next; } if (owp) { - LIST_INSERT_AFTER(owp, nfsd, nd_hash); - + LIST_INSERT_AFTER(owp, nd, nd_hash); /* * Search the hash list for overlapping entries and * coalesce. */ - for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) { - wp = nfsd->nd_hash.le_next; - if (NFSW_SAMECRED(owp, nfsd)) - nfsrvw_coalesce(owp, nfsd); + for(; nd && NFSW_CONTIG(owp, nd); nd = wp) { + wp = nd->nd_hash.le_next; + if (NFSW_SAMECRED(owp, nd)) + nfsrv_wg_coalesce(owp, nd); } } else { - LIST_INSERT_HEAD(wpp, nfsd, nd_hash); + LIST_INSERT_HEAD(wpp, nd, nd_hash); } } - splx(s); + } else { + lck_mtx_lock(&slp->ns_wgmutex); } - + /* - * Now, do VOP_WRITE()s for any one(s) that need to be done now + * Now, do VNOP_WRITE()s for any one(s) that need to be done now * and generate the associated reply mbuf list(s). */ loop1: microuptime(&now); cur_usec = (u_quad_t)now.tv_sec * 1000000 + (u_quad_t)now.tv_usec; - s = splsoftclock(); - for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { - owp = nfsd->nd_tq.le_next; - if (nfsd->nd_time > cur_usec) + for (nd = slp->ns_tq.lh_first; nd; nd = owp) { + owp = nd->nd_tq.le_next; + if (nd->nd_time > cur_usec) break; - if (nfsd->nd_mreq) + if (nd->nd_mrep) continue; - NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff)); - LIST_REMOVE(nfsd, nd_tq); - LIST_REMOVE(nfsd, nd_hash); - splx(s); - mrep = nfsd->nd_mrep; - nfsd->nd_mrep = NULL; - cred = &nfsd->nd_cr; - v3 = (nfsd->nd_flag & ND_NFSV3); - forat_ret = aftat_ret = 1; - error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, - nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); + LIST_REMOVE(nd, nd_tq); + LIST_REMOVE(nd, nd_hash); + nmreq = &nd->nd_nmreq; + preattrerr = postattrerr = ENOENT; + + /* save the incoming uid before mapping, */ + /* for updating active user stats later */ + saved_uid = kauth_cred_getuid(nd->nd_cr); + + error = nfsrv_fhtovp(&nd->nd_fh, nd, &vp, &nx, &nxo); + if (!error) { + /* update per-export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + if (error) + vnode_put(vp); + } if (!error) { - if (v3) - forat_ret = VOP_GETATTR(vp, &forat, cred, procp); - if (vp->v_type != VREG) { - if (v3) + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&preattr); + preattrerr = vnode_getattr(vp, &preattr, ctx); + } + if (vnode_vtype(vp) != VREG) { + if (nd->nd_vers == NFS_VER3) error = EINVAL; else - error = (vp->v_type == VDIR) ? EISDIR : EACCES; + error = (vnode_vtype(vp) == VDIR) ? EISDIR : EACCES; } } else vp = NULL; - if (!error) { - nqsrv_getl(vp, ND_WRITE); - error = nfsrv_access(vp, VWRITE, cred, rdonly, procp, 1); - } - - if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) + if (!error) + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx, nxo, 1); + + if (nd->nd_stable == NFS_WRITE_UNSTABLE) ioflags = IO_NODELOCKED; - else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) + else if (nd->nd_stable == NFS_WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); - uiop->uio_rw = UIO_WRITE; - uiop->uio_segflg = UIO_SYSSPACE; - uiop->uio_procp = (struct proc *)0; - uiop->uio_offset = nfsd->nd_off; - uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; - didhold = 0; - if (uiop->uio_resid > 0) { - mp = mrep; - i = 0; - while (mp) { - if (mp->m_len > 0) + + if (!error && ((nd->nd_eoff - nd->nd_off) > 0)) { + for (i=0, m=nmreq->nmc_mhead; m; m = mbuf_next(m)) + if (mbuf_len(m) > 0) i++; - mp = mp->m_next; - } - uiop->uio_iovcnt = i; - MALLOC(iov, struct iovec *, i * sizeof (struct iovec), - M_TEMP, M_WAITOK); - uiop->uio_iov = ivp = iov; - mp = mrep; - while (mp) { - if (mp->m_len > 0) { - ivp->iov_base = mtod(mp, caddr_t); - ivp->iov_len = mp->m_len; - ivp++; - } - mp = mp->m_next; - } + + MALLOC(uio_bufp, char *, UIO_SIZEOF(i), M_TEMP, M_WAITOK); + if (uio_bufp) + auio = uio_createwithbuffer(i, nd->nd_off, UIO_SYSSPACE, + UIO_WRITE, uio_bufp, UIO_SIZEOF(i)); + if (!uio_bufp || !auio) + error = ENOMEM; if (!error) { - didhold = ubc_hold(vp); - error = VOP_WRITE(vp, uiop, ioflags, cred); - nfsstats.srvvop_writes++; + for (m = nmreq->nmc_mhead; m; m = mbuf_next(m)) + if ((tlen = mbuf_len(m)) > 0) + uio_addiov(auio, CAST_USER_ADDR_T((caddr_t)mbuf_data(m)), tlen); + error = VNOP_WRITE(vp, auio, ioflags, ctx); + OSAddAtomic64(1, &nfsstats.srvvop_writes); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.bytes_written, nd->nd_len); + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, nd->nd_len); + +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CONTENT_MODIFIED, vp)) + nfsrv_modified(vp, ctx); +#endif + } + if (uio_bufp) { + FREE(uio_bufp, M_TEMP); + uio_bufp = NULL; } - FREE((caddr_t)iov, M_TEMP); } - m_freem(mrep); - mrep = NULL; if (vp) { - aftat_ret = VOP_GETATTR(vp, &va, cred, procp); - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); + nfsm_srv_vattr_init(&postattr, nd->nd_vers); + postattrerr = vnode_getattr(vp, &postattr, ctx); + vnode_put(vp); } /* * Loop around generating replies for all write rpcs that have * now been completed. */ - swp = nfsd; + swp = nd; do { - NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff)); if (error) { - nfsm_writereply(NFSX_WCCDATA(v3), v3); - if (v3) { - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers)); + if (!error && (nd->nd_vers == NFS_VER3)) { + nfsm_chain_add_wcc_data(error, nd, &nmrep, + preattrerr, &preattr, postattrerr, &postattr); } } else { - nfsm_writereply(NFSX_PREOPATTR(v3) + - NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + - NFSX_WRITEVERF(v3), v3); - if (v3) { - nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); - nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(nfsd->nd_len); - *tl++ = txdr_unsigned(swp->nd_stable); - /* - * Actually, there is no need to txdr these fields, - * but it may make the values more human readable, - * for debugging purposes. - */ - *tl++ = txdr_unsigned(boottime.tv_sec); - *tl = txdr_unsigned(boottime.tv_usec); - } else { - nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(&va, fp); + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_PREOPATTR(nd->nd_vers) + + NFSX_POSTOPORFATTR(nd->nd_vers) + 2 * NFSX_UNSIGNED + + NFSX_WRITEVERF(nd->nd_vers)); + if (!error && (nd->nd_vers == NFS_VER3)) { + nfsm_chain_add_wcc_data(error, nd, &nmrep, + preattrerr, &preattr, postattrerr, &postattr); + nfsm_chain_add_32(error, &nmrep, nd->nd_len); + nfsm_chain_add_32(error, &nmrep, nd->nd_stable); + /* write verifier */ + nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_sec); + nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_usec); + } else if (!error) { + error = nfsm_chain_add_fattr(nd, &nmrep, &postattr); } } - nfsd->nd_mreq = mreq; - if (nfsd->nd_mrep) - panic("nfsrv_write: nd_mrep not free"); + nfsm_chain_build_done(error, &nmrep); + nfsmerr_if(error); + nd->nd_mrep = nmrep.nmc_mhead; /* * Done. Put it at the head of the timer queue so that * the final phase can return the reply. */ - s = splsoftclock(); - if (nfsd != swp) { - nfsd->nd_time = 0; - LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); + if (nd != swp) { + nd->nd_time = 1; + LIST_INSERT_HEAD(&slp->ns_tq, nd, nd_tq); } - nfsd = swp->nd_coalesce.lh_first; - if (nfsd) { - LIST_REMOVE(nfsd, nd_tq); + nd = swp->nd_coalesce.lh_first; + if (nd) { + LIST_REMOVE(nd, nd_tq); } - splx(s); - } while (nfsd); - s = splsoftclock(); - swp->nd_time = 0; + } while (nd); + swp->nd_time = 1; LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); - splx(s); goto loop1; } - splx(s); /* * Search for a reply to return. */ - s = splsoftclock(); - for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) - if (nfsd->nd_mreq) { - NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff)); - LIST_REMOVE(nfsd, nd_tq); - *mrq = nfsd->nd_mreq; - *ndp = nfsd; + for (nd = slp->ns_tq.lh_first; nd; nd = nd->nd_tq.le_next) + if (nd->nd_mrep) { + LIST_REMOVE(nd, nd_tq); + *mrepp = nd->nd_mrep; + *ndp = nd; break; } - splx(s); + slp->ns_wgtime = slp->ns_tq.lh_first ? slp->ns_tq.lh_first->nd_time : 0; + lck_mtx_unlock(&slp->ns_wgmutex); + + /* + * If we've just created a write pending gather, + * start the timer to check on it soon to make sure + * the write will be completed. + * + * Add/Remove the socket in the nfsrv_sockwg queue as needed. + */ + lck_mtx_lock(nfsd_mutex); + if (slp->ns_wgtime) { + if (slp->ns_wgq.tqe_next == SLPNOLIST) { + TAILQ_INSERT_HEAD(&nfsrv_sockwg, slp, ns_wgq); + } + if (!nfsrv_wg_timer_on) { + nfsrv_wg_timer_on = 1; + nfs_interval_timer_start(nfsrv_wg_timer_call, + NFSRV_WGATHERDELAY); + } + } else if (slp->ns_wgq.tqe_next != SLPNOLIST) { + TAILQ_REMOVE(&nfsrv_sockwg, slp, ns_wgq); + slp->ns_wgq.tqe_next = SLPNOLIST; + } + lck_mtx_unlock(nfsd_mutex); + return (0); } /* - * Coalesce the write request nfsd into owp. To do this we must: - * - remove nfsd from the queues - * - merge nfsd->nd_mrep into owp->nd_mrep + * Coalesce the write request nd into owp. To do this we must: + * - remove nd from the queues + * - merge nd->nd_nmreq into owp->nd_nmreq * - update the nd_eoff and nd_stable for owp - * - put nfsd on owp's nd_coalesce list - * NB: Must be called at splsoftclock(). + * - put nd on owp's nd_coalesce list */ -static void -nfsrvw_coalesce(owp, nfsd) - register struct nfsrv_descript *owp; - register struct nfsrv_descript *nfsd; +int +nfsrv_wg_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nd) { - register int overlap; - register struct mbuf *mp; + int overlap, error; + mbuf_t mp, mpnext; struct nfsrv_descript *p; - NFS_DPF(WG, ("C%03x-%03x", - nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff)); - LIST_REMOVE(nfsd, nd_hash); - LIST_REMOVE(nfsd, nd_tq); - if (owp->nd_eoff < nfsd->nd_eoff) { - overlap = owp->nd_eoff - nfsd->nd_off; - if (overlap < 0) - panic("nfsrv_coalesce: bad off"); - if (overlap > 0) - m_adj(nfsd->nd_mrep, overlap); - mp = owp->nd_mrep; - while (mp->m_next) - mp = mp->m_next; - mp->m_next = nfsd->nd_mrep; - owp->nd_eoff = nfsd->nd_eoff; - } else - m_freem(nfsd->nd_mrep); - nfsd->nd_mrep = NULL; - if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) - owp->nd_stable = NFSV3WRITE_FILESYNC; - else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC && - owp->nd_stable == NFSV3WRITE_UNSTABLE) - owp->nd_stable = NFSV3WRITE_DATASYNC; - LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq); + LIST_REMOVE(nd, nd_hash); + LIST_REMOVE(nd, nd_tq); + if (owp->nd_eoff < nd->nd_eoff) { + overlap = owp->nd_eoff - nd->nd_off; + if (overlap < 0) + return (EIO); + if (overlap > 0) + mbuf_adj(nd->nd_nmreq.nmc_mhead, overlap); + mp = owp->nd_nmreq.nmc_mhead; + while ((mpnext = mbuf_next(mp))) + mp = mpnext; + error = mbuf_setnext(mp, nd->nd_nmreq.nmc_mhead); + if (error) + return (error); + owp->nd_eoff = nd->nd_eoff; + } else { + mbuf_freem(nd->nd_nmreq.nmc_mhead); + } + nd->nd_nmreq.nmc_mhead = NULL; + nd->nd_nmreq.nmc_mcur = NULL; + if (nd->nd_stable == NFS_WRITE_FILESYNC) + owp->nd_stable = NFS_WRITE_FILESYNC; + else if ((nd->nd_stable == NFS_WRITE_DATASYNC) && + (owp->nd_stable == NFS_WRITE_UNSTABLE)) + owp->nd_stable = NFS_WRITE_DATASYNC; + LIST_INSERT_HEAD(&owp->nd_coalesce, nd, nd_tq); /* - * If nfsd had anything else coalesced into it, transfer them + * If nd had anything else coalesced into it, transfer them * to owp, otherwise their replies will never get sent. */ - for (p = nfsd->nd_coalesce.lh_first; p; - p = nfsd->nd_coalesce.lh_first) { - LIST_REMOVE(p, nd_tq); - LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq); + while ((p = nd->nd_coalesce.lh_first)) { + LIST_REMOVE(p, nd_tq); + LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq); + } + return (0); +} + +/* + * Scan the write gathering queues for writes that need to be + * completed now. + */ +void +nfsrv_wg_timer(__unused void *param0, __unused void *param1) +{ + struct timeval now; + uint64_t cur_usec, next_usec; + int interval; + struct nfsrv_sock *slp; + int writes_pending = 0; + + microuptime(&now); + cur_usec = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_usec; + next_usec = cur_usec + (NFSRV_WGATHERDELAY * 1000); + + lck_mtx_lock(nfsd_mutex); + TAILQ_FOREACH(slp, &nfsrv_sockwg, ns_wgq) { + if (slp->ns_wgtime) { + writes_pending++; + if (slp->ns_wgtime <= cur_usec) { + lck_rw_lock_exclusive(&slp->ns_rwlock); + slp->ns_flag |= SLP_DOWRITES; + lck_rw_done(&slp->ns_rwlock); + nfsrv_wakenfsd(slp); + continue; + } + if (slp->ns_wgtime < next_usec) + next_usec = slp->ns_wgtime; + } } + + if (writes_pending == 0) { + nfsrv_wg_timer_on = 0; + lck_mtx_unlock(nfsd_mutex); + return; + } + lck_mtx_unlock(nfsd_mutex); + + /* + * Return the number of msec to wait again + */ + interval = (next_usec - cur_usec) / 1000; + if (interval < 1) + interval = 1; + nfs_interval_timer_start(nfsrv_wg_timer_call, interval); } /* @@ -1382,11 +1783,9 @@ nfsrvw_coalesce(owp, nfsd) * that used to be here.) */ void -nfsrvw_sort(list, num) - register gid_t *list; - register int num; +nfsrv_group_sort(gid_t *list, int num) { - register int i, j; + int i, j; gid_t v; /* Insertion sort. */ @@ -1399,276 +1798,333 @@ nfsrvw_sort(list, num) } } -/* - * copy credentials making sure that the result can be compared with bcmp(). - */ -void -nfsrv_setcred(incred, outcred) - register struct ucred *incred, *outcred; -{ - register int i; - - bzero((caddr_t)outcred, sizeof (struct ucred)); - outcred->cr_ref = 1; - outcred->cr_uid = incred->cr_uid; - outcred->cr_ngroups = incred->cr_ngroups; - for (i = 0; i < incred->cr_ngroups; i++) - outcred->cr_groups[i] = incred->cr_groups[i]; - nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); -} - /* * nfs create service * now does a truncate to 0 length via. setattr if it already exists */ int -nfsrv_create(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_create( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct nfs_fattr *fp; - struct vattr va, dirfor, diraft; - register struct vattr *vap = &va; - register struct nfsv2_sattr *sp; - register u_long *tl; - struct nameidata nd; - register caddr_t cp; - register long t1; - caddr_t bpos; - int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev, tempsize; + struct vnode_attr dpreattr, dpostattr, postattr; + struct vnode_attr va, *vap = &va; + struct nameidata ni; + int error, rdev, dpreattrerr, dpostattrerr, postattrerr; + int how, exclusive_flag; + uint32_t len = 0, cnflags; + vnode_t vp, dvp, dirp; + struct nfs_filehandle nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + u_quad_t tempsize; u_char cverf[NFSX_V3CREATEVERF]; + uid_t saved_uid; + struct nfsm_chain *nmreq, nmrep; -#ifndef nolint + error = 0; + dpreattrerr = dpostattrerr = postattrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = dvp = dirp = NULL; + exclusive_flag = 0; + ni.ni_cnd.cn_nameiop = 0; rdev = 0; + + saved_uid = kauth_cred_getuid(nd->nd_cr); + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = CREATE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LINK; #endif - nd.ni_cnd.cn_nameiop = 0; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); + } + } if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } else { + vnode_put(dirp); + dirp = NULL; } } + if (error) { - nfsm_reply(NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - if (dirp) - vrele(dirp); - return (0); + ni.ni_cnd.cn_nameiop = 0; + goto nfsmerr; } - VATTR_NULL(vap); - if (v3) { - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - how = fxdr_unsigned(int, *tl); + + dvp = ni.ni_dvp; + vp = ni.ni_vp; + VATTR_INIT(vap); + + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_get_32(error, nmreq, how); + nfsmerr_if(error); switch (how) { - case NFSV3CREATE_GUARDED: - if (nd.ni_vp) { + case NFS_CREATE_GUARDED: + if (vp) { error = EEXIST; break; } - case NFSV3CREATE_UNCHECKED: - nfsm_srvsattr(vap); + case NFS_CREATE_UNCHECKED: + error = nfsm_chain_get_sattr(nd, nmreq, vap); break; - case NFSV3CREATE_EXCLUSIVE: - nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); - bcopy(cp, cverf, NFSX_V3CREATEVERF); + case NFS_CREATE_EXCLUSIVE: + nfsm_chain_get_opaque(error, nmreq, NFSX_V3CREATEVERF, cverf); exclusive_flag = 1; - if (nd.ni_vp == NULL) - vap->va_mode = 0; + if (vp == NULL) + VATTR_SET(vap, va_mode, 0); break; }; - vap->va_type = VREG; + VATTR_SET(vap, va_type, VREG); } else { - nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); - if (vap->va_type == VNON) - vap->va_type = VREG; - vap->va_mode = nfstov_mode(sp->sa_mode); - switch (vap->va_type) { - case VREG: - tsize = fxdr_unsigned(long, sp->sa_size); - if (tsize != -1) - vap->va_size = (u_quad_t)tsize; - break; + enum vtype v_type; + + error = nfsm_chain_get_sattr(nd, nmreq, vap); + nfsmerr_if(error); + v_type = vap->va_type; + if (v_type == VNON) + v_type = VREG; + VATTR_SET(vap, va_type, v_type); + + switch (v_type) { case VCHR: case VBLK: case VFIFO: - rdev = fxdr_unsigned(long, sp->sa_size); + rdev = vap->va_data_size; + VATTR_CLEAR_ACTIVE(vap, va_data_size); + break; + default: break; }; } + nfsmerr_if(error); /* - * Iff doesn't exist, create it + * If it doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ?? */ - if (nd.ni_vp == NULL) { + if (vp == NULL) { + kauth_acl_t xacl = NULL; + + /* authorize before creating */ + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0); + + /* construct ACL and handle inheritance */ + if (!error) { + error = kauth_acl_inherit(dvp, + NULL, + &xacl, + 0 /* !isdir */, + ctx); + + if (!error && xacl != NULL) + VATTR_SET(vap, va_acl, xacl); + } + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + /* + * Server policy is to alway use the mapped rpc credential for + * file system object creation. This has the nice side effect of + * enforcing BSD creation semantics + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + + /* validate new-file security information */ + if (!error) + error = vnode_authattr_new(dvp, vap, 0, ctx); + if (vap->va_type == VREG || vap->va_type == VSOCK) { - vrele(nd.ni_startdir); - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + + if (!error) + error = VNOP_CREATE(dvp, &vp, &ni.ni_cnd, vap, ctx); + + if (!error && !VATTR_ALL_SUPPORTED(vap)) + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + error = vnode_setattr_fallback(vp, vap, ctx); + + if (xacl != NULL) + kauth_acl_free(xacl); + if (!error) { - nfsrv_object_create(nd.ni_vp); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; if (exclusive_flag) { exclusive_flag = 0; - VATTR_NULL(vap); - bcopy(cverf, (caddr_t)&vap->va_atime, + VATTR_INIT(vap); + bcopy(cverf, (caddr_t)&vap->va_access_time, NFSX_V3CREATEVERF); - error = VOP_SETATTR(nd.ni_vp, vap, cred, - procp); + VATTR_SET_ACTIVE(vap, va_access_time); + // skip authorization, as this is an + // NFS internal implementation detail. + error = vnode_setattr(vp, vap, ctx); + } + +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && need_fsevent(FSE_CREATE_FILE, vp)) { + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); } +#endif } + } else if (vap->va_type == VCHR || vap->va_type == VBLK || vap->va_type == VFIFO) { - if (vap->va_type == VCHR && rdev == 0xffffffff) - vap->va_type = VFIFO; - if (vap->va_type != VFIFO && - (error = suser(cred, (u_short *)0))) { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); - nfsm_reply(0); - return (error); - } else - vap->va_rdev = (dev_t)rdev; - nqsrv_getl(nd.ni_dvp, ND_WRITE); - if ((error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))) { - vrele(nd.ni_startdir); - nfsm_reply(0); + if (vap->va_type == VCHR && rdev == (int)0xffffffff) + VATTR_SET(vap, va_type, VFIFO); + if (vap->va_type != VFIFO) { + error = suser(nd->nd_cr, NULL); + nfsmerr_if(error); + } + VATTR_SET(vap, va_rdev, (dev_t)rdev); + + error = VNOP_MKNOD(dvp, &vp, &ni.ni_cnd, vap, ctx); + + if (xacl != NULL) + kauth_acl_free(xacl); + + nfsmerr_if(error); + + if (vp) { + vnode_recycle(vp); + vnode_put(vp); + vp = NULL; } - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); - nd.ni_cnd.cn_proc = procp; - nd.ni_cnd.cn_cred = cred; - if ((error = lookup(&nd))) { - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - nfsm_reply(0); + ni.ni_cnd.cn_nameiop = LOOKUP; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LOOKUP; +#endif + ni.ni_cnd.cn_flags &= ~LOCKPARENT; + ni.ni_cnd.cn_context = ctx; + ni.ni_startdir = dvp; + ni.ni_usedvp = dvp; + cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(&ni)) == ERECYCLE) { + ni.ni_cnd.cn_flags = cnflags; + ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf; + ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp; } - nfsrv_object_create(nd.ni_vp); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - if (nd.ni_cnd.cn_flags & ISSYMLINK) { - vrele(nd.ni_dvp); - vput(nd.ni_vp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - error = EINVAL; - nfsm_reply(0); + if (!error) { + if (ni.ni_cnd.cn_flags & ISSYMLINK) + error = EINVAL; + vp = ni.ni_vp; } + nfsmerr_if(error); } else { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); error = ENXIO; } - vp = nd.ni_vp; + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + ni.ni_cnd.cn_nameiop = 0; + + vnode_put(dvp); } else { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - vp = nd.ni_vp; - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (vap->va_size != -1) { - error = nfsrv_access(vp, VWRITE, cred, - (nd.ni_cnd.cn_flags & RDONLY), procp, 0); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + ni.ni_cnd.cn_nameiop = 0; + + vnode_put(dvp); + + if (!error && VATTR_IS_ACTIVE(vap, va_data_size)) { + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, + ctx, nxo, 0); if (!error) { - nqsrv_getl(vp, ND_WRITE); - tempsize = vap->va_size; - VATTR_NULL(vap); - vap->va_size = tempsize; - error = VOP_SETATTR(vp, vap, cred, - procp); + tempsize = vap->va_data_size; + VATTR_INIT(vap); + VATTR_SET(vap, va_data_size, tempsize); + error = vnode_setattr(vp, vap, ctx); } - if (error) - vput(vp); - } else { - if (error) - vput(vp); /* make sure we catch the EEXIST for nfsv3 */ } } if (!error) { - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); + error = nfsrv_vptofh(nx, nd->nd_vers, NULL, vp, ctx, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postattr, nd->nd_vers); + postattrerr = vnode_getattr(vp, &postattr, ctx); + if (nd->nd_vers == NFS_VER2) + error = postattrerr; + } } - if (v3) { + if (vp) + vnode_put(vp); + + if (nd->nd_vers == NFS_VER3) { if (exclusive_flag && !error && - bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) + bcmp(cverf, &postattr.va_access_time, NFSX_V3CREATEVERF)) error = EEXIST; - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&dpostattr, NFS_VER3); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); + dirp = NULL; } - nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); - if (v3) { - if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) + + NFSX_FATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + if (!nd->nd_repstat) { + nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr); } - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); } else { - nfsm_srvfhtom(fhp, v3); - nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + nfsm_chain_add_fh(error, &nmrep, NFS_VER2, nfh.nfh_fhp, nfh.nfh_len); + if (!error) + error = nfsm_chain_add_fattr(nd, &nmrep, &postattr); } - return (0); nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (ni.ni_cnd.cn_nameiop) { + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + } if (dirp) - vrele(dirp); - if (nd.ni_cnd.cn_nameiop) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - } - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vput(nd.ni_vp); + vnode_put(dirp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } return (error); } @@ -1676,159 +2132,235 @@ nfsmout: * nfs v3 mknod service */ int -nfsrv_mknod(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_mknod( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, dirfor, diraft; - register struct vattr *vap = &va; - register u_long *tl; - struct nameidata nd; - register long t1; - caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; - u_long major, minor; + struct vnode_attr dpreattr, dpostattr, postattr; + struct vnode_attr va, *vap = &va; + struct nameidata ni; + int error, dpreattrerr, dpostattrerr, postattrerr; + uint32_t len = 0, cnflags; + u_int32_t major = 0, minor = 0; enum vtype vtyp; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - - nd.ni_cnd.cn_nameiop = 0; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); - if (dirp) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); + nfstype nvtype; + vnode_t vp, dvp, dirp; + struct nfs_filehandle nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + uid_t saved_uid; + kauth_acl_t xacl = NULL; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + dpreattrerr = dpostattrerr = postattrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = dvp = dirp = NULL; + ni.ni_cnd.cn_nameiop = 0; + + saved_uid = kauth_cred_getuid(nd->nd_cr); + + nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = CREATE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LINK; +#endif + ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); + } + } + if (dirp) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } if (error) { - nfsm_reply(NFSX_WCCDATA(1)); - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - if (dirp) - vrele(dirp); - return (0); + ni.ni_cnd.cn_nameiop = 0; + goto nfsmerr; } - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - vtyp = nfsv3tov_type(*tl); - if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; + + dvp = ni.ni_dvp; + vp = ni.ni_vp; + + nfsm_chain_get_32(error, nmreq, nvtype); + nfsmerr_if(error); + vtyp = nfstov_type(nvtype, NFS_VER3); + if (!error && (vtyp != VCHR) && (vtyp != VBLK) && (vtyp != VSOCK) && (vtyp != VFIFO)) { error = NFSERR_BADTYPE; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); goto out; } - VATTR_NULL(vap); - nfsm_srvsattr(vap); - if (vtyp == VCHR || vtyp == VBLK) { - nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); - major = fxdr_unsigned(u_long, *tl++); - minor = fxdr_unsigned(u_long, *tl); - vap->va_rdev = makedev(major, minor); - } + + VATTR_INIT(vap); + error = nfsm_chain_get_sattr(nd, nmreq, vap); + if ((vtyp == VCHR) || (vtyp == VBLK)) { + nfsm_chain_get_32(error, nmreq, major); + nfsm_chain_get_32(error, nmreq, minor); + nfsmerr_if(error); + VATTR_SET(vap, va_rdev, makedev(major, minor)); + } + nfsmerr_if(error); /* - * Iff doesn't exist, create it. + * If it doesn't exist, create it. */ - if (nd.ni_vp) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; + if (vp) { error = EEXIST; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); goto out; } - vap->va_type = vtyp; - if (vtyp == VSOCK) { - vrele(nd.ni_startdir); - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); - if (!error) - FREE_ZONE(nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; + VATTR_SET(vap, va_type, vtyp); + + /* authorize before creating */ + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0); + + /* construct ACL and handle inheritance */ + if (!error) { + error = kauth_acl_inherit(dvp, + NULL, + &xacl, + 0 /* !isdir */, + ctx); + + if (!error && xacl != NULL) + VATTR_SET(vap, va_acl, xacl); + } + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + /* + * Server policy is to alway use the mapped rpc credential for + * file system object creation. This has the nice side effect of + * enforcing BSD creation semantics + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + + /* validate new-file security information */ + if (!error) + error = vnode_authattr_new(dvp, vap, 0, ctx); + + if (error) + goto out1; + + if (vtyp == VSOCK) { + error = VNOP_CREATE(dvp, &vp, &ni.ni_cnd, vap, ctx); + + if (!error && !VATTR_ALL_SUPPORTED(vap)) + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + error = vnode_setattr_fallback(vp, vap, ctx); } else { - if (vtyp != VFIFO && (error = suser(cred, (u_short *)0))) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - vput(nd.ni_dvp); - goto out; + if (vtyp != VFIFO && (error = suser(nd->nd_cr, (u_short *)0))) + goto out1; + if ((error = VNOP_MKNOD(dvp, &vp, &ni.ni_cnd, vap, ctx))) + goto out1; + if (vp) { + vnode_recycle(vp); + vnode_put(vp); + vp = NULL; } - nqsrv_getl(nd.ni_dvp, ND_WRITE); - if ((error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))) { - vrele(nd.ni_startdir); - goto out; + ni.ni_cnd.cn_nameiop = LOOKUP; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LOOKUP; +#endif + ni.ni_cnd.cn_flags &= ~LOCKPARENT; + ni.ni_cnd.cn_context = vfs_context_current(); + ni.ni_startdir = dvp; + ni.ni_usedvp = dvp; + cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(&ni)) == ERECYCLE) { + ni.ni_cnd.cn_flags = cnflags; + ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf; + ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp; } - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); - nd.ni_cnd.cn_proc = procp; - nd.ni_cnd.cn_cred = procp->p_ucred; - error = lookup(&nd); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - if (error) - goto out; - if (nd.ni_cnd.cn_flags & ISSYMLINK) { - vrele(nd.ni_dvp); - vput(nd.ni_vp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - error = EINVAL; + if (!error) { + vp = ni.ni_vp; + if (ni.ni_cnd.cn_flags & ISSYMLINK) + error = EINVAL; } } +out1: + if (xacl != NULL) + kauth_acl_free(xacl); out: - vp = nd.ni_vp; + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + ni.ni_cnd.cn_nameiop = 0; + + vnode_put(dvp); + dvp = NULL; + if (!error) { - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); + error = nfsrv_vptofh(nx, NFS_VER3, NULL, vp, ctx, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postattr, NFS_VER3); + postattrerr = vnode_getattr(vp, &postattr, ctx); + } } - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); - nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); - if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + if (vp) { + vnode_put(vp); + vp = NULL; } - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - return (0); + + nfsm_srv_vattr_init(&dpostattr, NFS_VER3); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); + dirp = NULL; + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(NFS_VER3, &nfh) + + NFSX_POSTOPATTR(NFS_VER3) + NFSX_WCCDATA(NFS_VER3)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (!nd->nd_repstat) { + nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr); + } + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (ni.ni_cnd.cn_nameiop) { + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + + if (vp) + vnode_put(vp); + vnode_put(dvp); + } + if (dvp) + vnode_put(dvp); + if (vp) + vnode_put(vp); if (dirp) - vrele(dirp); - if (nd.ni_cnd.cn_nameiop) { - vrele(nd.ni_startdir); - FREE_ZONE((caddr_t)nd.ni_cnd.cn_pnbuf, - nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - } - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vput(nd.ni_vp); + vnode_put(dirp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } return (error); } @@ -1836,292 +2368,746 @@ nfsmout: * nfs remove service */ int -nfsrv_remove(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_remove( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct nameidata nd; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mreq; - struct vnode *vp, *dirp; - struct vattr dirfor, diraft; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - -#ifndef nolint - vp = (struct vnode *)0; + struct nameidata ni; + int error, dpreattrerr, dpostattrerr; + uint32_t len = 0; + uid_t saved_uid; + vnode_t vp, dvp, dirp = NULL; + struct vnode_attr dpreattr, dpostattr; + struct nfs_filehandle nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + dpreattrerr = dpostattrerr = ENOENT; + saved_uid = kauth_cred_getuid(nd->nd_cr); + dvp = vp = dirp = NULL; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = DELETE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_UNLINK; #endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = DELETE; - nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); + } + } if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else - vrele(dirp); + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } else { + vnode_put(dirp); + dirp = NULL; + } } + if (!error) { - vp = nd.ni_vp; - if (vp->v_type == VDIR) { + dvp = ni.ni_dvp; + vp = ni.ni_vp; + + if (vnode_vtype(vp) == VDIR) error = EPERM; /* POSIX */ - goto out; - } - /* - * The root of a mounted filesystem cannot be deleted. - */ - if (vp->v_flag & VROOT) { + else if (vnode_isvroot(vp)) + /* + * The root of a mounted filesystem cannot be deleted. + */ error = EBUSY; - goto out; - } -out: + else + error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx, nxo, 0); + if (!error) { - nqsrv_getl(nd.ni_dvp, ND_WRITE); - nqsrv_getl(vp, ND_WRITE); +#if CONFIG_FSE + char *path = NULL; + int plen; + fse_info finfo; + + if (nfsrv_fsevents_enabled && need_fsevent(FSE_DELETE, dvp)) { + plen = MAXPATHLEN; + if ((path = get_pathbuff()) && !vn_getpath(vp, path, &plen)) { + get_fse_info(vp, &finfo, ctx); + } else if (path) { + release_pathbuff(path); + path = NULL; + } + } +#endif + error = VNOP_REMOVE(dvp, vp, &ni.ni_cnd, 0, ctx); + +#if CONFIG_FSE + if (path) { + if (!error) + add_fsevent(FSE_DELETE, ctx, + FSE_ARG_STRING, plen, path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + release_pathbuff(path); + } +#endif + } - error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); - } + vnode_put(vp); + vnode_put(dvp); } - if (dirp && v3) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + +nfsmerr: + if (dirp) { + nfsm_srv_vattr_init(&dpostattr, nd->nd_vers); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); } - nfsm_reply(NFSX_WCCDATA(v3)); - if (v3) { - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - return (0); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nfsm_srvdone; + return (error); } /* * nfs rename service */ int -nfsrv_rename(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_rename( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; - int tdirfor_ret = 1, tdiraft_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mreq; - struct nameidata fromnd, tond; - struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; - struct vnode *tdirp = (struct vnode *)0; - struct vattr fdirfor, fdiraft, tdirfor, tdiraft; - nfsfh_t fnfh, tnfh; - fhandle_t *ffhp, *tfhp; - u_quad_t frev; + kauth_cred_t saved_cred = NULL; uid_t saved_uid; - -#ifndef nolint - fvp = (struct vnode *)0; + int error; + uint32_t fromlen, tolen; + int fdpreattrerr, fdpostattrerr; + int tdpreattrerr, tdpostattrerr; + char *frompath = NULL, *topath = NULL; + struct nameidata fromni, toni; + vnode_t fvp, tvp, tdvp, fdvp, fdirp, tdirp; + struct vnode_attr fdpreattr, fdpostattr; + struct vnode_attr tdpreattr, tdpostattr; + struct nfs_filehandle fnfh, tnfh; + struct nfs_export *fnx, *tnx; + struct nfs_export_options *fnxo, *tnxo; + enum vtype fvtype, tvtype; + int holding_mntlock; + mount_t locked_mp; + struct nfsm_chain *nmreq, nmrep; + char *from_name, *to_name; +#if CONFIG_FSE + int from_len=0, to_len=0; + fse_info from_finfo, to_finfo; #endif - ffhp = &fnfh.fh_generic; - tfhp = &tnfh.fh_generic; - fromnd.ni_cnd.cn_nameiop = 0; - tond.ni_cnd.cn_nameiop = 0; - nfsm_srvmtofh(ffhp); - nfsm_srvnamesiz(len); + u_char didstats = 0; + const char *oname; + + error = 0; + fdpreattrerr = fdpostattrerr = ENOENT; + tdpreattrerr = tdpostattrerr = ENOENT; + saved_uid = kauth_cred_getuid(nd->nd_cr); + fromlen = tolen = 0; + frompath = topath = NULL; + fdirp = tdirp = NULL; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + + /* + * these need to be set before calling any code + * that they may take us out through the error path. + */ + holding_mntlock = 0; + fvp = tvp = NULL; + fdvp = tdvp = NULL; + locked_mp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, fnfh.nfh_fhp, fnfh.nfh_len); + nfsm_chain_get_32(error, nmreq, fromlen); + nfsm_name_len_check(error, nd, fromlen); + nfsmerr_if(error); + error = nfsm_chain_get_path_namei(nmreq, fromlen, &fromni); + nfsmerr_if(error); + frompath = fromni.ni_cnd.cn_pnbuf; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, tnfh.nfh_fhp, tnfh.nfh_len); + nfsm_chain_get_32(error, nmreq, tolen); + nfsm_name_len_check(error, nd, tolen); + nfsmerr_if(error); + error = nfsm_chain_get_path_namei(nmreq, tolen, &toni); + nfsmerr_if(error); + topath = toni.ni_cnd.cn_pnbuf; + /* * Remember our original uid so that we can reset cr_uid before - * the second nfs_namei() call, in case it is remapped. + * the second nfsrv_namei() call, in case it is remapped. */ - saved_uid = cred->cr_uid; - fromnd.ni_cnd.cn_cred = cred; - fromnd.ni_cnd.cn_nameiop = DELETE; - fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; - error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, - &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + saved_cred = nd->nd_cr; + kauth_cred_ref(saved_cred); +retry: + fromni.ni_cnd.cn_nameiop = DELETE; +#if CONFIG_TRIGGERS + fromni.ni_op = OP_UNLINK; +#endif + fromni.ni_cnd.cn_flags = WANTPARENT; + + fromni.ni_cnd.cn_pnbuf = frompath; + frompath = NULL; + fromni.ni_cnd.cn_pnlen = MAXPATHLEN; + fromni.ni_cnd.cn_flags |= HASBUF; + + error = nfsrv_namei(nd, ctx, &fromni, &fnfh, &fdirp, &fnx, &fnxo); + if (error) + goto out; + fdvp = fromni.ni_dvp; + fvp = fromni.ni_vp; + if (fdirp) { - if (v3) - fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, - procp); - else { - vrele(fdirp); - fdirp = (struct vnode *)0; + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&fdpreattr); + fdpreattrerr = vnode_getattr(fdirp, &fdpreattr, ctx); + } else { + vnode_put(fdirp); + fdirp = NULL; } } + fvtype = vnode_vtype(fvp); + + /* reset credential if it was remapped */ + if (nd->nd_cr != saved_cred) { + kauth_cred_ref(saved_cred); + kauth_cred_unref(&nd->nd_cr); + ctx->vc_ucred = nd->nd_cr = saved_cred; + } + + toni.ni_cnd.cn_nameiop = RENAME; +#if CONFIG_TRIGGERS + toni.ni_op = OP_RENAME; +#endif + toni.ni_cnd.cn_flags = WANTPARENT; + + toni.ni_cnd.cn_pnbuf = topath; + topath = NULL; + toni.ni_cnd.cn_pnlen = MAXPATHLEN; + toni.ni_cnd.cn_flags |= HASBUF; + + if (fvtype == VDIR) + toni.ni_cnd.cn_flags |= WILLBEDIR; + + tnx = NULL; + error = nfsrv_namei(nd, ctx, &toni, &tnfh, &tdirp, &tnx, &tnxo); if (error) { - nfsm_reply(2 * NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); - nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); - if (fdirp) - vrele(fdirp); - return (0); + /* + * Translate error code for rename("dir1", "dir2/."). + */ + if (error == EISDIR && fvtype == VDIR) { + if (nd->nd_vers == NFS_VER3) + error = EINVAL; + else + error = ENOTEMPTY; + } + goto out; } - fvp = fromnd.ni_vp; - nfsm_srvmtofh(tfhp); - nfsm_strsiz(len2, NFS_MAXNAMLEN); - cred->cr_uid = saved_uid; - tond.ni_cnd.cn_cred = cred; - tond.ni_cnd.cn_nameiop = RENAME; - tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; - error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, - &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); - if (tdirp) { - if (v3) - tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, - procp); - else { - vrele(tdirp); - tdirp = (struct vnode *)0; + tdvp = toni.ni_dvp; + tvp = toni.ni_vp; + + if (!didstats) { + /* update export stats once only */ + if (tnx != NULL) { + /* update export stats */ + NFSStatAdd64(&tnx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(tnx, nd, saved_uid, 1, 0, 0); + didstats = 1; } } - if (error) { - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out1; + + if (tdirp) { + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&tdpreattr); + tdpreattrerr = vnode_getattr(tdirp, &tdpreattr, ctx); + } else { + vnode_put(tdirp); + tdirp = NULL; + } } - tdvp = tond.ni_dvp; - tvp = tond.ni_vp; + if (tvp != NULL) { - if (fvp->v_type == VDIR && tvp->v_type != VDIR) { - if (v3) + tvtype = vnode_vtype(tvp); + + if (fvtype == VDIR && tvtype != VDIR) { + if (nd->nd_vers == NFS_VER3) error = EEXIST; else error = EISDIR; goto out; - } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { - if (v3) + } else if (fvtype != VDIR && tvtype == VDIR) { + if (nd->nd_vers == NFS_VER3) error = EEXIST; else error = ENOTDIR; goto out; } - if (tvp->v_type == VDIR && tvp->v_mountedhere) { - if (v3) + if (tvtype == VDIR && vnode_mountedhere(tvp)) { + if (nd->nd_vers == NFS_VER3) error = EXDEV; else error = ENOTEMPTY; goto out; } } - if (fvp->v_type == VDIR && fvp->v_mountedhere) { - if (v3) + if (fvp == tdvp) { + if (nd->nd_vers == NFS_VER3) + error = EINVAL; + else + error = ENOTEMPTY; + goto out; + } + + /* + * Authorization. + * + * If tvp is a directory and not the same as fdvp, or tdvp is not the same as fdvp, + * the node is moving between directories and we need rights to remove from the + * old and add to the new. + * + * If tvp already exists and is not a directory, we need to be allowed to delete it. + * + * Note that we do not inherit when renaming. XXX this needs to be revisited to + * implement the deferred-inherit bit. + */ + { + int moving = 0; + + error = 0; + if ((tvp != NULL) && vnode_isdir(tvp)) { + if (tvp != fdvp) + moving = 1; + } else if (tdvp != fdvp) { + moving = 1; + } + if (moving) { + /* moving out of fdvp, must have delete rights */ + if ((error = nfsrv_authorize(fvp, fdvp, KAUTH_VNODE_DELETE, ctx, fnxo, 0)) != 0) + goto auth_exit; + /* moving into tdvp or tvp, must have rights to add */ + if ((error = nfsrv_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp, + NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, + ctx, tnxo, 0)) != 0) + goto auth_exit; + } else { + /* node staying in same directory, must be allowed to add new name */ + if ((error = nfsrv_authorize(fdvp, NULL, + vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE, + ctx, fnxo, 0)) != 0) + goto auth_exit; + } + /* overwriting tvp */ + if ((tvp != NULL) && !vnode_isdir(tvp) && + ((error = nfsrv_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx, tnxo, 0)) != 0)) + goto auth_exit; + + /* XXX more checks? */ + +auth_exit: + /* authorization denied */ + if (error != 0) + goto out; + } + + if ((vnode_mount(fvp) != vnode_mount(tdvp)) || + (tvp && (vnode_mount(fvp) != vnode_mount(tvp)))) { + if (nd->nd_vers == NFS_VER3) error = EXDEV; else error = ENOTEMPTY; goto out; } - if (fvp->v_mount != tdvp->v_mount) { - if (v3) + /* + * The following edge case is caught here: + * (to cannot be a descendent of from) + * + * o fdvp + * / + * / + * o fvp + * \ + * \ + * o tdvp + * / + * / + * o tvp + */ + if (tdvp->v_parent == fvp) { + if (nd->nd_vers == NFS_VER3) error = EXDEV; else error = ENOTEMPTY; goto out; } - if (fvp == tdvp) - if (v3) - error = EINVAL; + if (fvtype == VDIR && vnode_mountedhere(fvp)) { + if (nd->nd_vers == NFS_VER3) + error = EXDEV; else error = ENOTEMPTY; + goto out; + } /* * If source is the same as the destination (that is the - * same vnode) then there is nothing to do. - * (fixed to have POSIX semantics - CSM 3/2/98) + * same vnode) then there is nothing to do... + * EXCEPT if the underlying file system supports case + * insensitivity and is case preserving. In this case + * the file system needs to handle the special case of + * getting the same vnode as target (fvp) and source (tvp). + * + * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE + * and _PC_CASE_PRESERVING can have this exception, and they need to + * handle the special case of getting the same vnode as target and + * source. NOTE: Then the target is unlocked going into vnop_rename, + * so not to cause locking problems. There is a single reference on tvp. + * + * NOTE - that fvp == tvp also occurs if they are hard linked - NOTE + * that correct behaviour then is just to remove the source (link) */ - if (fvp == tvp) - error = -1; -out: - if (!error) { - nqsrv_getl(fromnd.ni_dvp, ND_WRITE); - nqsrv_getl(tdvp, ND_WRITE); - if (tvp) - nqsrv_getl(tvp, ND_WRITE); - error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, - tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); + if ((fvp == tvp) && (fdvp == tdvp)) { + if (fromni.ni_cnd.cn_namelen == toni.ni_cnd.cn_namelen && + !bcmp(fromni.ni_cnd.cn_nameptr, toni.ni_cnd.cn_nameptr, + fromni.ni_cnd.cn_namelen)) { + goto out; + } + } + + if (holding_mntlock && vnode_mount(fvp) != locked_mp) { + /* + * we're holding a reference and lock + * on locked_mp, but it no longer matches + * what we want to do... so drop our hold + */ + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp != fdvp && fvtype == VDIR) { + /* + * serialize renames that re-shape + * the tree... if holding_mntlock is + * set, then we're ready to go... + * otherwise we + * first need to drop the iocounts + * we picked up, second take the + * lock to serialize the access, + * then finally start the lookup + * process over with the lock held + */ + if (!holding_mntlock) { + /* + * need to grab a reference on + * the mount point before we + * drop all the iocounts... once + * the iocounts are gone, the mount + * could follow + */ + locked_mp = vnode_mount(fvp); + mount_ref(locked_mp, 0); + + /* make a copy of to path to pass to nfsrv_namei() again */ + MALLOC_ZONE(topath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (topath) + bcopy(toni.ni_cnd.cn_pnbuf, topath, tolen + 1); + + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&toni); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + + /* make a copy of from path to pass to nfsrv_namei() again */ + MALLOC_ZONE(frompath, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); + if (frompath) + bcopy(fromni.ni_cnd.cn_pnbuf, frompath, fromlen + 1); + + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromni); + + vnode_put(fvp); + vnode_put(fdvp); + + if (fdirp) { + vnode_put(fdirp); + fdirp = NULL; + } + if (tdirp) { + vnode_put(tdirp); + tdirp = NULL; + } + mount_lock_renames(locked_mp); + holding_mntlock = 1; + + fvp = tvp = NULL; + fdvp = tdvp = NULL; + + fdpreattrerr = tdpreattrerr = ENOENT; + + if (!topath || !frompath) { + /* we couldn't allocate a path, so bail */ + error = ENOMEM; + goto out; + } + + /* reset credential if it was remapped */ + if (nd->nd_cr != saved_cred) { + kauth_cred_ref(saved_cred); + kauth_cred_unref(&nd->nd_cr); + ctx->vc_ucred = nd->nd_cr = saved_cred; + } + + goto retry; + } } else { - VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); - if (tdvp == tvp) - vrele(tdvp); - else - vput(tdvp); + /* + * when we dropped the iocounts to take + * the lock, we allowed the identity of + * the various vnodes to change... if they did, + * we may no longer be dealing with a rename + * that reshapes the tree... once we're holding + * the iocounts, the vnodes can't change type + * so we're free to drop the lock at this point + * and continue on + */ + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + } + + // save these off so we can later verify that fvp is the same + vnode_t oparent; + oname = fvp->v_name; + oparent = fvp->v_parent; + + /* + * If generating an fsevent, then + * stash any pre-rename info we may need. + */ +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && need_fsevent(FSE_RENAME, fvp)) { + int from_truncated = 0, to_truncated = 0; + + get_fse_info(fvp, &from_finfo, ctx); if (tvp) - vput(tvp); - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); - if (error == -1) - error = 0; + get_fse_info(tvp, &to_finfo, ctx); + + from_name = get_pathbuff(); + if (from_name) { + from_len = safe_getpath(fdvp, fromni.ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated); + } + + to_name = from_name ? get_pathbuff() : NULL; + if (to_name) { + to_len = safe_getpath(tdvp, toni.ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated); + } + + if (from_truncated || to_truncated) { + from_finfo.mode |= FSE_TRUNCATED_PATH; + } + + } else { + from_name = NULL; + to_name = NULL; + } +#else /* CONFIG_FSE */ + from_name = NULL; + to_name = NULL; +#endif /* CONFIG_FSE */ + + error = VNOP_RENAME(fromni.ni_dvp, fromni.ni_vp, &fromni.ni_cnd, + toni.ni_dvp, toni.ni_vp, &toni.ni_cnd, ctx); + /* + * fix up name & parent pointers. note that we first + * check that fvp has the same name/parent pointers it + * had before the rename call... this is a 'weak' check + * at best... + */ + if (oname == fvp->v_name && oparent == fvp->v_parent) { + int update_flags; + update_flags = VNODE_UPDATE_NAME; + if (fdvp != tdvp) + update_flags |= VNODE_UPDATE_PARENT; + vnode_update_identity(fvp, tdvp, toni.ni_cnd.cn_nameptr, + toni.ni_cnd.cn_namelen, toni.ni_cnd.cn_hash, update_flags); + } + + /* + * If the rename is OK and we've got the paths + * then add an fsevent. + */ +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && !error && from_name && to_name) { + if (tvp) { + add_fsevent(FSE_RENAME, ctx, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_FINFO, &to_finfo, + FSE_ARG_DONE); + } else { + add_fsevent(FSE_RENAME, ctx, + FSE_ARG_STRING, from_len, from_name, + FSE_ARG_FINFO, &from_finfo, + FSE_ARG_STRING, to_len, to_name, + FSE_ARG_DONE); + } + } + if (from_name) + release_pathbuff(from_name); + if (to_name) + release_pathbuff(to_name); +#endif /* CONFIG_FSE */ + from_name = to_name = NULL; + +out: + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + holding_mntlock = 0; + } + if (tdvp) { + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&toni); + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + + tdvp = NULL; + } + if (fdvp) { + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromni); + + if (fvp) + vnode_put(fvp); + vnode_put(fdvp); + + fdvp = NULL; } - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); - tond.ni_cnd.cn_flags &= ~HASBUF; -out1: if (fdirp) { - fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); - vrele(fdirp); + nfsm_srv_vattr_init(&fdpostattr, nd->nd_vers); + fdpostattrerr = vnode_getattr(fdirp, &fdpostattr, ctx); + vnode_put(fdirp); + fdirp = NULL; } if (tdirp) { - tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); - vrele(tdirp); - } - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, fromnd.ni_cnd.cn_pnlen, M_NAMEI); - fromnd.ni_cnd.cn_flags &= ~HASBUF; - nfsm_reply(2 * NFSX_WCCDATA(v3)); - if (v3) { - nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); - nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); + nfsm_srv_vattr_init(&tdpostattr, nd->nd_vers); + tdpostattrerr = vnode_getattr(tdirp, &tdpostattr, ctx); + vnode_put(tdirp); + tdirp = NULL; } - return (0); +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, 2 * NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_wcc_data(error, nd, &nmrep, + fdpreattrerr, &fdpreattr, fdpostattrerr, &fdpostattr); + nfsm_chain_add_wcc_data(error, nd, &nmrep, + tdpreattrerr, &tdpreattr, tdpostattrerr, &tdpostattr); + } nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (holding_mntlock) { + mount_unlock_renames(locked_mp); + mount_drop(locked_mp, 0); + } + if (tdvp) { + /* + * nameidone has to happen before we vnode_put(tdvp) + * since it may need to release the fs_nodelock on the tdvp + */ + nameidone(&toni); + + if (tvp) + vnode_put(tvp); + vnode_put(tdvp); + } + if (fdvp) { + /* + * nameidone has to happen before we vnode_put(fdvp) + * since it may need to release the fs_nodelock on the fdvp + */ + nameidone(&fromni); + + if (fvp) + vnode_put(fvp); + vnode_put(fdvp); + } if (fdirp) - vrele(fdirp); + vnode_put(fdirp); if (tdirp) - vrele(tdirp); - if (tond.ni_cnd.cn_nameiop) { - vrele(tond.ni_startdir); - FREE_ZONE(tond.ni_cnd.cn_pnbuf, tond.ni_cnd.cn_pnlen, M_NAMEI); - tond.ni_cnd.cn_flags &= ~HASBUF; - } - if (fromnd.ni_cnd.cn_nameiop) { - vrele(fromnd.ni_startdir); - FREE_ZONE(fromnd.ni_cnd.cn_pnbuf, - fromnd.ni_cnd.cn_pnlen, M_NAMEI); - fromnd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - vrele(fromnd.ni_dvp); - vrele(fvp); + vnode_put(tdirp); + if (frompath) + FREE_ZONE(frompath, MAXPATHLEN, M_NAMEI); + if (topath) + FREE_ZONE(topath, MAXPATHLEN, M_NAMEI); + if (saved_cred) + kauth_cred_unref(&saved_cred); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } return (error); } @@ -2130,360 +3116,591 @@ nfsmout: * nfs link service */ int -nfsrv_link(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_link( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct nameidata nd; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; - int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mreq; - struct vnode *vp, *xp, *dirp = (struct vnode *)0; - struct vattr dirfor, diraft, at; - nfsfh_t nfh, dnfh; - fhandle_t *fhp, *dfhp; - u_quad_t frev; - - fhp = &nfh.fh_generic; - dfhp = &dnfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvmtofh(dfhp); - nfsm_srvnamesiz(len); - if ((error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); - nfsm_srvpostop_attr(getret, &at); - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - return (0); - } - if (vp->v_type == VDIR) { + struct nameidata ni; + int error, dpreattrerr, dpostattrerr, attrerr; + uint32_t len = 0; + vnode_t vp, xp, dvp, dirp; + struct vnode_attr dpreattr, dpostattr, attr; + struct nfs_filehandle nfh, dnfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + dpreattrerr = dpostattrerr = attrerr = ENOENT; + vp = xp = dvp = dirp = NULL; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, dnfh.nfh_fhp, dnfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + /* we're not allowed to link to directories... */ + if (vnode_vtype(vp) == VDIR) { error = EPERM; /* POSIX */ - goto out1; + goto out; } - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT; - error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + + /* ...or to anything that kauth doesn't want us to (eg. immutable items) */ + if ((error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx, nxo, 0)) != 0) + goto out; + + ni.ni_cnd.cn_nameiop = CREATE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LINK; +#endif + ni.ni_cnd.cn_flags = LOCKPARENT; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) + error = nfsrv_namei(nd, ctx, &ni, &dnfh, &dirp, &nx, &nxo); if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } else { + vnode_put(dirp); + dirp = NULL; } } if (error) - goto out1; - xp = nd.ni_vp; - if (xp != NULL) { - error = EEXIST; goto out; - } - xp = nd.ni_dvp; - if (vp->v_mount != xp->v_mount) + dvp = ni.ni_dvp; + xp = ni.ni_vp; + + if (xp != NULL) + error = EEXIST; + else if (vnode_mount(vp) != vnode_mount(dvp)) error = EXDEV; + else + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0); + + if (!error) + error = VNOP_LINK(vp, dvp, &ni.ni_cnd, ctx); + +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && !error && need_fsevent(FSE_CREATE_FILE, dvp)) { + char *target_path = NULL; + int plen, truncated=0; + fse_info finfo; + + /* build the path to the new link file */ + target_path = get_pathbuff(); + if (target_path) { + plen = safe_getpath(dvp, ni.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated); + + if (get_fse_info(vp, &finfo, ctx) == 0) { + if (truncated) { + finfo.mode |= FSE_TRUNCATED_PATH; + } + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_STRING, plen, target_path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + } + + release_pathbuff(target_path); + } + } +#endif + + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + + if (xp) + vnode_put(xp); + vnode_put(dvp); out: - if (!error) { - nqsrv_getl(vp, ND_WRITE); - nqsrv_getl(xp, ND_WRITE); - error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_vattr_init(&attr, NFS_VER3); + attrerr = vnode_getattr(vp, &attr, ctx); } -out1: - if (v3) - getret = VOP_GETATTR(vp, &at, cred, procp); if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); - } - vrele(vp); - nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); - if (v3) { - nfsm_srvpostop_attr(getret, &at); - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - return (0); + nfsm_srv_vattr_init(&dpostattr, nd->nd_vers); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); + dirp = NULL; } - nfsm_srvdone; + vnode_put(vp); + vp = NULL; + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); + } +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (vp) + vnode_put(vp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* * nfs symbolic link service */ int -nfsrv_symlink(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_symlink( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, dirfor, diraft; - struct nameidata nd; - register struct vattr *vap = &va; - register u_long *tl; - register long t1; - struct nfsv2_sattr *sp; - char *bpos, *pathcp = (char *)0, *cp2; - struct uio io; - struct iovec iv; - int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3); - struct mbuf *mb, *mreq, *mb2; - struct vnode *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - - nd.ni_cnd.cn_nameiop = 0; - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + struct vnode_attr dpreattr, dpostattr, postattr; + struct vnode_attr va, *vap = &va; + struct nameidata ni; + int error, dpreattrerr, dpostattrerr, postattrerr; + uint32_t len = 0, linkdatalen, cnflags; + uid_t saved_uid; + char *linkdata; + vnode_t vp, dvp, dirp; + struct nfs_filehandle nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + uio_t auio = NULL; + char uio_buf[ UIO_SIZEOF(1) ]; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + dpreattrerr = dpostattrerr = postattrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + linkdata = NULL; + dirp = NULL; + + saved_uid = kauth_cred_getuid(nd->nd_cr); + + ni.ni_cnd.cn_nameiop = 0; + vp = dvp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = CREATE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LINK; +#endif + ni.ni_cnd.cn_flags = LOCKPARENT; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); + } + } if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } else { + vnode_put(dirp); + dirp = NULL; } } - if (error) + if (error) { + ni.ni_cnd.cn_nameiop = 0; + goto out1; + } + dvp = ni.ni_dvp; + vp = ni.ni_vp; + + VATTR_INIT(vap); + if (nd->nd_vers == NFS_VER3) + error = nfsm_chain_get_sattr(nd, nmreq, vap); + nfsm_chain_get_32(error, nmreq, linkdatalen); + if (!error && (((nd->nd_vers == NFS_VER2) && (linkdatalen > NFS_MAXPATHLEN)) || + ((nd->nd_vers == NFS_VER3) && (linkdatalen > MAXPATHLEN)))) + error = NFSERR_NAMETOL; + nfsmerr_if(error); + MALLOC(linkdata, caddr_t, linkdatalen + 1, M_TEMP, M_WAITOK); + if (linkdata) + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!linkdata || !auio) { + error = ENOMEM; goto out; - VATTR_NULL(vap); - if (v3) - nfsm_srvsattr(vap); - nfsm_strsiz(len2, NFS_MAXPATHLEN); - MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); - iv.iov_base = pathcp; - iv.iov_len = len2; - io.uio_resid = len2; - io.uio_offset = 0; - io.uio_iov = &iv; - io.uio_iovcnt = 1; - io.uio_segflg = UIO_SYSSPACE; - io.uio_rw = UIO_READ; - io.uio_procp = (struct proc *)0; - nfsm_mtouio(&io, len2); - if (!v3) { - nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); - vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode); - } - *(pathcp + len2) = '\0'; - if (nd.ni_vp) { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(nd.ni_vp); + } + uio_addiov(auio, CAST_USER_ADDR_T(linkdata), linkdatalen); + error = nfsm_chain_get_uio(nmreq, linkdatalen, auio); + if (!error && (nd->nd_vers == NFS_VER2)) + error = nfsm_chain_get_sattr(nd, nmreq, vap); + nfsmerr_if(error); + *(linkdata + linkdatalen) = '\0'; + if (vp) { error = EEXIST; goto out; } - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); - if (error) - vrele(nd.ni_startdir); - else { - if (v3) { - nd.ni_cnd.cn_nameiop = LOOKUP; - nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART | FOLLOW); - nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); - nd.ni_cnd.cn_proc = procp; - nd.ni_cnd.cn_cred = cred; - error = lookup(&nd); - if (!error) { - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); + + VATTR_SET(vap, va_type, VLNK); + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + /* + * Server policy is to alway use the mapped rpc credential for + * file system object creation. This has the nice side effect of + * enforcing BSD creation semantics + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + + /* authorize before creating */ + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx, nxo, 0); + + /* validate given attributes */ + if (!error) + error = vnode_authattr_new(dvp, vap, 0, ctx); + + if (!error) + error = VNOP_SYMLINK(dvp, &vp, &ni.ni_cnd, vap, linkdata, ctx); + + if (!error && (nd->nd_vers == NFS_VER3)) { + if (vp == NULL) { + ni.ni_cnd.cn_nameiop = LOOKUP; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LOOKUP; +#endif + ni.ni_cnd.cn_flags &= ~(LOCKPARENT | FOLLOW); + ni.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); + ni.ni_cnd.cn_context = ctx; + ni.ni_startdir = dvp; + ni.ni_usedvp = dvp; + cnflags = ni.ni_cnd.cn_flags; /* store in case we have to restore */ + while ((error = lookup(&ni)) == ERECYCLE) { + ni.ni_cnd.cn_flags = cnflags; + ni.ni_cnd.cn_nameptr = ni.ni_cnd.cn_pnbuf; + ni.ni_usedvp = ni.ni_dvp = ni.ni_startdir = dvp; + } if (!error) - error = VOP_GETATTR(nd.ni_vp, vap, cred, - procp); - vput(nd.ni_vp); + vp = ni.ni_vp; + } + if (!error) { + error = nfsrv_vptofh(nx, NFS_VER3, NULL, vp, ctx, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postattr, NFS_VER3); + postattrerr = vnode_getattr(vp, &postattr, ctx); + } } - } else - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; } + +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && !error && vp) { + add_fsevent(FSE_CREATE_FILE, ctx, + FSE_ARG_VNODE, vp, + FSE_ARG_DONE); + } +#endif out: - if (pathcp) - FREE(pathcp, M_TEMP); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + ni.ni_cnd.cn_nameiop = 0; + if (vp) + vnode_put(vp); + vnode_put(dvp); +out1: + if (linkdata) { + FREE(linkdata, M_TEMP); + linkdata = NULL; + } if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&dpostattr, nd->nd_vers); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); + dirp = NULL; } - nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); - if (v3) { - if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) + + NFSX_POSTOPATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + if (!nd->nd_repstat) { + nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr); } - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); } - return (0); nfsmout: - if (nd.ni_cnd.cn_nameiop) { - vrele(nd.ni_startdir); - FREE_ZONE(nd.ni_cnd.cn_pnbuf, nd.ni_cnd.cn_pnlen, M_NAMEI); - nd.ni_cnd.cn_flags &= ~HASBUF; + nfsm_chain_build_done(error, &nmrep); + if (ni.ni_cnd.cn_nameiop) { + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + + if (vp) + vnode_put(vp); + vnode_put(dvp); } if (dirp) - vrele(dirp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); - if (pathcp) - FREE(pathcp, M_TEMP); + vnode_put(dirp); + if (linkdata) + FREE(linkdata, M_TEMP); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } return (error); } /* * nfs mkdir service */ + int -nfsrv_mkdir(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_mkdir( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr va, dirfor, diraft; - register struct vattr *vap = &va; - register struct nfs_fattr *fp; - struct nameidata nd; - register caddr_t cp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = CREATE; - nd.ni_cnd.cn_flags = LOCKPARENT; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); + struct vnode_attr dpreattr, dpostattr, postattr; + struct vnode_attr va, *vap = &va; + struct nameidata ni; + int error, dpreattrerr, dpostattrerr, postattrerr; + uint32_t len = 0; + vnode_t vp, dvp, dirp; + struct nfs_filehandle nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + uid_t saved_uid; + kauth_acl_t xacl = NULL; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + dpreattrerr = dpostattrerr = postattrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + + saved_uid = kauth_cred_getuid(nd->nd_cr); + + ni.ni_cnd.cn_nameiop = 0; + vp = dvp = dirp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = CREATE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_LINK; +#endif + ni.ni_cnd.cn_flags = LOCKPARENT; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); + } + } if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } else { + vnode_put(dirp); + dirp = NULL; } } if (error) { - nfsm_reply(NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - if (dirp) - vrele(dirp); - return (0); - } - VATTR_NULL(vap); - if (v3) { - nfsm_srvsattr(vap); - } else { - nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); - vap->va_mode = nfstov_mode(*tl++); + ni.ni_cnd.cn_nameiop = 0; + goto nfsmerr; } - vap->va_type = VDIR; - vp = nd.ni_vp; + dvp = ni.ni_dvp; + vp = ni.ni_vp; + + VATTR_INIT(vap); + error = nfsm_chain_get_sattr(nd, nmreq, vap); + nfsmerr_if(error); + VATTR_SET(vap, va_type, VDIR); + if (vp != NULL) { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + vnode_put(dvp); + vnode_put(vp); error = EEXIST; goto out; } - nqsrv_getl(nd.ni_dvp, ND_WRITE); - error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + + error = nfsrv_authorize(dvp, NULL, KAUTH_VNODE_ADD_SUBDIRECTORY, ctx, nxo, 0); + + /* construct ACL and handle inheritance */ if (!error) { - vp = nd.ni_vp; - bzero((caddr_t)fhp, sizeof(nfh)); - fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; - error = VFS_VPTOFH(vp, &fhp->fh_fid); - if (!error) - error = VOP_GETATTR(vp, vap, cred, procp); - vput(vp); + error = kauth_acl_inherit(dvp, + NULL, + &xacl, /* isdir */ + 1, + ctx); + + if (!error && xacl != NULL) + VATTR_SET(vap, va_acl, xacl); } + + VATTR_CLEAR_ACTIVE(vap, va_data_size); + VATTR_CLEAR_ACTIVE(vap, va_access_time); + /* + * We don't support the S_ISGID bit for directories. Solaris and other + * SRV4 derived systems might set this to get BSD semantics, which we enforce + * any ways. + */ + if (VATTR_IS_ACTIVE(vap, va_mode)) + vap->va_mode &= ~S_ISGID; + /* + * Server policy is to alway use the mapped rpc credential for + * file system object creation. This has the nice side effect of + * enforcing BSD creation semantics + */ + VATTR_CLEAR_ACTIVE(vap, va_uid); + VATTR_CLEAR_ACTIVE(vap, va_gid); + + /* validate new-file security information */ + if (!error) + error = vnode_authattr_new(dvp, vap, 0, ctx); + /* + * vnode_authattr_new can return errors other than EPERM, but that's not going to + * sit well with our clients so we map all errors to EPERM. + */ + if (error) + error = EPERM; + + if (!error) + error = VNOP_MKDIR(dvp, &vp, &ni.ni_cnd, vap, ctx); + +#if CONFIG_FSE + if (nfsrv_fsevents_enabled && !error) + add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE); +#endif + + if (!error && !VATTR_ALL_SUPPORTED(vap)) + /* + * If some of the requested attributes weren't handled by the VNOP, + * use our fallback code. + */ + error = vnode_setattr_fallback(vp, vap, ctx); + + if (xacl != NULL) + kauth_acl_free(xacl); + + if (!error) { + error = nfsrv_vptofh(nx, nd->nd_vers, NULL, vp, ctx, &nfh); + if (!error) { + nfsm_srv_vattr_init(&postattr, nd->nd_vers); + postattrerr = vnode_getattr(vp, &postattr, ctx); + if (nd->nd_vers == NFS_VER2) + error = postattrerr; + } + vnode_put(vp); + vp = NULL; + } + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + vnode_put(dvp); out: + ni.ni_cnd.cn_nameiop = 0; + if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&dpostattr, nd->nd_vers); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); + dirp = NULL; } - nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); - if (v3) { - if (!error) { - nfsm_srvpostop_fh(fhp); - nfsm_srvpostop_attr(0, vap); + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_SRVFH(nd->nd_vers, &nfh) + + NFSX_POSTOPATTR(nd->nd_vers) + NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + if (!nd->nd_repstat) { + nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_add_postop_attr(error, nd, &nmrep, postattrerr, &postattr); } - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); } else { - nfsm_srvfhtom(fhp, v3); - nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); - nfsm_srvfillattr(vap, fp); + nfsm_chain_add_fh(error, &nmrep, NFS_VER2, nfh.nfh_fhp, nfh.nfh_len); + if (!error) + error = nfsm_chain_add_fattr(nd, &nmrep, &postattr); } - return (0); nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (ni.ni_cnd.cn_nameiop) { + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + vnode_put(dvp); + if (vp) + vnode_put(vp); + } if (dirp) - vrele(dirp); - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (nd.ni_vp) - vrele(nd.ni_vp); + vnode_put(dirp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } return (error); } @@ -2491,113 +3708,169 @@ nfsmout: * nfs rmdir service */ int -nfsrv_rmdir(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_rmdir( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mreq; - struct vnode *vp, *dirp = (struct vnode *)0; - struct vattr dirfor, diraft; - nfsfh_t nfh; - fhandle_t *fhp; - struct nameidata nd; - u_quad_t frev; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_srvnamesiz(len); - nd.ni_cnd.cn_cred = cred; - nd.ni_cnd.cn_nameiop = DELETE; - nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; - error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, - &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); - if (dirp) { - if (v3) - dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, - procp); - else { - vrele(dirp); - dirp = (struct vnode *)0; + int error, dpreattrerr, dpostattrerr; + uint32_t len = 0; + uid_t saved_uid; + vnode_t vp, dvp, dirp; + struct vnode_attr dpreattr, dpostattr; + struct nfs_filehandle nfh; + struct nfs_export *nx = NULL; + struct nfs_export_options *nxo; + struct nameidata ni; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + dpreattrerr = dpostattrerr = ENOENT; + saved_uid = kauth_cred_getuid(nd->nd_cr); + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + + vp = dvp = dirp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_32(error, nmreq, len); + nfsm_name_len_check(error, nd, len); + nfsmerr_if(error); + + ni.ni_cnd.cn_nameiop = DELETE; +#if CONFIG_TRIGGERS + ni.ni_op = OP_UNLINK; +#endif + ni.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; + error = nfsm_chain_get_path_namei(nmreq, len, &ni); + if (!error) { + error = nfsrv_namei(nd, ctx, &ni, &nfh, &dirp, &nx, &nxo); + if (nx != NULL) { + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, saved_uid, 1, 0, 0); } } - if (error) { - nfsm_reply(NFSX_WCCDATA(v3)); - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - if (dirp) - vrele(dirp); - return (0); + if (dirp) { + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_pre_vattr_init(&dpreattr); + dpreattrerr = vnode_getattr(dirp, &dpreattr, ctx); + } else { + vnode_put(dirp); + dirp = NULL; + } } - vp = nd.ni_vp; - if (vp->v_type != VDIR) { + nfsmerr_if(error); + + dvp = ni.ni_dvp; + vp = ni.ni_vp; + + if (vnode_vtype(vp) != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ - if (nd.ni_dvp == vp) { + if (dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ - if (vp->v_flag & VROOT) + if (vnode_isvroot(vp)) error = EBUSY; -out: + if (!error) + error = nfsrv_authorize(vp, dvp, KAUTH_VNODE_DELETE, ctx, nxo, 0); if (!error) { - nqsrv_getl(nd.ni_dvp, ND_WRITE); - nqsrv_getl(vp, ND_WRITE); - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - } else { - VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); +#if CONFIG_FSE + char *path = NULL; + int plen; + fse_info finfo; + + if (nfsrv_fsevents_enabled && need_fsevent(FSE_DELETE, dvp)) { + plen = MAXPATHLEN; + if ((path = get_pathbuff()) && !vn_getpath(vp, path, &plen)) { + get_fse_info(vp, &finfo, ctx); + } else if (path) { + release_pathbuff(path); + path = NULL; + } + } +#endif /* CONFIG_FSE */ + + error = VNOP_RMDIR(dvp, vp, &ni.ni_cnd, ctx); + +#if CONFIG_FSE + if (path) { + if (!error) + add_fsevent(FSE_DELETE, ctx, + FSE_ARG_STRING, plen, path, + FSE_ARG_FINFO, &finfo, + FSE_ARG_DONE); + release_pathbuff(path); + } +#endif /* CONFIG_FSE */ } +out: + /* + * nameidone has to happen before we vnode_put(dvp) + * since it may need to release the fs_nodelock on the dvp + */ + nameidone(&ni); + + vnode_put(dvp); + vnode_put(vp); + if (dirp) { - diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - vrele(dirp); + nfsm_srv_vattr_init(&dpostattr, nd->nd_vers); + dpostattrerr = vnode_getattr(dirp, &dpostattr, ctx); + vnode_put(dirp); + dirp = NULL; } - nfsm_reply(NFSX_WCCDATA(v3)); - if (v3) { - nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); - return (0); + +nfsmerr: + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_WCCDATA(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_wcc_data(error, nd, &nmrep, + dpreattrerr, &dpreattr, dpostattrerr, &dpostattr); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (dirp) + vnode_put(dirp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nfsm_srvdone; + return (error); } /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR - * - calls VOP_READDIR() + * - calls VNOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The nfsm_clget macro is used here so that the reply will be packed * tightly in mbuf clusters. - * - it only knows that it has encountered eof when the VOP_READDIR() + * - it only knows that it has encountered eof when the VNOP_READDIR() * reads nothing * - as such one readdir rpc will return eof false although you are there * and then the next will return eof * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. - * NB: It is tempting to set eof to true if the VOP_READDIR() reads less + * NB: It is tempting to set eof to true if the VNOP_READDIR() reads less * than requested, but this may not apply to all filesystems. For * example, client NFS does not { although it is never remote mounted * anyhow } @@ -2608,167 +3881,127 @@ out: * the EOF flag. For readdirplus, the maxcount is the same, and the * dircount includes all that except for the entry attributes and handles. */ -struct flrep { - nfsuint64 fl_off; - u_long fl_postopok; - u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; - u_long fl_fhok; - u_long fl_fhsize; - u_long fl_nfh[NFSX_V3FH / sizeof (u_long)]; -}; - int -nfsrv_readdir(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_readdir( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register char *bp, *be; - register struct mbuf *mp; - register struct dirent *dp; - register caddr_t cp; - register u_long *tl; - register long t1; - caddr_t bpos; - struct mbuf *mb, *mb2, *mreq, *mp2; - char *cpos, *cend, *cp2, *rbuf; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - struct uio io; - struct iovec iv; - int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; - int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies = 0; - int v3 = (nfsd->nd_flag & ND_NFSV3); - u_quad_t frev, off, toff, verf; - u_long *cookies = NULL, *cookiep; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if (v3) { - nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); - fxdr_hyper(tl, &toff); - tl += 2; - fxdr_hyper(tl, &verf); - tl += 2; + struct direntry *dp; + char *cpos, *cend, *rbuf; + vnode_t vp; + struct vnode_attr attr; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio = NULL; + char uio_buf[ UIO_SIZEOF(1) ]; + int len, nlen, rem, xfer, error, attrerr; + int siz, count, fullsiz, eofflag, nentries; + u_quad_t off, toff, verf; + int vnopflag; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = ENOENT; + count = nentries = 0; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + rbuf = NULL; + vp = NULL; + + vnopflag = VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_get_64(error, nmreq, toff); + nfsm_chain_get_64(error, nmreq, verf); } else { - nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); - toff = fxdr_unsigned(u_quad_t, *tl++); + nfsm_chain_get_32(error, nmreq, toff); } + nfsm_chain_get_32(error, nmreq, count); + nfsmerr_if(error); + off = toff; - cnt = fxdr_unsigned(int, *tl); - siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); - xfer = NFS_SRVMAXDATA(nfsd); + siz = ((count + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); + xfer = NFSRV_NDMAXDATA(nd); if (siz > xfer) siz = xfer; fullsiz = siz; - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - nqsrv_getl(vp, ND_READ); - if (v3) { - error = getret = VOP_GETATTR(vp, &at, cred, procp); - if (!error && toff && verf && verf != at.va_filerev) + + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + if (nxo->nxo_flags & NX_MANGLEDNAMES || nd->nd_vers == NFS_VER2) + vnopflag |= VNODE_READDIR_NAMEMAX; + + if ((nd->nd_vers == NFS_VER2) || (nxo->nxo_flags & NX_32BITCLIENTS)) + vnopflag |= VNODE_READDIR_SEEKOFF32; + + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_vattr_init(&attr, NFS_VER3); + error = attrerr = vnode_getattr(vp, &attr, ctx); + if (!error && toff && verf && (verf != attr.va_filerev)) error = NFSERR_BAD_COOKIE; } if (!error) - error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0); - if (error) { - vput(vp); - nfsm_reply(NFSX_POSTOPATTR(v3)); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - VOP_UNLOCK(vp, 0, procp); + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LIST_DIRECTORY, ctx, nxo, 0); + nfsmerr_if(error); + MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + if (rbuf) + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!rbuf || !auio) { + error = ENOMEM; + goto nfsmerr; + } again: - iv.iov_base = rbuf; - iv.iov_len = fullsiz; - io.uio_iov = &iv; - io.uio_iovcnt = 1; - io.uio_offset = (off_t)off; - io.uio_resid = fullsiz; - io.uio_segflg = UIO_SYSSPACE; - io.uio_rw = UIO_READ; - io.uio_procp = (struct proc *)0; + uio_reset(auio, off, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, CAST_USER_ADDR_T(rbuf), fullsiz); eofflag = 0; + error = VNOP_READDIR(vp, auio, vnopflag, &eofflag, &nentries, ctx); + off = uio_offset(auio); - if (cookies) { - _FREE((caddr_t)cookies, M_TEMP); - cookies = NULL; - } - if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) { - FREE((caddr_t)rbuf, M_TEMP); - nfsm_reply(NFSX_POSTOPATTR(v3)); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); - off = (off_t)io.uio_offset; - /* - * We cannot set the error in the case where there are no cookies - * and no error, only, as FreeBSD. In the scenario the client is - * calling us back being told there were "more" entries on last readdir - * return, and we have no more entries, our VOP_READDIR can give - * cookies = NULL and no error. This is due to a zero size to MALLOC - * returning NULL unlike FreeBSD which returns a pointer. - * With FreeBSD it makes sense if the MALLOC failed and you get in that - * bind. For us, we need something more. Thus, we should make sure we - * had some cookies to return, but no pointer and no error for EPERM case. - * Otherwise, go thru normal processing of sending back the eofflag. This check - * is also legit on first call to the routine by client since . and .. - * should be returned. Make same change to nfsrv_readdirplus. - */ - if ((ncookies != 0) && !cookies && !error) - error = NFSERR_PERM; - - if (v3) { - getret = VOP_GETATTR(vp, &at, cred, procp); - if (!error) - error = getret; - } - VOP_UNLOCK(vp, 0, procp); - if (error) { - vrele(vp); - _FREE((caddr_t)rbuf, M_TEMP); - if (cookies) - _FREE((caddr_t)cookies, M_TEMP); - nfsm_reply(NFSX_POSTOPATTR(v3)); - nfsm_srvpostop_attr(getret, &at); - return (0); + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_vattr_init(&attr, NFS_VER3); + attrerr = vnode_getattr(vp, &attr, ctx); } - if (io.uio_resid) { - siz -= io.uio_resid; + nfsmerr_if(error); - /* - * If nothing read, return eof - * rpc reply - */ + if (uio_resid(auio) != 0) { + siz -= uio_resid(auio); + + /* If nothing read, return empty reply with eof set */ if (siz == 0) { - vrele(vp); - nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + - 2 * NFSX_UNSIGNED); - if (v3) { - nfsm_srvpostop_attr(getret, &at); - nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); - txdr_hyper(&at.va_filerev, tl); - tl += 2; - } else - nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); - *tl++ = nfs_false; - *tl = nfs_true; - FREE((caddr_t)rbuf, M_TEMP); - FREE((caddr_t)cookies, M_TEMP); - return (0); + vnode_put(vp); + vp = NULL; + FREE(rbuf, M_TEMP); + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + + NFSX_COOKIEVERF(nd->nd_vers) + 2 * NFSX_UNSIGNED); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsm_chain_add_64(error, &nmrep, attr.va_filerev); + } + nfsm_chain_add_32(error, &nmrep, FALSE); + nfsm_chain_add_32(error, &nmrep, TRUE); + nfsm_chain_build_done(error, &nmrep); + return (error); } } @@ -2778,261 +4011,214 @@ again: */ cpos = rbuf; cend = rbuf + siz; - dp = (struct dirent *)cpos; - cookiep = cookies; -#ifdef __FreeBSD__ - /* - * For some reason FreeBSD's ufs_readdir() chooses to back the - * directory offset up to a block boundary, so it is necessary to - * skip over the records that preceed the requested offset. This - * requires the assumption that file offset cookies monotonically - * increase. - */ - while (cpos < cend && ncookies > 0 && - (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { -#else - while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { -#endif + dp = (struct direntry *)cpos; + while ((dp->d_fileno == 0) && (cpos < cend) && (nentries > 0)) { cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; + dp = (struct direntry *)cpos; + nentries--; } - if (cpos >= cend || ncookies == 0) { + if ((cpos >= cend) || (nentries == 0)) { toff = off; siz = fullsiz; goto again; } - nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); - if (v3) { - len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; - nfsm_srvpostop_attr(getret, &at); - nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); - txdr_hyper(&at.va_filerev, tl); - } else - len = 2 * NFSX_UNSIGNED; - mp = mp2 = mb; - bp = bpos; - be = bp + M_TRAILINGSPACE(mp); + vnode_put(vp); + vp = NULL; + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + + NFSX_COOKIEVERF(nd->nd_vers) + siz); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nmrep.nmc_flags |= NFSM_CHAIN_FLAG_ADD_CLUSTERS; + + len = 2 * NFSX_UNSIGNED; + if (nd->nd_vers == NFS_VER3) { + len += NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF; + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsm_chain_add_64(error, &nmrep, attr.va_filerev); + nfsmerr_if(error); + } /* Loop through the records and build reply */ - while (cpos < cend && ncookies > 0) { + while ((cpos < cend) && (nentries > 0)) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; + if ((nd->nd_vers == NFS_VER2) && (nlen > NFS_MAXNAMLEN)) + nlen = NFS_MAXNAMLEN; rem = nfsm_rndup(nlen)-nlen; len += (4 * NFSX_UNSIGNED + nlen + rem); - if (v3) + if (nd->nd_vers == NFS_VER3) len += 2 * NFSX_UNSIGNED; - if (len > cnt) { + if (len > count) { eofflag = 0; break; } - /* - * Build the directory record xdr from - * the dirent entry. - */ - nfsm_clget; - *tl = nfs_true; - bp += NFSX_UNSIGNED; - if (v3) { - nfsm_clget; - *tl = 0; - bp += NFSX_UNSIGNED; - } - nfsm_clget; - *tl = txdr_unsigned(dp->d_fileno); - bp += NFSX_UNSIGNED; - nfsm_clget; - *tl = txdr_unsigned(nlen); - bp += NFSX_UNSIGNED; - - /* And loop around copying the name */ - xfer = nlen; - cp = dp->d_name; - while (xfer > 0) { - nfsm_clget; - if ((bp+xfer) > be) - tsiz = be-bp; - else - tsiz = xfer; - bcopy(cp, bp, tsiz); - bp += tsiz; - xfer -= tsiz; - if (xfer > 0) - cp += tsiz; + /* Build the directory record xdr from the direntry. */ + nfsm_chain_add_32(error, &nmrep, TRUE); + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_64(error, &nmrep, dp->d_fileno); + } else { + nfsm_chain_add_32(error, &nmrep, dp->d_fileno); } - /* And null pad to a long boundary */ - for (i = 0; i < rem; i++) - *bp++ = '\0'; - nfsm_clget; - - /* Finish off the record */ - if (v3) { - *tl = 0; - bp += NFSX_UNSIGNED; - nfsm_clget; + nfsm_chain_add_string(error, &nmrep, dp->d_name, nlen); + if (nd->nd_vers == NFS_VER3) { + if (vnopflag & VNODE_READDIR_SEEKOFF32) + dp->d_seekoff &= 0x00000000ffffffffULL; + nfsm_chain_add_64(error, &nmrep, dp->d_seekoff); + } else { + nfsm_chain_add_32(error, &nmrep, dp->d_seekoff); } - *tl = txdr_unsigned(*cookiep); - bp += NFSX_UNSIGNED; + nfsmerr_if(error); } cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; - } - vrele(vp); - nfsm_clget; - *tl = nfs_false; - bp += NFSX_UNSIGNED; - nfsm_clget; - if (eofflag) - *tl = nfs_true; - else - *tl = nfs_false; - bp += NFSX_UNSIGNED; - if (mp != mb) { - if (bp < be) - mp->m_len = bp - mtod(mp, caddr_t); - } else - mp->m_len += bp - bpos; - FREE((caddr_t)rbuf, M_TEMP); - FREE((caddr_t)cookies, M_TEMP); - nfsm_srvdone; + dp = (struct direntry *)cpos; + nentries--; + } + nfsm_chain_add_32(error, &nmrep, FALSE); + nfsm_chain_add_32(error, &nmrep, eofflag ? TRUE : FALSE); + FREE(rbuf, M_TEMP); + goto nfsmout; +nfsmerr: + if (rbuf) + FREE(rbuf, M_TEMP); + if (vp) + vnode_put(vp); + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } int -nfsrv_readdirplus(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_readdirplus( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register char *bp, *be; - register struct mbuf *mp; - register struct dirent *dp; - register caddr_t cp; - register u_long *tl; - register long t1; - caddr_t bpos; - struct mbuf *mb, *mb2, *mreq, *mp2; - char *cpos, *cend, *cp2, *rbuf; - struct vnode *vp, *nvp; - struct flrep fl; - nfsfh_t nfh; - fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; - struct uio io; - struct iovec iv; - struct vattr va, at, *vap = &va; - struct nfs_fattr *fp; - int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; - int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies = 0; - u_quad_t frev, off, toff, verf; - u_long *cookies = NULL, *cookiep; - void *file; - - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); - fxdr_hyper(tl, &toff); - tl += 2; - fxdr_hyper(tl, &verf); - tl += 2; - siz = fxdr_unsigned(int, *tl++); - cnt = fxdr_unsigned(int, *tl); + struct direntry *dp; + char *cpos, *cend, *rbuf; + vnode_t vp, nvp; + struct nfs_filehandle dnfh, nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + uio_t auio = NULL; + char uio_buf[ UIO_SIZEOF(1) ]; + struct vnode_attr attr, va, *vap = &va; + int len, nlen, rem, xfer, error, attrerr, gotfh, gotattr; + int siz, dircount, maxcount, fullsiz, eofflag, dirlen, nentries, isdotdot; + u_quad_t off, toff, verf; + int vnopflag; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = ENOENT; + nentries = 0; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + rbuf = NULL; + vp = NULL; + dircount = maxcount = 0; + + vnopflag = VNODE_READDIR_EXTENDED | VNODE_READDIR_REQSEEKOFF; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, dnfh.nfh_fhp, dnfh.nfh_len); + nfsm_chain_get_64(error, nmreq, toff); + nfsm_chain_get_64(error, nmreq, verf); + nfsm_chain_get_32(error, nmreq, dircount); + nfsm_chain_get_32(error, nmreq, maxcount); + nfsmerr_if(error); + off = toff; - siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); - xfer = NFS_SRVMAXDATA(nfsd); - if (siz > xfer) - siz = xfer; - fullsiz = siz; - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - error = getret = VOP_GETATTR(vp, &at, cred, procp); - if (!error && toff && verf && verf != at.va_filerev) + xfer = NFSRV_NDMAXDATA(nd); + dircount = ((dircount + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); + if (dircount > xfer) + dircount = xfer; + fullsiz = siz = dircount; + maxcount = ((maxcount + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); + if (maxcount > xfer) + maxcount = xfer; + + error = nfsrv_fhtovp(&dnfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + if (nxo->nxo_flags & NX_32BITCLIENTS) + vnopflag |= VNODE_READDIR_SEEKOFF32; + + if (nxo->nxo_flags & NX_MANGLEDNAMES) + vnopflag |= VNODE_READDIR_NAMEMAX; + + nfsm_srv_vattr_init(&attr, NFS_VER3); + error = attrerr = vnode_getattr(vp, &attr, ctx); + if (!error && toff && verf && (verf != attr.va_filerev)) error = NFSERR_BAD_COOKIE; - if (!error) { - nqsrv_getl(vp, ND_READ); - error = nfsrv_access(vp, VEXEC, cred, rdonly, procp, 0); - } - if (error) { - vput(vp); - nfsm_reply(NFSX_V3POSTOPATTR); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - VOP_UNLOCK(vp, 0, procp); + if (!error) + error = nfsrv_authorize(vp, NULL, KAUTH_VNODE_LIST_DIRECTORY, ctx, nxo, 0); + nfsmerr_if(error); + MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); + if (rbuf) + auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ, + &uio_buf[0], sizeof(uio_buf)); + if (!rbuf || !auio) { + error = ENOMEM; + goto nfsmerr; + } + again: - iv.iov_base = rbuf; - iv.iov_len = fullsiz; - io.uio_iov = &iv; - io.uio_iovcnt = 1; - io.uio_offset = (off_t)off; - io.uio_resid = fullsiz; - io.uio_segflg = UIO_SYSSPACE; - io.uio_rw = UIO_READ; - io.uio_procp = (struct proc *)0; + uio_reset(auio, off, UIO_SYSSPACE, UIO_READ); + uio_addiov(auio, CAST_USER_ADDR_T(rbuf), fullsiz); eofflag = 0; - if (cookies) { - _FREE((caddr_t)cookies, M_TEMP); - cookies = NULL; - } - if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) { - FREE((caddr_t)rbuf, M_TEMP); - nfsm_reply(NFSX_V3POSTOPATTR); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); - off = (u_quad_t)io.uio_offset; - getret = VOP_GETATTR(vp, &at, cred, procp); - VOP_UNLOCK(vp, 0, procp); - /* - * See nfsrv_readdir comment above on this - */ - if ((ncookies != 0) && !cookies && !error) - error = NFSERR_PERM; + error = VNOP_READDIR(vp, auio, vnopflag, &eofflag, &nentries, ctx); + off = uio_offset(auio); + nfsm_srv_vattr_init(&attr, NFS_VER3); + attrerr = vnode_getattr(vp, &attr, ctx); + nfsmerr_if(error); - if (!error) - error = getret; - if (error) { - vrele(vp); - if (cookies) - _FREE((caddr_t)cookies, M_TEMP); - _FREE((caddr_t)rbuf, M_TEMP); - nfsm_reply(NFSX_V3POSTOPATTR); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - if (io.uio_resid) { - siz -= io.uio_resid; + if (uio_resid(auio) != 0) { + siz -= uio_resid(auio); - /* - * If nothing read, return eof - * rpc reply - */ + /* If nothing read, return empty reply with eof set */ if (siz == 0) { - vrele(vp); - nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + - 2 * NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, &at); - nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); - txdr_hyper(&at.va_filerev, tl); - tl += 2; - *tl++ = nfs_false; - *tl = nfs_true; - FREE((caddr_t)cookies, M_TEMP); - FREE((caddr_t)rbuf, M_TEMP); - return (0); + vnode_put(vp); + vp = NULL; + FREE(rbuf, M_TEMP); + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR + + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsm_chain_add_64(error, &nmrep, attr.va_filerev); + nfsm_chain_add_32(error, &nmrep, FALSE); + nfsm_chain_add_32(error, &nmrep, TRUE); + nfsm_chain_build_done(error, &nmrep); + return (error); } } @@ -3042,27 +4228,13 @@ again: */ cpos = rbuf; cend = rbuf + siz; - dp = (struct dirent *)cpos; - cookiep = cookies; -#ifdef __FreeBSD__ - /* - * For some reason FreeBSD's ufs_readdir() chooses to back the - * directory offset up to a block boundary, so it is necessary to - * skip over the records that preceed the requested offset. This - * requires the assumption that file offset cookies monotonically - * increase. - */ - while (cpos < cend && ncookies > 0 && - (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { -#else - while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { -#endif + dp = (struct direntry *)cpos; + while ((dp->d_fileno == 0) && (cpos < cend) && (nentries > 0)) { cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; + dp = (struct direntry *)cpos; + nentries--; } - if (cpos >= cend || ncookies == 0) { + if ((cpos >= cend) || (nentries == 0)) { toff = off; siz = fullsiz; goto again; @@ -3070,70 +4242,49 @@ again: /* * Probe one of the directory entries to see if the filesystem - * supports VGET. See later comment for VFS_VGET changes. + * supports VGET. */ - if (vp->v_tag == VT_UFS) - file = (void *) dp->d_fileno; - else { - file = &dp->d_fileno; - } - - if (error = VFS_VGET(vp->v_mount, file, &nvp)) { - if (error == EOPNOTSUPP) /* let others get passed back */ - error = NFSERR_NOTSUPP; - vrele(vp); - _FREE((caddr_t)cookies, M_TEMP); - _FREE((caddr_t)rbuf, M_TEMP); - nfsm_reply(NFSX_V3POSTOPATTR); - nfsm_srvpostop_attr(getret, &at); - return (0); + if ((error = VFS_VGET(vnode_mount(vp), (ino64_t)dp->d_fileno, &nvp, ctx))) { + if (error == ENOTSUP) /* let others get passed back */ + error = NFSERR_NOTSUPP; + goto nfsmerr; } - vput(nvp); - + vnode_put(nvp); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, maxcount); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nmrep.nmc_flags |= NFSM_CHAIN_FLAG_ADD_CLUSTERS; + dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; - nfsm_reply(cnt); - nfsm_srvpostop_attr(getret, &at); - nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); - txdr_hyper(&at.va_filerev, tl); - mp = mp2 = mb; - bp = bpos; - be = bp + M_TRAILINGSPACE(mp); + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsm_chain_add_64(error, &nmrep, attr.va_filerev); + nfsmerr_if(error); /* Loop through the records and build reply */ - while (cpos < cend && ncookies > 0) { + while ((cpos < cend) && (nentries > 0)) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; + gotfh = gotattr = 1; - /* - * Got to get the vnode for lookup per entry. - * HFS+/volfs and others use address of file identifier to VGET - * UFS, nullfs, umapfs use inode (u_int32_t) - * until they are consistent, we must differentiate now. - * UFS is the only one of the latter class that is exported. - * Note this will be pulled out as we resolve the VGET issue - * of which it should use u_in32_t or addresses. - */ - - if (vp->v_tag == VT_UFS) - file = (void *) dp->d_fileno; - else - file = &dp->d_fileno; - - if (VFS_VGET(vp->v_mount, file, &nvp)) - goto invalid; - bzero((caddr_t)nfhp, NFSX_V3FH); - nfhp->fh_fsid = - nvp->v_mount->mnt_stat.f_fsid; - if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { - vput(nvp); - goto invalid; - } - if (VOP_GETATTR(nvp, vap, cred, procp)) { - vput(nvp); - goto invalid; + /* Got to get the vnode for lookup per entry. */ + if (VFS_VGET(vnode_mount(vp), (ino64_t)dp->d_fileno, &nvp, ctx)) { + /* Can't get the vnode... so no fh or attrs */ + gotfh = gotattr = 0; + } else { + isdotdot = ((dp->d_namlen == 2) && + (dp->d_name[0] == '.') && (dp->d_name[1] == '.')); + if (nfsrv_vptofh(nx, 0, (isdotdot ? &dnfh : NULL), nvp, ctx, &nfh)) + gotfh = 0; + nfsm_srv_vattr_init(vap, NFS_VER3); + if (vnode_getattr(nvp, vap, ctx)) + gotattr = 0; + vnode_put(nvp); } - vput(nvp); /* * If either the dircount or maxcount will be @@ -3141,382 +4292,402 @@ again: * are calculated conservatively, including all * XDR overheads. */ - len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + - NFSX_V3POSTOPATTR); - dirlen += (6 * NFSX_UNSIGNED + nlen + rem); - if (len > cnt || dirlen > fullsiz) { + len += 8 * NFSX_UNSIGNED + nlen + rem; + if (gotattr) + len += NFSX_V3FATTR; + if (gotfh) + len += NFSX_UNSIGNED + nfsm_rndup(nfh.nfh_len); + dirlen += 6 * NFSX_UNSIGNED + nlen + rem; + if ((len > maxcount) || (dirlen > dircount)) { eofflag = 0; break; } - /* - * Build the directory record xdr from - * the dirent entry. - */ - fp = (struct nfs_fattr *)&fl.fl_fattr; - nfsm_srvfillattr(vap, fp); - fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); - fl.fl_fhok = nfs_true; - fl.fl_postopok = nfs_true; - fl.fl_off.nfsuquad[0] = 0; - fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); - - nfsm_clget; - *tl = nfs_true; - bp += NFSX_UNSIGNED; - nfsm_clget; - *tl = 0; - bp += NFSX_UNSIGNED; - nfsm_clget; - *tl = txdr_unsigned(dp->d_fileno); - bp += NFSX_UNSIGNED; - nfsm_clget; - *tl = txdr_unsigned(nlen); - bp += NFSX_UNSIGNED; - - /* And loop around copying the name */ - xfer = nlen; - cp = dp->d_name; - while (xfer > 0) { - nfsm_clget; - if ((bp + xfer) > be) - tsiz = be - bp; - else - tsiz = xfer; - bcopy(cp, bp, tsiz); - bp += tsiz; - xfer -= tsiz; - if (xfer > 0) - cp += tsiz; - } - /* And null pad to a long boundary */ - for (i = 0; i < rem; i++) - *bp++ = '\0'; - - /* - * Now copy the flrep structure out. - */ - xfer = sizeof (struct flrep); - cp = (caddr_t)&fl; - while (xfer > 0) { - nfsm_clget; - if ((bp + xfer) > be) - tsiz = be - bp; - else - tsiz = xfer; - bcopy(cp, bp, tsiz); - bp += tsiz; - xfer -= tsiz; - if (xfer > 0) - cp += tsiz; - } + /* Build the directory record xdr from the direntry. */ + nfsm_chain_add_32(error, &nmrep, TRUE); + nfsm_chain_add_64(error, &nmrep, dp->d_fileno); + nfsm_chain_add_string(error, &nmrep, dp->d_name, nlen); + if (vnopflag & VNODE_READDIR_SEEKOFF32) + dp->d_seekoff &= 0x00000000ffffffffULL; + nfsm_chain_add_64(error, &nmrep, dp->d_seekoff); + nfsm_chain_add_postop_attr(error, nd, &nmrep, (gotattr ? 0 : ENOENT), vap); + if (gotfh) + nfsm_chain_add_postop_fh(error, &nmrep, nfh.nfh_fhp, nfh.nfh_len); + else + nfsm_chain_add_32(error, &nmrep, FALSE); + nfsmerr_if(error); } -invalid: cpos += dp->d_reclen; - dp = (struct dirent *)cpos; - cookiep++; - ncookies--; - } - vrele(vp); - nfsm_clget; - *tl = nfs_false; - bp += NFSX_UNSIGNED; - nfsm_clget; - if (eofflag) - *tl = nfs_true; - else - *tl = nfs_false; - bp += NFSX_UNSIGNED; - if (mp != mb) { - if (bp < be) - mp->m_len = bp - mtod(mp, caddr_t); - } else - mp->m_len += bp - bpos; - FREE((caddr_t)cookies, M_TEMP); - FREE((caddr_t)rbuf, M_TEMP); - nfsm_srvdone; + dp = (struct direntry *)cpos; + nentries--; + } + vnode_put(vp); + vp = NULL; + nfsm_chain_add_32(error, &nmrep, FALSE); + nfsm_chain_add_32(error, &nmrep, eofflag ? TRUE : FALSE); + FREE(rbuf, M_TEMP); + goto nfsmout; +nfsmerr: + if (rbuf) + FREE(rbuf, M_TEMP); + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (vp) + vnode_put(vp); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* * nfs commit service */ int -nfsrv_commit(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_commit( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - struct vattr bfor, aft; - struct vnode *vp; - nfsfh_t nfh; - fhandle_t *fhp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - u_quad_t frev, off; - int didhold; - -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); + vnode_t vp; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + int error, preattrerr, postattrerr, count; + struct vnode_attr preattr, postattr; + u_quad_t off; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + preattrerr = postattrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = NULL; /* - * XXX At this time VOP_FSYNC() does not accept offset and byte - * count parameters, so these arguments are useless (someday maybe). + * XXX At this time VNOP_FSYNC() does not accept offset and byte + * count parameters, so those arguments are useless (someday maybe). */ - fxdr_hyper(tl, &off); - tl += 2; - cnt = fxdr_unsigned(int, *tl); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(2 * NFSX_UNSIGNED); - nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); - return (0); + + nfsm_chain_get_fh_ptr(error, nmreq, NFS_VER3, nfh.nfh_fhp, nfh.nfh_len); + nfsm_chain_get_64(error, nmreq, off); + nfsm_chain_get_32(error, nmreq, count); + nfsmerr_if(error); + + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + nfsm_srv_pre_vattr_init(&preattr); + preattrerr = vnode_getattr(vp, &preattr, ctx); + + error = VNOP_FSYNC(vp, MNT_WAIT, ctx); + + nfsm_srv_vattr_init(&postattr, 1); + postattrerr = vnode_getattr(vp, &postattr, ctx); + +nfsmerr: + if (vp) + vnode_put(vp); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3WCCDATA + NFSX_V3WRITEVERF); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nfsm_chain_add_wcc_data(error, nd, &nmrep, + preattrerr, &preattr, postattrerr, &postattr); + if (!nd->nd_repstat) { + nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_sec); + nfsm_chain_add_32(error, &nmrep, nx->nx_exptime.tv_usec); } - for_ret = VOP_GETATTR(vp, &bfor, cred, procp); - didhold = ubc_hold(vp); - error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); - aft_ret = VOP_GETATTR(vp, &aft, cred, procp); - VOP_UNLOCK(vp, 0, procp); - if (didhold) - ubc_rele(vp); - vrele(vp); - nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); - nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); - if (!error) { - nfsm_build(tl, u_long *, NFSX_V3WRITEVERF); - *tl++ = txdr_unsigned(boottime.tv_sec); - *tl = txdr_unsigned(boottime.tv_usec); - } else - return (0); - nfsm_srvdone; +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* * nfs statfs service */ int -nfsrv_statfs(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_statfs( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register struct statfs *sf; - register struct nfs_statfs *sfp; - register u_long *tl; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, getret = 1; - int v3 = (nfsd->nd_flag & ND_NFSV3); - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - struct statfs statfs; - u_quad_t frev, tval; - -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, &at); - return (0); + struct vfs_attr va; + int error, attrerr; + vnode_t vp; + struct vnode_attr attr; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + off_t blksize; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = NULL; + blksize = 512; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + VFSATTR_INIT(&va); + VFSATTR_WANTED(&va, f_blocks); + VFSATTR_WANTED(&va, f_bavail); + VFSATTR_WANTED(&va, f_files); + VFSATTR_WANTED(&va, f_ffree); + error = vfs_getattr(vnode_mount(vp), &va, ctx); + blksize = vnode_mount(vp)->mnt_vfsstat.f_bsize; + + if (nd->nd_vers == NFS_VER3) { + nfsm_srv_vattr_init(&attr, nd->nd_vers); + attrerr = vnode_getattr(vp, &attr, ctx); } - sf = &statfs; - error = VFS_STATFS(vp->v_mount, sf, procp); - getret = VOP_GETATTR(vp, &at, cred, procp); - vput(vp); - nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); - if (v3) - nfsm_srvpostop_attr(getret, &at); - if (error) - return (0); - nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); - if (v3) { - tval = (u_quad_t)(unsigned long)sf->f_blocks; - tval *= (u_quad_t)(unsigned long)sf->f_bsize; - txdr_hyper(&tval, &sfp->sf_tbytes); - tval = (u_quad_t)(unsigned long)sf->f_bfree; - tval *= (u_quad_t)(unsigned long)sf->f_bsize; - txdr_hyper(&tval, &sfp->sf_fbytes); - tval = (u_quad_t)(unsigned long)sf->f_bavail; - tval *= (u_quad_t)(unsigned long)sf->f_bsize; - txdr_hyper(&tval, &sfp->sf_abytes); - sfp->sf_tfiles.nfsuquad[0] = 0; - sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); - sfp->sf_ffiles.nfsuquad[0] = 0; - sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); - sfp->sf_afiles.nfsuquad[0] = 0; - sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); - sfp->sf_invarsec = 0; + +nfsmerr: + if (vp) + vnode_put(vp); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_POSTOPATTR(nd->nd_vers) + NFSX_STATFS(nd->nd_vers)); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + if (nd->nd_vers == NFS_VER3) + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsmout_if(nd->nd_repstat); + + if (nd->nd_vers == NFS_VER3) { + nfsm_chain_add_64(error, &nmrep, va.f_blocks * blksize); + nfsm_chain_add_64(error, &nmrep, va.f_bfree * blksize); + nfsm_chain_add_64(error, &nmrep, va.f_bavail * blksize); + nfsm_chain_add_64(error, &nmrep, va.f_files); + nfsm_chain_add_64(error, &nmrep, va.f_ffree); + nfsm_chain_add_64(error, &nmrep, va.f_ffree); + nfsm_chain_add_32(error, &nmrep, 0); /* invarsec */ } else { - sfp->sf_tsize = txdr_unsigned(NFS_V2MAXDATA); - sfp->sf_bsize = txdr_unsigned(sf->f_bsize); - sfp->sf_blocks = txdr_unsigned(sf->f_blocks); - sfp->sf_bfree = txdr_unsigned(sf->f_bfree); - sfp->sf_bavail = txdr_unsigned(sf->f_bavail); + nfsm_chain_add_32(error, &nmrep, NFS_V2MAXDATA); + nfsm_chain_add_32(error, &nmrep, blksize); + nfsm_chain_add_32(error, &nmrep, va.f_blocks); + nfsm_chain_add_32(error, &nmrep, va.f_bfree); + nfsm_chain_add_32(error, &nmrep, va.f_bavail); + } +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; } - nfsm_srvdone; + return (error); } /* * nfs fsinfo service */ int -nfsrv_fsinfo(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_fsinfo( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register struct nfsv3_fsinfo *sip; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, getret = 1, pref, max; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - getret = VOP_GETATTR(vp, &at, cred, procp); - vput(vp); - nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); - nfsm_srvpostop_attr(getret, &at); - nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); + int error, attrerr, prefsize, maxsize; + vnode_t vp; + struct vnode_attr attr; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + nfsm_srv_vattr_init(&attr, NFS_VER3); + attrerr = vnode_getattr(vp, &attr, ctx); + +nfsmerr: + if (vp) + vnode_put(vp); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR + NFSX_V3FSINFO); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsmout_if(nd->nd_repstat); /* - * XXX - * There should be file system VFS OP(s) to get this information. + * XXX There should be file system VFS OP(s) to get this information. * For now, assume our usual NFS defaults. */ - if (slp->ns_so->so_type == SOCK_DGRAM) - max = pref = NFS_MAXDGRAMDATA; - else - max = pref = NFS_MAXDATA; - sip->fs_rtmax = txdr_unsigned(max); - sip->fs_rtpref = txdr_unsigned(pref); - sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); - sip->fs_wtmax = txdr_unsigned(max); - sip->fs_wtpref = txdr_unsigned(pref); - sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); - sip->fs_dtpref = txdr_unsigned(pref); - sip->fs_maxfilesize.nfsuquad[0] = 0xffffffff; - sip->fs_maxfilesize.nfsuquad[1] = 0xffffffff; - sip->fs_timedelta.nfsv3_sec = 0; - sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1); - sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | - NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | - NFSV3FSINFO_CANSETTIME); - nfsm_srvdone; + if (slp->ns_sotype == SOCK_DGRAM) { + maxsize = NFS_MAXDGRAMDATA; + prefsize = NFS_PREFDGRAMDATA; + } else + maxsize = prefsize = NFSRV_MAXDATA; + + nfsm_chain_add_32(error, &nmrep, maxsize); + nfsm_chain_add_32(error, &nmrep, prefsize); + nfsm_chain_add_32(error, &nmrep, NFS_FABLKSIZE); + nfsm_chain_add_32(error, &nmrep, maxsize); + nfsm_chain_add_32(error, &nmrep, prefsize); + nfsm_chain_add_32(error, &nmrep, NFS_FABLKSIZE); + nfsm_chain_add_32(error, &nmrep, prefsize); + nfsm_chain_add_64(error, &nmrep, 0xffffffffffffffffULL); + nfsm_chain_add_32(error, &nmrep, 0); + nfsm_chain_add_32(error, &nmrep, 1); + /* XXX link/symlink support should be taken from volume capabilities */ + nfsm_chain_add_32(error, &nmrep, + NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | + NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); + +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* * nfs pathconf service */ int -nfsrv_pathconf(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_pathconf( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; - struct mbuf *nam = nfsd->nd_nam; - caddr_t dpos = nfsd->nd_dpos; - struct ucred *cred = &nfsd->nd_cr; - register u_long *tl; - register struct nfsv3_pathconf *pc; - register long t1; - caddr_t bpos; - int error = 0, rdonly, cache, getret = 1, linkmax, namemax; + int error, attrerr, linkmax, namemax; int chownres, notrunc, case_sensitive, case_preserving; - char *cp2; - struct mbuf *mb, *mb2, *mreq; - struct vnode *vp; - struct vattr at; - nfsfh_t nfh; - fhandle_t *fhp; - u_quad_t frev; - -#ifndef nolint - cache = 0; -#endif - fhp = &nfh.fh_generic; - nfsm_srvmtofh(fhp); - if ((error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, - &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE))) { - nfsm_reply(NFSX_UNSIGNED); - nfsm_srvpostop_attr(getret, &at); - return (0); - } - error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); + vnode_t vp; + struct vnode_attr attr; + struct nfs_filehandle nfh; + struct nfs_export *nx; + struct nfs_export_options *nxo; + struct nfsm_chain *nmreq, nmrep; + + error = 0; + attrerr = ENOENT; + nmreq = &nd->nd_nmreq; + nfsm_chain_null(&nmrep); + vp = NULL; + + nfsm_chain_get_fh_ptr(error, nmreq, nd->nd_vers, nfh.nfh_fhp, nfh.nfh_len); + nfsmerr_if(error); + error = nfsrv_fhtovp(&nfh, nd, &vp, &nx, &nxo); + nfsmerr_if(error); + + /* update export stats */ + NFSStatAdd64(&nx->nx_stats.ops, 1); + + /* update active user stats */ + nfsrv_update_user_stat(nx, nd, kauth_cred_getuid(nd->nd_cr), 1, 0, 0); + + error = nfsrv_credcheck(nd, ctx, nx, nxo); + nfsmerr_if(error); + + error = VNOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax, ctx); if (!error) - error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); + error = VNOP_PATHCONF(vp, _PC_NAME_MAX, &namemax, ctx); if (!error) - error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); + error = VNOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres, ctx); if (!error) - error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc); + error = VNOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc, ctx); if (!error) - error = VOP_PATHCONF(vp, _PC_CASE_SENSITIVE, &case_sensitive); + error = VNOP_PATHCONF(vp, _PC_CASE_SENSITIVE, &case_sensitive, ctx); if (!error) - error = VOP_PATHCONF(vp, _PC_CASE_PRESERVING, &case_preserving); - getret = VOP_GETATTR(vp, &at, cred, procp); - vput(vp); - nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); - nfsm_srvpostop_attr(getret, &at); - if (error) - return (0); - nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); + error = VNOP_PATHCONF(vp, _PC_CASE_PRESERVING, &case_preserving, ctx); - pc->pc_linkmax = txdr_unsigned(linkmax); - pc->pc_namemax = txdr_unsigned(namemax); - pc->pc_notrunc = txdr_unsigned(notrunc); - pc->pc_chownrestricted = txdr_unsigned(chownres); - pc->pc_caseinsensitive = txdr_unsigned(!case_sensitive); - pc->pc_casepreserving = txdr_unsigned(case_preserving); + nfsm_srv_vattr_init(&attr, NFS_VER3); + attrerr = vnode_getattr(vp, &attr, ctx); - nfsm_srvdone; +nfsmerr: + if (vp) + vnode_put(vp); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; + nfsmout_on_status(nd, error); + nfsm_chain_add_postop_attr(error, nd, &nmrep, attrerr, &attr); + nfsmout_if(nd->nd_repstat); + + nfsm_chain_add_32(error, &nmrep, linkmax); + nfsm_chain_add_32(error, &nmrep, namemax); + nfsm_chain_add_32(error, &nmrep, notrunc); + nfsm_chain_add_32(error, &nmrep, chownres); + nfsm_chain_add_32(error, &nmrep, !case_sensitive); + nfsm_chain_add_32(error, &nmrep, case_preserving); + +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* @@ -3524,23 +4695,35 @@ nfsrv_pathconf(nfsd, slp, procp, mrq) */ /* ARGSUSED */ int -nfsrv_null(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_null( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + __unused vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep; - caddr_t bpos; - int error = NFSERR_RETVOID, cache; - struct mbuf *mb, *mreq; - u_quad_t frev; - -#ifndef nolint - cache = 0; -#endif - nfsm_reply(0); - return (0); + int error = NFSERR_RETVOID; + struct nfsm_chain nmrep; + + /* + * RPCSEC_GSS context setup ? + */ + if (nd->nd_gss_context) + return(nfs_gss_svc_ctx_init(nd, slp, mrepp)); + + nfsm_chain_null(&nmrep); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, 0); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } /* @@ -3548,84 +4731,120 @@ nfsrv_null(nfsd, slp, procp, mrq) */ /* ARGSUSED */ int -nfsrv_noop(nfsd, slp, procp, mrq) - struct nfsrv_descript *nfsd; - struct nfssvc_sock *slp; - struct proc *procp; - struct mbuf **mrq; +nfsrv_noop( + struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + __unused vfs_context_t ctx, + mbuf_t *mrepp) { - struct mbuf *mrep = nfsd->nd_mrep; - caddr_t bpos; - int error, cache; - struct mbuf *mb, *mreq; - u_quad_t frev; - -#ifndef nolint - cache = 0; -#endif - if (nfsd->nd_repstat) - error = nfsd->nd_repstat; + int error; + struct nfsm_chain nmrep; + + nfsm_chain_null(&nmrep); + + if (nd->nd_repstat) + error = nd->nd_repstat; else error = EPROCUNAVAIL; - nfsm_reply(0); - return (0); + + /* assemble reply */ + nd->nd_repstat = error; + error = nfsrv_rephead(nd, slp, &nmrep, 0); + nfsmout_if(error); + *mrepp = nmrep.nmc_mhead; +nfsmout: + nfsm_chain_build_done(error, &nmrep); + if (error) { + nfsm_chain_cleanup(&nmrep); + *mrepp = NULL; + } + return (error); } +int (*nfsrv_procs[NFS_NPROCS])(struct nfsrv_descript *nd, + struct nfsrv_sock *slp, + vfs_context_t ctx, + mbuf_t *mrepp) = { + nfsrv_null, + nfsrv_getattr, + nfsrv_setattr, + nfsrv_lookup, + nfsrv_access, + nfsrv_readlink, + nfsrv_read, + nfsrv_write, + nfsrv_create, + nfsrv_mkdir, + nfsrv_symlink, + nfsrv_mknod, + nfsrv_remove, + nfsrv_rmdir, + nfsrv_rename, + nfsrv_link, + nfsrv_readdir, + nfsrv_readdirplus, + nfsrv_statfs, + nfsrv_fsinfo, + nfsrv_pathconf, + nfsrv_commit, + nfsrv_noop +}; + /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use - * vn_writechk() and VOP_ACCESS() for two reasons. + * vnode_authorize() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case * 2 - The owner is to be given access irrespective of mode bits so that * processes that chmod after opening a file don't break. I don't like * this because it opens a security hole, but since the nfs server opens * a security hole the size of a barn door anyhow, what the heck. - - * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS() + * + * The exception to rule 2 is EPERM. If a file is IMMUTABLE, vnode_authorize() * will return EPERM instead of EACCESS. EPERM is always an error. */ -static int -nfsrv_access(vp, flags, cred, rdonly, p, override) - register struct vnode *vp; - int flags; - register struct ucred *cred; - int rdonly; - struct proc *p; - int override; +int +nfsrv_authorize( + vnode_t vp, + vnode_t dvp, + kauth_action_t action, + vfs_context_t ctx, + struct nfs_export_options *nxo, + int override) { - struct vattr vattr; + struct vnode_attr vattr; int error; - if (flags & VWRITE) { - /* Just vn_writechk() changed to check rdonly */ + + if (action & KAUTH_VNODE_WRITE_RIGHTS) { /* - * Disallow write attempts on read-only file systems; + * Disallow write attempts on read-only exports; * unless the file is a socket or a block or character * device resident on the file system. */ - if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { - switch (vp->v_type) { + if (nxo->nxo_flags & NX_READONLY) { + switch (vnode_vtype(vp)) { case VREG: case VDIR: case VLNK: case VCPLX: return (EROFS); + default: + break; } } - /* - * If there's shared text associated with - * the inode, we can't allow writing. - */ - if (vp->v_flag & VTEXT) - return (ETXTBSY); - } - if ((error = VOP_GETATTR(vp, &vattr, cred, p))) - return (error); - error = VOP_ACCESS(vp, flags, cred, p); - /* - * Allow certain operations for the owner (reads and writes - * on files that are already open). Picking up from FreeBSD. - */ - if (override && error == EACCES && cred->cr_uid == vattr.va_uid) - error = 0; - return error; + } + error = vnode_authorize(vp, dvp, action, ctx); + /* + * Allow certain operations for the owner (reads and writes + * on files that are already open). Picking up from FreeBSD. + */ + if (override && (error == EACCES)) { + VATTR_INIT(&vattr); + VATTR_WANTED(&vattr, va_uid); + if ((vnode_getattr(vp, &vattr, ctx) == 0) && + (kauth_cred_getuid(vfs_context_ucred(ctx)) == vattr.va_uid)) + error = 0; + } + return error; } -#endif /* NFS_NOSERVER */ + +#endif /* NFSSERVER */