X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..a991bd8d3e7fe02dbca0644054bab73c5b75324a:/bsd/nfs/nfs_node.c diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index e42d5022d..c47fa9263 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -1,8 +1,8 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2019 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ @@ -65,15 +65,21 @@ * FreeBSD-Id: nfs_node.c,v 1.22 1997/10/28 14:06:20 bde Exp $ */ +#include +#if CONFIG_NFS_CLIENT #include +#include #include #include #include #include +#include #include #include #include +#include +#include #include #include @@ -82,15 +88,21 @@ #include #include -#define NFSNOHASH(fhsum) \ +#define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) -static LIST_HEAD(nfsnodehashhead, nfsnode) *nfsnodehashtbl; +static LIST_HEAD(nfsnodehashhead, nfsnode) * nfsnodehashtbl; static u_long nfsnodehash; static lck_grp_t *nfs_node_hash_lck_grp; static lck_grp_t *nfs_node_lck_grp; +static lck_grp_t *nfs_data_lck_grp; lck_mtx_t *nfs_node_hash_mutex; +ZONE_DECLARE(nfsnode_zone, "NFS node", + sizeof(struct nfsnode), ZC_ZFREE_CLEARMEM); + +#define NFS_NODE_DBG(...) NFS_DBG(NFS_FAC_NODE, 7, ## __VA_ARGS__) + /* * Initialize hash links for nfsnodes * and build nfsnode free list. 
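For reference, the lookup scheme these declarations serve is deliberately simple: nfs_hash() (below) just sums the bytes of the NFS file handle, and NFSNOHASH() masks that sum with nfsnodehash, the power-of-two-minus-one mask that hashinit() hands back, to select a bucket in nfsnodehashtbl. A minimal user-space sketch of that bucket selection, assuming hypothetical helper names (fh_checksum, fh_bucket) that are not part of the source:

#include <stddef.h>

/* Illustrative sketch of the hashing scheme; not part of the diff. */
static unsigned long
fh_checksum(const unsigned char *fhp, size_t fhsize)
{
	unsigned long sum = 0;
	size_t i;

	for (i = 0; i < fhsize; i++) {
		sum += fhp[i];          /* same byte-wise sum as nfs_hash() */
	}
	return sum;
}

static unsigned long
fh_bucket(unsigned long fhsum, unsigned long hashmask)
{
	/* hashinit() sizes the table to a power of two and returns
	 * (size - 1) as the mask, so this picks a bucket index the
	 * same way NFSNOHASH() does. */
	return fhsum & hashmask;
}
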
@@ -101,14 +113,16 @@ nfs_nhinit(void) nfs_node_hash_lck_grp = lck_grp_alloc_init("nfs_node_hash", LCK_GRP_ATTR_NULL); nfs_node_hash_mutex = lck_mtx_alloc_init(nfs_node_hash_lck_grp, LCK_ATTR_NULL); nfs_node_lck_grp = lck_grp_alloc_init("nfs_node", LCK_GRP_ATTR_NULL); + nfs_data_lck_grp = lck_grp_alloc_init("nfs_data", LCK_GRP_ATTR_NULL); } void nfs_nhinit_finish(void) { lck_mtx_lock(nfs_node_hash_mutex); - if (!nfsnodehashtbl) + if (!nfsnodehashtbl) { nfsnodehashtbl = hashinit(desiredvnodes, M_NFSNODE, &nfsnodehash); + } lck_mtx_unlock(nfs_node_hash_mutex); } @@ -122,11 +136,55 @@ nfs_hash(u_char *fhp, int fhsize) int i; fhsum = 0; - for (i = 0; i < fhsize; i++) + for (i = 0; i < fhsize; i++) { fhsum += *fhp++; - return (fhsum); + } + return fhsum; } + +int nfs_case_insensitive(mount_t); + +int +nfs_case_insensitive(mount_t mp) +{ + struct nfsmount *nmp = VFSTONFS(mp); + int answer = 0; + int skip = 0; + + if (nfs_mount_gone(nmp)) { + return 0; + } + + if (nmp->nm_vers == NFS_VER2) { + /* V2 has no way to know */ + return 0; + } + + lck_mtx_lock(&nmp->nm_lock); + if (nmp->nm_vers == NFS_VER3) { + if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) { + /* We're holding the node lock so we just return + * with answer as case sensitive. Is very rare + * for file systems not to be homogenous w.r.t. pathconf + */ + skip = 1; + } + } else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) { + /* no pathconf info cached */ + skip = 1; + } + + if (!skip && (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_INSENSITIVE)) { + answer = 1; + } + + lck_mtx_unlock(&nmp->nm_lock); + + return answer; +} + + /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! @@ -139,9 +197,10 @@ nfs_nget( nfsnode_t dnp, struct componentname *cnp, u_char *fhp, - int fhsize, + uint32_t fhsize, struct nfs_vattr *nvap, u_int64_t *xidp, + uint32_t auth, int flags, nfsnode_t *npp) { @@ -151,30 +210,50 @@ nfs_nget( int error, nfsvers; mount_t mp2; struct vnode_fsparam vfsp; - uint32_t vid; + uint32_t vid, cn_namelen; + u_long nfshash; FSDBG_TOP(263, mp, dnp, flags, npp); /* Check for unmount in progress */ - if (!mp || (mp->mnt_kern_flag & MNTK_FRCUNMOUNT)) { + if (!mp || vfs_isforce(mp)) { *npp = NULL; error = ENXIO; FSDBG_BOT(263, mp, dnp, 0xd1e, error); - return (error); + return error; } nfsvers = VFSTONFS(mp)->nm_vers; - - nhpp = NFSNOHASH(nfs_hash(fhp, fhsize)); + cn_namelen = cnp ? cnp->cn_namelen : 0; + nfshash = nfs_hash(fhp, fhsize); loop: lck_mtx_lock(nfs_node_hash_mutex); + nhpp = NFSNOHASH(nfshash); for (np = nhpp->lh_first; np != 0; np = np->n_hash.le_next) { mp2 = (np->n_hflag & NHINIT) ? np->n_mount : NFSTOMP(np); if (mp != mp2 || np->n_fhsize != fhsize || - bcmp(fhp, np->n_fhp, fhsize)) + bcmp(fhp, np->n_fhp, fhsize)) { continue; + } + if (nvap && (nvap->nva_flags & NFS_FFLAG_TRIGGER_REFERRAL) && + cnp && (cn_namelen > (fhsize - sizeof(dnp)))) { + /* The name was too long to fit in the file handle. Check it against the node's name. 
*/ + int namecmp = 0; + const char *vname = vnode_getname(NFSTOV(np)); + if (vname) { + if (cn_namelen != strlen(vname)) { + namecmp = 1; + } else { + namecmp = strncmp(vname, cnp->cn_nameptr, cn_namelen); + } + vnode_putname(vname); + } + if (namecmp) { /* full name didn't match */ + continue; + } + } FSDBG(263, dnp, np, np->n_flag, 0xcace0000); - /* if the node is locked, sleep on it */ - if (np->n_hflag & NHLOCKED) { + /* if the node is being initialized or locked, sleep on it */ + if ((np->n_hflag & NHINIT) || ((np->n_hflag & NHLOCKED) && !(flags & NG_NOCREATE))) { np->n_hflag |= NHLOCKWANT; FSDBG(263, dnp, np, np->n_flag, 0xcace2222); msleep(np, nfs_node_hash_mutex, PDROP | PINOD, "nfs_nget", NULL); @@ -190,67 +269,178 @@ loop: * changed identity, no need to wait. */ FSDBG_BOT(263, dnp, *npp, 0xcace0d1e, error); - return (error); + return error; } - if ((error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE))) { + if ((error = nfs_node_lock(np))) { /* this only fails if the node is now unhashed */ /* so let's see if we can find/create it again */ FSDBG(263, dnp, *npp, 0xcaced1e2, error); vnode_put(vp); + if (flags & NG_NOCREATE) { + *npp = 0; + FSDBG_BOT(263, dnp, *npp, 0xcaced1e0, ENOENT); + return ENOENT; + } goto loop; } /* update attributes */ - error = nfs_loadattrcache(np, nvap, xidp, 0); + if (nvap) { + error = nfs_loadattrcache(np, nvap, xidp, 0); + } if (error) { - nfs_unlock(np); + nfs_node_unlock(np); vnode_put(vp); } else { - if (dnp && cnp && (flags & NG_MAKEENTRY)) + if (dnp && cnp && (flags & NG_MAKEENTRY)) { cache_enter(NFSTOV(dnp), vp, cnp); + } + /* + * Update the vnode if the name/and or the parent has + * changed. We need to do this so that if getattrlist is + * called asking for ATTR_CMN_NAME, that the "most" + * correct name is being returned. In addition for + * monitored vnodes we need to kick the vnode out of the + * name cache. We do this so that if there are hard + * links in the same directory the link will not be + * found and a lookup will get us here to return the + * name of the current link. In addition by removing the + * name from the name cache the old name will not be + * found after a rename done on another client or the + * server. The principle reason to do this is because + * Finder is asking for notifications on a directory. + * The directory changes, Finder gets notified, reads + * the directory (which we have purged) and for each + * entry returned calls getattrlist with the name + * returned from readdir. gettattrlist has to call + * namei/lookup to resolve the name, because its not in + * the cache we end up here. We need to update the name + * so Finder will get the name it called us with. + * + * We had an imperfect solution with respect to case + * sensitivity. There is a test that is run in + * FileBuster that does renames from some name to + * another name differing only in case. It then reads + * the directory looking for the new name, after it + * finds that new name, it ask gettattrlist to verify + * that the name is the new name. Usually that works, + * but renames generate fsevents and fseventsd will do a + * lookup on the name via lstat. Since that test renames + * old name to new name back and forth there is a race + * that an fsevent will be behind and will access the + * file by the old name, on a case insensitive file + * system that will work. Problem is if we do a case + * sensitive compare, we're going to change the name, + * which the test's getattrlist verification step is + * going to fail. 
So we will check the case sensitivity + * of the file system and do the appropriate compare. In + * a rare instance for non homogeneous file systems + * w.r.t. pathconf we will use case sensitive compares. + * That could break if the file system is actually case + * insensitive. + * + * Note that V2 does not know the case, so we just + * assume case sensitivity. + * + * This is clearly not perfect due to races, but this is + * as good as its going to get. You can defeat the + * handling of hard links simply by doing: + * + * while :; do ls -l > /dev/null; done + * + * in a terminal window. Even a single ls -l can cause a + * race. + * + * What we really need is for the caller, that + * knows the name being used is valid since it got it + * from a readdir to use that name and not ask for the + * ATTR_CMN_NAME + */ + if (dnp && cnp && (vp != NFSTOV(dnp))) { + int update_flags = (vnode_ismonitored((NFSTOV(dnp)))) ? VNODE_UPDATE_CACHE : 0; + int (*cmp)(const char *s1, const char *s2, size_t n); + + cmp = nfs_case_insensitive(mp) ? strncasecmp : strncmp; + + if (vp->v_name && cn_namelen != strnlen(vp->v_name, MAXPATHLEN)) { + update_flags |= VNODE_UPDATE_NAME; + } + if (vp->v_name && cn_namelen && (*cmp)(cnp->cn_nameptr, vp->v_name, cn_namelen)) { + update_flags |= VNODE_UPDATE_NAME; + } + if ((vp->v_name == NULL && cn_namelen != 0) || (vp->v_name != NULL && cn_namelen == 0)) { + update_flags |= VNODE_UPDATE_NAME; + } + if (vnode_parent(vp) != NFSTOV(dnp)) { + update_flags |= VNODE_UPDATE_PARENT; + } + if (update_flags) { + NFS_NODE_DBG("vnode_update_identity old name %s new name %.*s update flags = %x\n", + vp->v_name, cn_namelen, cnp->cn_nameptr ? cnp->cn_nameptr : "", update_flags); + vnode_update_identity(vp, NFSTOV(dnp), cnp->cn_nameptr, cn_namelen, 0, update_flags); + } + } + *npp = np; } FSDBG_BOT(263, dnp, *npp, 0xcace0000, error); - return(error); + return error; } FSDBG(263, mp, dnp, npp, 0xaaaaaaaa); + if (flags & NG_NOCREATE) { + lck_mtx_unlock(nfs_node_hash_mutex); + *npp = 0; + FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOENT); + return ENOENT; + } + /* * allocate and initialize nfsnode and stick it in the hash * before calling getnewvnode(). Anyone finding it in the * hash before initialization is complete will wait for it. */ - MALLOC_ZONE(np, nfsnode_t, sizeof *np, M_NFSNODE, M_WAITOK); - if (!np) { - lck_mtx_unlock(nfs_node_hash_mutex); - *npp = 0; - FSDBG_BOT(263, dnp, *npp, 0x80000001, ENOMEM); - return (ENOMEM); - } - bzero(np, sizeof *np); + np = zalloc_flags(nfsnode_zone, Z_WAITOK | Z_ZERO); np->n_hflag |= (NHINIT | NHLOCKED); np->n_mount = mp; + np->n_auth = auth; + TAILQ_INIT(&np->n_opens); + TAILQ_INIT(&np->n_lock_owners); + TAILQ_INIT(&np->n_locks); + np->n_dlink.tqe_next = NFSNOLIST; + np->n_dreturn.tqe_next = NFSNOLIST; + np->n_monlink.le_next = NFSNOLIST; + + /* ugh... 
need to keep track of ".zfs" directories to workaround server bugs */ + if ((nvap->nva_type == VDIR) && cnp && (cn_namelen == 4) && + (cnp->cn_nameptr[0] == '.') && (cnp->cn_nameptr[1] == 'z') && + (cnp->cn_nameptr[2] == 'f') && (cnp->cn_nameptr[3] == 's')) { + np->n_flag |= NISDOTZFS; + } + if (dnp && (dnp->n_flag & NISDOTZFS)) { + np->n_flag |= NISDOTZFSCHILD; + } - if (dnp && cnp && ((cnp->cn_namelen != 2) || + if (dnp && cnp && ((cn_namelen != 2) || (cnp->cn_nameptr[0] != '.') || (cnp->cn_nameptr[1] != '.'))) { vnode_t dvp = NFSTOV(dnp); if (!vnode_get(dvp)) { - if (!vnode_ref(dvp)) + if (!vnode_ref(dvp)) { np->n_parent = dvp; + } vnode_put(dvp); } } /* setup node's file handle */ if (fhsize > NFS_SMALLFH) { - MALLOC_ZONE(np->n_fhp, u_char *, - fhsize, M_NFSBIGFH, M_WAITOK); + MALLOC(np->n_fhp, u_char *, fhsize, M_NFSBIGFH, M_WAITOK); if (!np->n_fhp) { lck_mtx_unlock(nfs_node_hash_mutex); - FREE_ZONE(np, sizeof *np, M_NFSNODE); + NFS_ZFREE(nfsnode_zone, np); *npp = 0; FSDBG_BOT(263, dnp, *npp, 0x80000002, ENOMEM); - return (ENOMEM); + return ENOMEM; } } else { np->n_fhp = &np->n_fh[0]; @@ -264,21 +454,24 @@ loop: FSDBG(266, 0, np, np->n_flag, np->n_hflag); /* lock the new nfsnode */ - lck_rw_init(&np->n_lock, nfs_node_lck_grp, LCK_ATTR_NULL); - lck_rw_init(&np->n_datalock, nfs_node_lck_grp, LCK_ATTR_NULL); - nfs_lock(np, NFS_NODE_LOCK_FORCE); + lck_mtx_init(&np->n_lock, nfs_node_lck_grp, LCK_ATTR_NULL); + lck_rw_init(&np->n_datalock, nfs_data_lck_grp, LCK_ATTR_NULL); + lck_mtx_init(&np->n_openlock, nfs_open_grp, LCK_ATTR_NULL); + lck_mtx_lock(&np->n_lock); /* release lock on hash table */ lck_mtx_unlock(nfs_node_hash_mutex); /* do initial loading of attributes */ + NACLINVALIDATE(np); + NACCESSINVALIDATE(np); error = nfs_loadattrcache(np, nvap, xidp, 1); if (error) { FSDBG(266, 0, np, np->n_flag, 0xb1eb1e); - nfs_unlock(np); + nfs_node_unlock(np); lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); - np->n_hflag &= ~(NHHASHED|NHINIT|NHLOCKED); + np->n_hflag &= ~(NHHASHED | NHINIT | NHLOCKED); if (np->n_hflag & NHLOCKWANT) { np->n_hflag &= ~NHLOCKWANT; wakeup(np); @@ -291,19 +484,21 @@ loop: } np->n_parent = NULL; } - lck_rw_destroy(&np->n_lock, nfs_node_lck_grp); - lck_rw_destroy(&np->n_datalock, nfs_node_lck_grp); - if (np->n_fhsize > NFS_SMALLFH) - FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); - FREE_ZONE(np, sizeof *np, M_NFSNODE); + lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp); + lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp); + lck_mtx_destroy(&np->n_openlock, nfs_open_grp); + if (np->n_fhsize > NFS_SMALLFH) { + FREE(np->n_fhp, M_NFSBIGFH); + } + NFS_ZFREE(nfsnode_zone, np); *npp = 0; FSDBG_BOT(263, dnp, *npp, 0x80000003, error); - return (error); + return error; } NFS_CHANGED_UPDATE(nfsvers, np, nvap); - if (nvap->nva_type == VDIR) + if (nvap->nva_type == VDIR) { NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap); - NMODEINVALIDATE(np); + } /* now, attempt to get a new vnode */ vfsp.vnfs_mp = mp; @@ -311,26 +506,31 @@ loop: vfsp.vnfs_str = "nfs"; vfsp.vnfs_dvp = dnp ? 
NFSTOV(dnp) : NULL; vfsp.vnfs_fsnode = np; +#if CONFIG_NFS4 if (nfsvers == NFS_VER4) { #if FIFO - if (nvap->nva_type == VFIFO) + if (nvap->nva_type == VFIFO) { vfsp.vnfs_vops = fifo_nfsv4nodeop_p; - else + } else #endif /* FIFO */ - if (nvap->nva_type == VBLK || nvap->nva_type == VCHR) + if (nvap->nva_type == VBLK || nvap->nva_type == VCHR) { vfsp.vnfs_vops = spec_nfsv4nodeop_p; - else + } else { vfsp.vnfs_vops = nfsv4_vnodeop_p; - } else { + } + } else +#endif /* CONFIG_NFS4 */ + { #if FIFO - if (nvap->nva_type == VFIFO) + if (nvap->nva_type == VFIFO) { vfsp.vnfs_vops = fifo_nfsv2nodeop_p; - else + } else #endif /* FIFO */ - if (nvap->nva_type == VBLK || nvap->nva_type == VCHR) + if (nvap->nva_type == VBLK || nvap->nva_type == VCHR) { vfsp.vnfs_vops = spec_nfsv2nodeop_p; - else + } else { vfsp.vnfs_vops = nfsv2_vnodeop_p; + } } vfsp.vnfs_markroot = (flags & NG_MARKROOT) ? 1 : 0; vfsp.vnfs_marksystem = 0; @@ -338,16 +538,34 @@ loop: vfsp.vnfs_filesize = nvap->nva_size; vfsp.vnfs_cnp = cnp; vfsp.vnfs_flags = VNFS_ADDFSREF; - if (!dnp || !cnp || !(flags & NG_MAKEENTRY)) + if (!dnp || !cnp || !(flags & NG_MAKEENTRY)) { vfsp.vnfs_flags |= VNFS_NOCACHE; + } - error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode); +#if CONFIG_TRIGGERS + if (((nfsvers >= NFS_VER4) + ) + && (nvap->nva_type == VDIR) && (np->n_vattr.nva_flags & NFS_FFLAG_TRIGGER) + && !(flags & NG_MARKROOT)) { + struct vnode_trigger_param vtp; + bzero(&vtp, sizeof(vtp)); + bcopy(&vfsp, &vtp.vnt_params, sizeof(vfsp)); + vtp.vnt_resolve_func = nfs_mirror_mount_trigger_resolve; + vtp.vnt_unresolve_func = nfs_mirror_mount_trigger_unresolve; + vtp.vnt_rearm_func = nfs_mirror_mount_trigger_rearm; + vtp.vnt_flags = VNT_AUTO_REARM | VNT_KERN_RESOLVE; + error = vnode_create(VNCREATE_TRIGGER, VNCREATE_TRIGGER_SIZE, &vtp, &np->n_vnode); + } else +#endif + { + error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &np->n_vnode); + } if (error) { FSDBG(266, 0, np, np->n_flag, 0xb1eb1e); - nfs_unlock(np); + nfs_node_unlock(np); lck_mtx_lock(nfs_node_hash_mutex); LIST_REMOVE(np, n_hash); - np->n_hflag &= ~(NHHASHED|NHINIT|NHLOCKED); + np->n_hflag &= ~(NHHASHED | NHINIT | NHLOCKED); if (np->n_hflag & NHLOCKWANT) { np->n_hflag &= ~NHLOCKWANT; wakeup(np); @@ -360,14 +578,16 @@ loop: } np->n_parent = NULL; } - lck_rw_destroy(&np->n_lock, nfs_node_lck_grp); - lck_rw_destroy(&np->n_datalock, nfs_node_lck_grp); - if (np->n_fhsize > NFS_SMALLFH) - FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); - FREE_ZONE(np, sizeof *np, M_NFSNODE); + lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp); + lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp); + lck_mtx_destroy(&np->n_openlock, nfs_open_grp); + if (np->n_fhsize > NFS_SMALLFH) { + FREE(np->n_fhp, M_NFSBIGFH); + } + NFS_ZFREE(nfsnode_zone, np); *npp = 0; FSDBG_BOT(263, dnp, *npp, 0x80000004, error); - return (error); + return error; } vp = np->n_vnode; vnode_settag(vp, VT_NFS); @@ -375,7 +595,7 @@ loop: /* check if anyone's waiting on this node */ lck_mtx_lock(nfs_node_hash_mutex); - np->n_hflag &= ~(NHINIT|NHLOCKED); + np->n_hflag &= ~(NHINIT | NHLOCKED); if (np->n_hflag & NHLOCKWANT) { np->n_hflag &= ~NHLOCKWANT; wakeup(np); @@ -385,34 +605,206 @@ loop: *npp = np; FSDBG_BOT(263, dnp, vp, *npp, error); - return (error); + return error; } int -nfs_vnop_inactive(ap) +nfs_vnop_inactive( struct vnop_inactive_args /* { - struct vnodeop_desc *a_desc; - vnode_t a_vp; - vfs_context_t a_context; - } */ *ap; + * struct vnodeop_desc *a_desc; + * vnode_t a_vp; + * vfs_context_t a_context; + * } */*ap) { - 
vnode_t vp; + vnode_t vp = ap->a_vp; + vfs_context_t ctx = ap->a_context; nfsnode_t np; struct nfs_sillyrename *nsp; - struct nfs_vattr nvattr; - int unhash, attrerr; + struct nfs_vattr *nvattr; + int unhash, attrerr, busyerror, error, inuse, busied, force; + struct nfs_open_file *nofp; + struct componentname cn; + struct nfsmount *nmp; + mount_t mp; + + if (vp == NULL) { + panic("nfs_vnop_inactive: vp == NULL"); + } + np = VTONFS(vp); + if (np == NULL) { + panic("nfs_vnop_inactive: np == NULL"); + } + + nmp = NFSTONMP(np); + mp = vnode_mount(vp); + MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK); + +restart: + force = (!mp || vfs_isforce(mp)); + error = 0; + inuse = (nfs_mount_state_in_use_start(nmp, NULL) == 0); + + /* There shouldn't be any open or lock state at this point */ + lck_mtx_lock(&np->n_openlock); + if (np->n_openrefcnt && !force) { + /* + * vnode_rele and vnode_put drop the vnode lock before + * calling VNOP_INACTIVE, so there is a race were the + * vnode could become active again. Perhaps there are + * other places where this can happen, so if we've got + * here we need to get out. + */ +#ifdef NFS_NODE_DEBUG + NP(np, "nfs_vnop_inactive: still open: %d", np->n_openrefcnt); +#endif + lck_mtx_unlock(&np->n_openlock); + if (inuse) { + nfs_mount_state_in_use_end(nmp, 0); + } + goto out_free; + } + + TAILQ_FOREACH(nofp, &np->n_opens, nof_link) { + lck_mtx_lock(&nofp->nof_lock); + if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { + if (!force) { + NP(np, "nfs_vnop_inactive: open file busy"); + } + busied = 0; + } else { + nofp->nof_flags |= NFS_OPEN_FILE_BUSY; + busied = 1; + } + lck_mtx_unlock(&nofp->nof_lock); + if ((np->n_flag & NREVOKE) || (nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + if (busied) { + nfs_open_file_clear_busy(nofp); + } + continue; + } + /* + * If we just created the file, we already had it open in + * anticipation of getting a subsequent open call. If the + * node has gone inactive without being open, we need to + * clean up (close) the open done in the create. + */ +#if CONFIG_NFS4 + if ((nofp->nof_flags & NFS_OPEN_FILE_CREATE) && nofp->nof_creator && !force) { + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + lck_mtx_unlock(&np->n_openlock); + if (busied) { + nfs_open_file_clear_busy(nofp); + } + if (!nfs4_reopen(nofp, NULL)) { + if (inuse) { + nfs_mount_state_in_use_end(nmp, 0); + } + goto restart; + } + } + nofp->nof_flags &= ~NFS_OPEN_FILE_CREATE; + lck_mtx_unlock(&np->n_openlock); + error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_BOTH, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (error) { + NP(np, "nfs_vnop_inactive: create close error: %d", error); + nofp->nof_flags |= NFS_OPEN_FILE_CREATE; + } + if (busied) { + nfs_open_file_clear_busy(nofp); + } + if (inuse) { + nfs_mount_state_in_use_end(nmp, error); + } + goto restart; + } +#endif + if (nofp->nof_flags & NFS_OPEN_FILE_NEEDCLOSE) { + /* + * If the file is marked as needing reopen, but this was the only + * open on the file, just drop the open. 
+ */ + nofp->nof_flags &= ~NFS_OPEN_FILE_NEEDCLOSE; + if ((nofp->nof_flags & NFS_OPEN_FILE_REOPEN) && (nofp->nof_opencnt == 1)) { + nofp->nof_flags &= ~NFS_OPEN_FILE_REOPEN; + nofp->nof_r--; + nofp->nof_opencnt--; + nofp->nof_access = 0; + } else if (!force) { + lck_mtx_unlock(&np->n_openlock); + if (nofp->nof_flags & NFS_OPEN_FILE_REOPEN) { + int should_restart = 0; + if (busied) { + nfs_open_file_clear_busy(nofp); + } +#if CONFIG_NFS4 + if (!nfs4_reopen(nofp, NULL)) { + should_restart = 1; + } +#endif + if (should_restart) { + if (inuse) { + nfs_mount_state_in_use_end(nmp, 0); + } + goto restart; + } + } + error = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_READ, NFS_OPEN_SHARE_DENY_NONE, ctx); + if (error) { + NP(np, "nfs_vnop_inactive: need close error: %d", error); + nofp->nof_flags |= NFS_OPEN_FILE_NEEDCLOSE; + } + if (busied) { + nfs_open_file_clear_busy(nofp); + } + if (inuse) { + nfs_mount_state_in_use_end(nmp, error); + } + goto restart; + } + } + if (nofp->nof_opencnt && !force) { + NP(np, "nfs_vnop_inactive: file still open: %d", nofp->nof_opencnt); + } + if (!force && (nofp->nof_access || nofp->nof_deny || + nofp->nof_mmap_access || nofp->nof_mmap_deny || + nofp->nof_r || nofp->nof_w || nofp->nof_rw || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw || + nofp->nof_d_r || nofp->nof_d_w || nofp->nof_d_rw || + nofp->nof_d_r_dw || nofp->nof_d_w_dw || nofp->nof_d_rw_dw || + nofp->nof_d_r_drw || nofp->nof_d_w_drw || nofp->nof_d_rw_drw)) { + NP(np, "nfs_vnop_inactive: non-zero access: %d %d %d %d # %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u", + nofp->nof_access, nofp->nof_deny, + nofp->nof_mmap_access, nofp->nof_mmap_deny, + nofp->nof_r, nofp->nof_d_r, + nofp->nof_w, nofp->nof_d_w, + nofp->nof_rw, nofp->nof_d_rw, + nofp->nof_r_dw, nofp->nof_d_r_dw, + nofp->nof_w_dw, nofp->nof_d_w_dw, + nofp->nof_rw_dw, nofp->nof_d_rw_dw, + nofp->nof_r_drw, nofp->nof_d_r_drw, + nofp->nof_w_drw, nofp->nof_d_w_drw, + nofp->nof_rw_drw, nofp->nof_d_rw_drw); + } + if (busied) { + nfs_open_file_clear_busy(nofp); + } + } + lck_mtx_unlock(&np->n_openlock); - vp = ap->a_vp; - np = VTONFS(ap->a_vp); + if (inuse && nfs_mount_state_in_use_end(nmp, error)) { + goto restart; + } - nfs_lock(np, NFS_NODE_LOCK_FORCE); + nfs_node_lock_force(np); if (vnode_vtype(vp) != VDIR) { nsp = np->n_sillyrename; np->n_sillyrename = NULL; - } else + } else { nsp = NULL; + } FSDBG_TOP(264, vp, np, np->n_flag, nsp); @@ -420,34 +812,31 @@ nfs_vnop_inactive(ap) /* no silly file to clean up... */ /* clear all flags other than these */ np->n_flag &= (NMODIFIED); - nfs_unlock(np); + nfs_node_unlock(np); FSDBG_BOT(264, vp, np, np->n_flag, 0); - return (0); + goto out_free; } + nfs_node_unlock(np); /* Remove the silly file that was rename'd earlier */ /* flush all the buffers */ - nfs_unlock(np); - nfs_vinvalbuf2(vp, V_SAVE, vfs_context_thread(ap->a_context), nsp->nsr_cred, 1); - - /* purge the name cache to deter others from finding it */ - cache_purge(vp); + nfs_vinvalbuf2(vp, V_SAVE, vfs_context_thread(ctx), nsp->nsr_cred, 1); /* try to get the latest attributes */ - attrerr = nfs_getattr(np, &nvattr, ap->a_context, 0); + attrerr = nfs_getattr(np, nvattr, ctx, NGA_UNCACHED); /* Check if we should remove it from the node hash. */ /* Leave it if inuse or it has multiple hard links. 
*/ - if (vnode_isinuse(vp, 0) || (!attrerr && (nvattr.nva_nlink > 1))) { + if (vnode_isinuse(vp, 0) || (!attrerr && (nvattr->nva_nlink > 1))) { unhash = 0; } else { unhash = 1; ubc_setsize(vp, 0); } - /* grab node lock on this node and the directory */ - nfs_lock2(nsp->nsr_dnp, np, NFS_NODE_LOCK_FORCE); + /* mark this node and the directory busy while we do the remove */ + busyerror = nfs_node_set_busy2(nsp->nsr_dnp, np, vfs_context_thread(ctx)); /* lock the node while we remove the silly file */ lck_mtx_lock(nfs_node_hash_mutex); @@ -458,8 +847,11 @@ nfs_vnop_inactive(ap) np->n_hflag |= NHLOCKED; lck_mtx_unlock(nfs_node_hash_mutex); - /* purge again in case it was looked up while we were locking */ - cache_purge(vp); + /* purge the name cache to deter others from finding it */ + bzero(&cn, sizeof(cn)); + cn.cn_nameptr = nsp->nsr_name; + cn.cn_namelen = nsp->nsr_namlen; + nfs_name_cache_purge(nsp->nsr_dnp, np, &cn, ctx); FSDBG(264, np, np->n_size, np->n_vattr.nva_size, 0xf00d00f1); @@ -467,8 +859,13 @@ nfs_vnop_inactive(ap) nfs_removeit(nsp); /* clear all flags other than these */ + nfs_node_lock_force(np); np->n_flag &= (NMODIFIED); - nfs_unlock2(nsp->nsr_dnp, np); + nfs_node_unlock(np); + + if (!busyerror) { + nfs_node_clear_busy2(nsp->nsr_dnp, np); + } if (unhash && vnode_isinuse(vp, 0)) { /* vnode now inuse after silly remove? */ @@ -499,36 +896,185 @@ nfs_vnop_inactive(ap) lck_mtx_unlock(nfs_node_hash_mutex); /* cleanup sillyrename info */ - if (nsp->nsr_cred != NOCRED) + if (nsp->nsr_cred != NOCRED) { kauth_cred_unref(&nsp->nsr_cred); + } vnode_rele(NFSTOV(nsp->nsr_dnp)); - FREE_ZONE(nsp, sizeof(*nsp), M_NFSREQ); - + FREE(nsp, M_TEMP); FSDBG_BOT(264, vp, np, np->n_flag, 0); - return (0); +out_free: + FREE(nvattr, M_TEMP); + return 0; } /* * Reclaim an nfsnode so that it can be used for other purposes. */ int -nfs_vnop_reclaim(ap) +nfs_vnop_reclaim( struct vnop_reclaim_args /* { - struct vnodeop_desc *a_desc; - vnode_t a_vp; - vfs_context_t a_context; - } */ *ap; + * struct vnodeop_desc *a_desc; + * vnode_t a_vp; + * vfs_context_t a_context; + * } */*ap) { vnode_t vp = ap->a_vp; nfsnode_t np = VTONFS(vp); - struct nfsdmap *dp, *dp2; + struct nfs_open_file *nofp, *nextnofp; + struct nfs_file_lock *nflp, *nextnflp; + struct nfs_lock_owner *nlop, *nextnlop; + struct nfsmount *nmp = np->n_mount ? 
VFSTONFS(np->n_mount) : NFSTONMP(np); + mount_t mp = vnode_mount(vp); + int force; FSDBG_TOP(265, vp, np, np->n_flag, 0); + force = (!mp || vfs_isforce(mp) || nfs_mount_gone(nmp)); + + + /* There shouldn't be any open or lock state at this point */ + lck_mtx_lock(&np->n_openlock); + +#if CONFIG_NFS4 + if (nmp && (nmp->nm_vers >= NFS_VER4)) { + /* need to drop a delegation */ + if (np->n_dreturn.tqe_next != NFSNOLIST) { + /* remove this node from the delegation return list */ + lck_mtx_lock(&nmp->nm_lock); + if (np->n_dreturn.tqe_next != NFSNOLIST) { + TAILQ_REMOVE(&nmp->nm_dreturnq, np, n_dreturn); + np->n_dreturn.tqe_next = NFSNOLIST; + } + lck_mtx_unlock(&nmp->nm_lock); + } + if (np->n_dlink.tqe_next != NFSNOLIST) { + /* remove this node from the delegation list */ + lck_mtx_lock(&nmp->nm_lock); + if (np->n_dlink.tqe_next != NFSNOLIST) { + TAILQ_REMOVE(&nmp->nm_delegations, np, n_dlink); + np->n_dlink.tqe_next = NFSNOLIST; + } + lck_mtx_unlock(&nmp->nm_lock); + } + if ((np->n_openflags & N_DELEG_MASK) && !force) { + /* try to return the delegation */ + np->n_openflags &= ~N_DELEG_MASK; + } + if (np->n_attrdirfh) { + FREE(np->n_attrdirfh, M_TEMP); + np->n_attrdirfh = NULL; + } + } +#endif + + /* clean up file locks */ + TAILQ_FOREACH_SAFE(nflp, &np->n_locks, nfl_link, nextnflp) { + if (!(nflp->nfl_flags & NFS_FILE_LOCK_DEAD) && !force) { + NP(np, "nfs_vnop_reclaim: lock 0x%llx 0x%llx 0x%x (bc %d)", + nflp->nfl_start, nflp->nfl_end, nflp->nfl_flags, nflp->nfl_blockcnt); + } + if (!(nflp->nfl_flags & (NFS_FILE_LOCK_BLOCKED | NFS_FILE_LOCK_DEAD))) { + /* try sending an unlock RPC if it wasn't delegated */ + if (!(nflp->nfl_flags & NFS_FILE_LOCK_DELEGATED) && !force) { + nmp->nm_funcs->nf_unlock_rpc(np, nflp->nfl_owner, F_WRLCK, nflp->nfl_start, nflp->nfl_end, R_RECOVER, + NULL, nflp->nfl_owner->nlo_open_owner->noo_cred); + } + lck_mtx_lock(&nflp->nfl_owner->nlo_lock); + TAILQ_REMOVE(&nflp->nfl_owner->nlo_locks, nflp, nfl_lolink); + lck_mtx_unlock(&nflp->nfl_owner->nlo_lock); + } + TAILQ_REMOVE(&np->n_locks, nflp, nfl_link); + nfs_file_lock_destroy(nflp); + } + /* clean up lock owners */ + TAILQ_FOREACH_SAFE(nlop, &np->n_lock_owners, nlo_link, nextnlop) { + if (!TAILQ_EMPTY(&nlop->nlo_locks) && !force) { + NP(np, "nfs_vnop_reclaim: lock owner with locks"); + } + TAILQ_REMOVE(&np->n_lock_owners, nlop, nlo_link); + nfs_lock_owner_destroy(nlop); + } + /* clean up open state */ + if (np->n_openrefcnt && !force) { + NP(np, "nfs_vnop_reclaim: still open: %d", np->n_openrefcnt); + } + TAILQ_FOREACH_SAFE(nofp, &np->n_opens, nof_link, nextnofp) { + if (nofp->nof_flags & NFS_OPEN_FILE_BUSY) { + NP(np, "nfs_vnop_reclaim: open file busy"); + } + if (!(np->n_flag & NREVOKE) && !(nofp->nof_flags & NFS_OPEN_FILE_LOST)) { + if (nofp->nof_opencnt && !force) { + NP(np, "nfs_vnop_reclaim: file still open: %d", nofp->nof_opencnt); + } + if (!force && (nofp->nof_access || nofp->nof_deny || + nofp->nof_mmap_access || nofp->nof_mmap_deny || + nofp->nof_r || nofp->nof_w || nofp->nof_rw || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw || + nofp->nof_d_r || nofp->nof_d_w || nofp->nof_d_rw || + nofp->nof_d_r_dw || nofp->nof_d_w_dw || nofp->nof_d_rw_dw || + nofp->nof_d_r_drw || nofp->nof_d_w_drw || nofp->nof_d_rw_drw)) { + NP(np, "nfs_vnop_reclaim: non-zero access: %d %d %d %d # %u.%u %u.%u %u.%u dw %u.%u %u.%u %u.%u drw %u.%u %u.%u %u.%u", + nofp->nof_access, nofp->nof_deny, + nofp->nof_mmap_access, nofp->nof_mmap_deny, + nofp->nof_r, nofp->nof_d_r, + 
nofp->nof_w, nofp->nof_d_w, + nofp->nof_rw, nofp->nof_d_rw, + nofp->nof_r_dw, nofp->nof_d_r_dw, + nofp->nof_w_dw, nofp->nof_d_w_dw, + nofp->nof_rw_dw, nofp->nof_d_rw_dw, + nofp->nof_r_drw, nofp->nof_d_r_drw, + nofp->nof_w_drw, nofp->nof_d_w_drw, + nofp->nof_rw_drw, nofp->nof_d_rw_drw); +#if CONFIG_NFS4 + /* try sending a close RPC if it wasn't delegated */ + if (nofp->nof_r || nofp->nof_w || nofp->nof_rw || + nofp->nof_r_dw || nofp->nof_w_dw || nofp->nof_rw_dw || + nofp->nof_r_drw || nofp->nof_w_drw || nofp->nof_rw_drw) { + nfs4_close_rpc(np, nofp, NULL, nofp->nof_owner->noo_cred, R_RECOVER); + } +#endif + } + } + TAILQ_REMOVE(&np->n_opens, nofp, nof_link); + nfs_open_file_destroy(nofp); + } + lck_mtx_unlock(&np->n_openlock); + + if (np->n_monlink.le_next != NFSNOLIST) { + /* Wait for any in-progress getattr to complete, */ + /* then remove this node from the monitored node list. */ + lck_mtx_lock(&nmp->nm_lock); + while (np->n_mflag & NMMONSCANINPROG) { + struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 }; + np->n_mflag |= NMMONSCANWANT; + msleep(&np->n_mflag, &nmp->nm_lock, PZERO - 1, "nfswaitmonscan", &ts); + } + if (np->n_monlink.le_next != NFSNOLIST) { + LIST_REMOVE(np, n_monlink); + np->n_monlink.le_next = NFSNOLIST; + } + lck_mtx_unlock(&nmp->nm_lock); + } + + lck_mtx_lock(nfs_buf_mutex); + if (!force && (!LIST_EMPTY(&np->n_dirtyblkhd) || !LIST_EMPTY(&np->n_cleanblkhd))) { + NP(np, "nfs_reclaim: dropping %s buffers", (!LIST_EMPTY(&np->n_dirtyblkhd) ? "dirty" : "clean")); + } + lck_mtx_unlock(nfs_buf_mutex); + nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ap->a_context, 0); lck_mtx_lock(nfs_node_hash_mutex); - if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) - printf("nfs_reclaim: leaving unlinked file %s\n", np->n_sillyrename->nsr_name); + if ((vnode_vtype(vp) != VDIR) && np->n_sillyrename) { + if (!force) { + NP(np, "nfs_reclaim: leaving unlinked file %s", np->n_sillyrename->nsr_name); + } + if (np->n_sillyrename->nsr_cred != NOCRED) { + kauth_cred_unref(&np->n_sillyrename->nsr_cred); + } + vnode_rele(NFSTOV(np->n_sillyrename->nsr_dnp)); + FREE(np->n_sillyrename, M_TEMP); + } vnode_removefsref(vp); @@ -540,25 +1086,20 @@ nfs_vnop_reclaim(ap) lck_mtx_unlock(nfs_node_hash_mutex); /* - * Free up any directory cookie structures and - * large file handle structures that might be associated with - * this nfs node. + * Free up any directory cookie structures and large file handle + * structures that might be associated with this nfs node. 
*/ - nfs_lock(np, NFS_NODE_LOCK_FORCE); - if (vnode_vtype(vp) == VDIR) { - dp = np->n_cookies.lh_first; - while (dp) { - dp2 = dp; - dp = dp->ndm_list.le_next; - FREE_ZONE((caddr_t)dp2, - sizeof (struct nfsdmap), M_NFSDIROFF); - } + nfs_node_lock_force(np); + if ((vnode_vtype(vp) == VDIR) && np->n_cookiecache) { + NFS_ZFREE(ZV_NFSDIROFF, np->n_cookiecache); } if (np->n_fhsize > NFS_SMALLFH) { - FREE_ZONE(np->n_fhp, np->n_fhsize, M_NFSBIGFH); + FREE(np->n_fhp, M_NFSBIGFH); } - - nfs_unlock(np); + if (np->n_vattr.nva_acl) { + kauth_acl_free(np->n_vattr.nva_acl); + } + nfs_node_unlock(np); vnode_clearfsnode(vp); if (np->n_parent) { @@ -569,167 +1110,220 @@ nfs_vnop_reclaim(ap) np->n_parent = NULL; } - lck_rw_destroy(&np->n_lock, nfs_node_lck_grp); - lck_rw_destroy(&np->n_datalock, nfs_node_lck_grp); + lck_mtx_destroy(&np->n_lock, nfs_node_lck_grp); + lck_rw_destroy(&np->n_datalock, nfs_data_lck_grp); + lck_mtx_destroy(&np->n_openlock, nfs_open_grp); FSDBG_BOT(265, vp, np, np->n_flag, 0xd1ed1e); - FREE_ZONE(np, sizeof(struct nfsnode), M_NFSNODE); - return (0); + NFS_ZFREE(nfsnode_zone, np); + return 0; } /* * Acquire an NFS node lock */ + int -nfs_lock(nfsnode_t np, int locktype) +nfs_node_lock_internal(nfsnode_t np, int force) { - FSDBG_TOP(268, np, locktype, np->n_lockowner, 0); - if (locktype == NFS_NODE_LOCK_SHARED) { - lck_rw_lock_shared(&np->n_lock); - } else { - lck_rw_lock_exclusive(&np->n_lock); - np->n_lockowner = current_thread(); + FSDBG_TOP(268, np, force, 0, 0); + lck_mtx_lock(&np->n_lock); + if (!force && !(np->n_hflag && NHHASHED)) { + FSDBG_BOT(268, np, 0xdead, 0, 0); + lck_mtx_unlock(&np->n_lock); + return ENOENT; } - if ((locktype != NFS_NODE_LOCK_FORCE) && !(np->n_hflag && NHHASHED)) { - FSDBG_BOT(268, np, 0xdead, np->n_lockowner, 0); - nfs_unlock(np); - return (ENOENT); - } - FSDBG_BOT(268, np, locktype, np->n_lockowner, 0); - return (0); + FSDBG_BOT(268, np, force, 0, 0); + return 0; +} + +int +nfs_node_lock(nfsnode_t np) +{ + return nfs_node_lock_internal(np, 0); +} + +void +nfs_node_lock_force(nfsnode_t np) +{ + nfs_node_lock_internal(np, 1); } /* * Release an NFS node lock */ void -nfs_unlock(nfsnode_t np) +nfs_node_unlock(nfsnode_t np) { - FSDBG(269, np, np->n_lockowner, current_thread(), 0); - np->n_lockowner = NULL; - lck_rw_done(&np->n_lock); + FSDBG(269, np, current_thread(), 0, 0); + lck_mtx_unlock(&np->n_lock); } /* * Acquire 2 NFS node locks - * - locks taken in order given (assumed to be parent-child order) + * - locks taken in reverse address order * - both or neither of the locks are taken * - only one lock taken per node (dup nodes are skipped) */ int -nfs_lock2(nfsnode_t np1, nfsnode_t np2, int locktype) +nfs_node_lock2(nfsnode_t np1, nfsnode_t np2) { + nfsnode_t first, second; int error; - if ((error = nfs_lock(np1, locktype))) - return (error); - if (np1 == np2) - return (error); - if ((error = nfs_lock(np2, locktype))) - nfs_unlock(np1); - return (error); + first = (np1 > np2) ? np1 : np2; + second = (np1 > np2) ? 
np2 : np1; + if ((error = nfs_node_lock(first))) { + return error; + } + if (np1 == np2) { + return error; + } + if ((error = nfs_node_lock(second))) { + nfs_node_unlock(first); + } + return error; } -/* - * Unlock a couple of NFS nodes - */ void -nfs_unlock2(nfsnode_t np1, nfsnode_t np2) +nfs_node_unlock2(nfsnode_t np1, nfsnode_t np2) { - nfs_unlock(np1); - if (np1 != np2) - nfs_unlock(np2); + nfs_node_unlock(np1); + if (np1 != np2) { + nfs_node_unlock(np2); + } } /* - * Acquire 4 NFS node locks - * - fdnp/fnp and tdnp/tnp locks taken in order given - * - otherwise locks taken in node address order. - * - all or none of the locks are taken - * - only one lock taken per node (dup nodes are skipped) - * - some of the node pointers may be null + * Manage NFS node busy state. + * (Similar to NFS node locks above) */ int -nfs_lock4(nfsnode_t fdnp, nfsnode_t fnp, nfsnode_t tdnp, nfsnode_t tnp, int locktype) +nfs_node_set_busy(nfsnode_t np, thread_t thd) { - nfsnode_t list[4]; - int i, lcnt = 0, error; - - if (fdnp == tdnp) { - list[lcnt++] = fdnp; - } else if (fdnp->n_parent && (tdnp == VTONFS(fdnp->n_parent))) { - list[lcnt++] = tdnp; - list[lcnt++] = fdnp; - } else if (tdnp->n_parent && (fdnp == VTONFS(tdnp->n_parent))) { - list[lcnt++] = fdnp; - list[lcnt++] = tdnp; - } else if (fdnp < tdnp) { - list[lcnt++] = fdnp; - list[lcnt++] = tdnp; - } else { - list[lcnt++] = tdnp; - list[lcnt++] = fdnp; + struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 }; + int error; + + if ((error = nfs_node_lock(np))) { + return error; + } + while (ISSET(np->n_flag, NBUSY)) { + SET(np->n_flag, NBUSYWANT); + msleep(np, &np->n_lock, PZERO - 1, "nfsbusywant", &ts); + if ((error = nfs_sigintr(NFSTONMP(np), NULL, thd, 0))) { + break; + } + } + if (!error) { + SET(np->n_flag, NBUSY); } + nfs_node_unlock(np); + return error; +} - if (!tnp || (fnp == tnp) || (tnp == fdnp)) { - list[lcnt++] = fnp; - } else if (fnp < tnp) { - list[lcnt++] = fnp; - list[lcnt++] = tnp; - } else { - list[lcnt++] = tnp; - list[lcnt++] = fnp; +void +nfs_node_clear_busy(nfsnode_t np) +{ + int wanted; + + nfs_node_lock_force(np); + wanted = ISSET(np->n_flag, NBUSYWANT); + CLR(np->n_flag, NBUSY | NBUSYWANT); + nfs_node_unlock(np); + if (wanted) { + wakeup(np); + } +} + +int +nfs_node_set_busy2(nfsnode_t np1, nfsnode_t np2, thread_t thd) +{ + nfsnode_t first, second; + int error; + + first = (np1 > np2) ? np1 : np2; + second = (np1 > np2) ? np2 : np1; + if ((error = nfs_node_set_busy(first, thd))) { + return error; + } + if (np1 == np2) { + return error; + } + if ((error = nfs_node_set_busy(second, thd))) { + nfs_node_clear_busy(first); + } + return error; +} + +void +nfs_node_clear_busy2(nfsnode_t np1, nfsnode_t np2) +{ + nfs_node_clear_busy(np1); + if (np1 != np2) { + nfs_node_clear_busy(np2); } +} + +/* helper function to sort four nodes in reverse address order (no dupes) */ +static void +nfs_node_sort4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4, nfsnode_t *list, int *lcntp) +{ + nfsnode_t na[2], nb[2]; + int a, b, i, lcnt; + + /* sort pairs then merge */ + na[0] = (np1 > np2) ? np1 : np2; + na[1] = (np1 > np2) ? np2 : np1; + nb[0] = (np3 > np4) ? np3 : np4; + nb[1] = (np3 > np4) ? 
np4 : np3; + for (a = b = i = lcnt = 0; i < 4; i++) { + if (a >= 2) { + list[lcnt] = nb[b++]; + } else if ((b >= 2) || (na[a] >= nb[b])) { + list[lcnt] = na[a++]; + } else { + list[lcnt] = nb[b++]; + } + if ((lcnt <= 0) || (list[lcnt] != list[lcnt - 1])) { + lcnt++; /* omit dups */ + } + } + if (list[lcnt - 1] == NULL) { + lcnt--; + } + *lcntp = lcnt; +} + +int +nfs_node_set_busy4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4, thread_t thd) +{ + nfsnode_t list[4]; + int i, lcnt, error; + + nfs_node_sort4(np1, np2, np3, np4, list, &lcnt); /* Now we can lock using list[0 - lcnt-1] */ for (i = 0; i < lcnt; ++i) { - if (list[i]) - if ((error = nfs_lock(list[i], locktype))) { - /* Drop any locks we acquired. */ - while (--i >= 0) { - if (list[i]) - nfs_unlock(list[i]); - } - return (error); + if ((error = nfs_node_set_busy(list[i], thd))) { + /* Drop any locks we acquired. */ + while (--i >= 0) { + nfs_node_clear_busy(list[i]); } + return error; + } } - return (0); + return 0; } -/* - * Unlock a group of NFS nodes - */ void -nfs_unlock4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4) +nfs_node_clear_busy4(nfsnode_t np1, nfsnode_t np2, nfsnode_t np3, nfsnode_t np4) { nfsnode_t list[4]; - int i, k = 0; - - if (np1) { - nfs_unlock(np1); - list[k++] = np1; - } - if (np2) { - for (i = 0; i < k; ++i) - if (list[i] == np2) - goto skip2; - nfs_unlock(np2); - list[k++] = np2; - } -skip2: - if (np3) { - for (i = 0; i < k; ++i) - if (list[i] == np3) - goto skip3; - nfs_unlock(np3); - list[k++] = np3; - } -skip3: - if (np4) { - for (i = 0; i < k; ++i) - if (list[i] == np4) - return; - nfs_unlock(np4); + int lcnt; + + nfs_node_sort4(np1, np2, np3, np4, list, &lcnt); + while (--lcnt >= 0) { + nfs_node_clear_busy(list[lcnt]); } } @@ -739,21 +1333,28 @@ skip3: void nfs_data_lock(nfsnode_t np, int locktype) { - nfs_data_lock2(np, locktype, 1); + nfs_data_lock_internal(np, locktype, 1); } void -nfs_data_lock2(nfsnode_t np, int locktype, int updatesize) +nfs_data_lock_noupdate(nfsnode_t np, int locktype) +{ + nfs_data_lock_internal(np, locktype, 0); +} +void +nfs_data_lock_internal(nfsnode_t np, int locktype, int updatesize) { FSDBG_TOP(270, np, locktype, np->n_datalockowner, 0); - if (locktype == NFS_NODE_LOCK_SHARED) { - if (updatesize && ISSET(np->n_flag, NUPDATESIZE)) + if (locktype == NFS_DATA_LOCK_SHARED) { + if (updatesize && ISSET(np->n_flag, NUPDATESIZE)) { nfs_data_update_size(np, 0); + } lck_rw_lock_shared(&np->n_datalock); } else { lck_rw_lock_exclusive(&np->n_datalock); np->n_datalockowner = current_thread(); - if (updatesize && ISSET(np->n_flag, NUPDATESIZE)) + if (updatesize && ISSET(np->n_flag, NUPDATESIZE)) { nfs_data_update_size(np, 1); + } } FSDBG_BOT(270, np, locktype, np->n_datalockowner, 0); } @@ -764,19 +1365,26 @@ nfs_data_lock2(nfsnode_t np, int locktype, int updatesize) void nfs_data_unlock(nfsnode_t np) { - nfs_data_unlock2(np, 1); + nfs_data_unlock_internal(np, 1); } void -nfs_data_unlock2(nfsnode_t np, int updatesize) +nfs_data_unlock_noupdate(nfsnode_t np) +{ + nfs_data_unlock_internal(np, 0); +} +void +nfs_data_unlock_internal(nfsnode_t np, int updatesize) { int mine = (np->n_datalockowner == current_thread()); FSDBG_TOP(271, np, np->n_datalockowner, current_thread(), 0); - if (updatesize && mine && ISSET(np->n_flag, NUPDATESIZE)) + if (updatesize && mine && ISSET(np->n_flag, NUPDATESIZE)) { nfs_data_update_size(np, 1); + } np->n_datalockowner = NULL; lck_rw_done(&np->n_datalock); - if (updatesize && !mine && ISSET(np->n_flag, NUPDATESIZE)) + if (updatesize 
&& !mine && ISSET(np->n_flag, NUPDATESIZE)) { nfs_data_update_size(np, 0); + } FSDBG_BOT(271, np, np->n_datalockowner, current_thread(), 0); } @@ -791,16 +1399,17 @@ nfs_data_update_size(nfsnode_t np, int datalocked) FSDBG_TOP(272, np, np->n_flag, np->n_size, np->n_newsize); if (!datalocked) { - nfs_data_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE); /* grabbing data lock will automatically update size */ nfs_data_unlock(np); FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize); return; } - error = nfs_lock(np, NFS_NODE_LOCK_EXCLUSIVE); + error = nfs_node_lock(np); if (error || !ISSET(np->n_flag, NUPDATESIZE)) { - if (!error) - nfs_unlock(np); + if (!error) { + nfs_node_unlock(np); + } FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize); return; } @@ -808,8 +1417,45 @@ nfs_data_update_size(nfsnode_t np, int datalocked) np->n_size = np->n_newsize; /* make sure we invalidate buffers the next chance we get */ SET(np->n_flag, NNEEDINVALIDATE); - nfs_unlock(np); + nfs_node_unlock(np); ubc_setsize(NFSTOV(np), (off_t)np->n_size); /* XXX error? */ FSDBG_BOT(272, np, np->n_flag, np->n_size, np->n_newsize); } +#define DODEBUG 1 + +int +nfs_mount_is_dirty(mount_t mp) +{ + u_long i; + nfsnode_t np; +#ifdef DODEBUG + struct timeval now, then, diff; + u_long ncnt = 0; + microuptime(&now); +#endif + lck_mtx_lock(nfs_node_hash_mutex); + for (i = 0; i <= nfsnodehash; i++) { + LIST_FOREACH(np, &nfsnodehashtbl[i], n_hash) { +#ifdef DODEBUG + ncnt++; +#endif + if (np->n_mount == mp && !LIST_EMPTY(&np->n_dirtyblkhd)) { + goto out; + } + } + } +out: + lck_mtx_unlock(nfs_node_hash_mutex); +#ifdef DODEBUG + microuptime(&then); + timersub(&then, &now, &diff); + + NFS_DBG(NFS_FAC_SOCK, 7, "mount_is_dirty for %s took %lld mics for %ld slots and %ld nodes return %d\n", + vfs_statfs(mp)->f_mntfromname, (uint64_t)diff.tv_sec * 1000000LL + diff.tv_usec, i, ncnt, (i <= nfsnodehash)); +#endif + + return i <= nfsnodehash; +} + +#endif /* CONFIG_NFS_CLIENT */
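
A side note on the locking rework above: the old nfs_lock2()/nfs_lock4() took locks "in order given (assumed to be parent-child order)", whereas the replacement nfs_node_lock2() and nfs_node_set_busy4() impose a single global order, descending node address, with duplicates skipped (nfs_node_sort4() exists only to produce that order for the four-node case). Deriving the order from the addresses means every caller acquires any given pair of node locks in the same sequence, which rules out the ABBA deadlock where two threads each hold one lock of a pair and wait for the other. A stand-alone sketch of the same discipline, using plain pthread mutexes and hypothetical lock_pair_ordered()/unlock_pair_ordered() helpers rather than the kernel lck_mtx calls:

#include <pthread.h>

struct node {
	pthread_mutex_t lock;
	/* ... per-node state ... */
};

/* Illustrative sketch; not part of the diff.  Same idea as
 * nfs_node_lock2(): lock the higher address first, skip duplicates. */
static void
lock_pair_ordered(struct node *a, struct node *b)
{
	struct node *first  = (a > b) ? a : b;
	struct node *second = (a > b) ? b : a;

	pthread_mutex_lock(&first->lock);
	if (a != b) {
		pthread_mutex_lock(&second->lock);
	}
}

static void
unlock_pair_ordered(struct node *a, struct node *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a != b) {
		pthread_mutex_unlock(&b->lock);
	}
}

Because the order is a pure function of the addresses, one thread locking (np1, np2) and another locking (np2, np1) acquire the two mutexes in the same sequence, so neither can block while holding the lock the other still needs.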