+ u_int64_t *xidp)
+{
+ struct nfsmount *nmp = mp ? VFSTONFS(mp) : NFSTONMP(np);
+ int error = 0, status, nfsvers, rpcflags = 0;
+ struct nfsm_chain nmreq, nmrep;
+
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ if (flags & NGA_MONITOR) { /* vnode monitor requests should be soft */
+ rpcflags = R_RECOVER;
+ }
+
+ if (flags & NGA_SOFT) { /* Return ETIMEDOUT if server not responding */
+ rpcflags |= R_SOFT;
+ }
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
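+ /* build the GETATTR request: NFSv3 file handles are length-prefixed, NFSv2 handles are fixed-size */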
+ nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
+ if (nfsvers != NFS_VER2) {
+ nfsm_chain_add_32(error, &nmreq, fhsize);
+ }
+ nfsm_chain_add_opaque(error, &nmreq, fhp, fhsize);
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request2(np, mp, &nmreq, NFSPROC_GETATTR,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx),
+ NULL, rpcflags, &nmrep, xidp, &status);
+ if (!error) {
+ error = status;
+ }
+ nfsmout_if(error);
+ error = nfs_parsefattr(nmp, &nmrep, nfsvers, nvap);
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+}
+
+/*
+ * nfs_refresh_fh will attempt to update the file handle for the node.
+ *
+ * It only does this for symbolic links and regular files that are not currently opened.
+ *
+ * On success, returns 0 and the node's file handle is updated; returns ESTALE on failure.
+ */
+int
+nfs_refresh_fh(nfsnode_t np, vfs_context_t ctx)
+{
+ vnode_t dvp, vp = NFSTOV(np);
+ nfsnode_t dnp;
+ const char *v_name = vnode_getname(vp);
+ char *name;
+ int namelen, fhsize, refreshed;
+ int error, wanted = 0;
+ uint8_t *fhp;
+ struct timespec ts = {.tv_sec = 2, .tv_nsec = 0};
+
+ NFS_VNOP_DBG("vnode is %d\n", vnode_vtype(vp));
+
+ dvp = vnode_parent(vp);
+ if ((vnode_vtype(vp) != VREG && vnode_vtype(vp) != VLNK) ||
+ v_name == NULL || *v_name == '\0' || dvp == NULL) {
+ if (v_name != NULL) {
+ vnode_putname(v_name);
+ }
+ return ESTALE;
+ }
+ dnp = VTONFS(dvp);
+
+ namelen = strlen(v_name);
+ MALLOC(name, char *, namelen + 1, M_TEMP, M_WAITOK);
+ if (name == NULL) {
+ vnode_putname(v_name);
+ return ESTALE;
+ }
+ bcopy(v_name, name, namelen + 1);
+ NFS_VNOP_DBG("Trying to refresh %s : %s\n", v_name, name);
+ vnode_putname(v_name);
+
+ /* Allocate the maximum size file handle */
+ MALLOC(fhp, uint8_t *, NFS4_FHSIZE, M_TEMP, M_WAITOK);
+ if (fhp == NULL) {
+ FREE(name, M_TEMP);
+ return ESTALE;
+ }
+
+ if ((error = nfs_node_lock(np))) {
+ FREE(name, M_TEMP);
+ FREE(fhp, M_TEMP);
+ return ESTALE;
+ }
+
+ fhsize = np->n_fhsize;
+ bcopy(np->n_fhp, fhp, fhsize);
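+ /* serialize refreshes: if another thread is already refreshing this node, wait for it (or for a signal) */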
+ while (ISSET(np->n_flag, NREFRESH)) {
+ SET(np->n_flag, NREFRESHWANT);
+ NFS_VNOP_DBG("Waiting for refresh of %s\n", name);
+ msleep(np, &np->n_lock, PZERO - 1, "nfsrefreshwant", &ts);
+ if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
+ break;
+ }
+ }
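+ /* if the file handle changed while we waited, another thread already refreshed it for us */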
+ refreshed = error ? 0 : !NFS_CMPFH(np, fhp, fhsize);
+ SET(np->n_flag, NREFRESH);
+ nfs_node_unlock(np);
+
+ NFS_VNOP_DBG("error = %d, refreshed = %d\n", error, refreshed);
+ if (error || refreshed) {
+ goto nfsmout;
+ }
+
+ /* Check that there are no open references for this file */
+ lck_mtx_lock(&np->n_openlock);
+ if (np->n_openrefcnt || !TAILQ_EMPTY(&np->n_opens) || !TAILQ_EMPTY(&np->n_lock_owners)) {
+ int cnt = 0;
+ struct nfs_open_file *ofp;
+
+ TAILQ_FOREACH(ofp, &np->n_opens, nof_link) {
+ cnt += ofp->nof_opencnt;
+ }
+ if (cnt) {
+ lck_mtx_unlock(&np->n_openlock);
+ NFS_VNOP_DBG("Can not refresh file handle for %s with open state\n", name);
+ NFS_VNOP_DBG("\topenrefcnt = %d, opens = %d lock_owners = %d\n",
+ np->n_openrefcnt, cnt, !TAILQ_EMPTY(&np->n_lock_owners));
+ error = ESTALE;
+ goto nfsmout;
+ }
+ }
+ lck_mtx_unlock(&np->n_openlock);
+ /*
+ * Since the FH is currently stale we should not be able to
+ * establish any open state until the FH is refreshed.
+ */
+
+ error = nfs_node_lock(np);
+ nfsmout_if(error);
+ /*
+ * Symlinks should never need invalidations and are holding
+ * the one and only nfsbuf in an uncached acquired state
+ * trying to do a readlink. So we will hang if we invalidate
+ * in that case. Only in the VREG case do we need to
+ * invalidate.
+ */
+ if (vnode_vtype(vp) == VREG) {
+ np->n_flag &= ~NNEEDINVALIDATE;
+ nfs_node_unlock(np);
+ error = nfs_vinvalbuf(vp, V_IGNORE_WRITEERR, ctx, 1);
+ if (error) {
+ NFS_VNOP_DBG("nfs_vinvalbuf returned %d\n", error);
+ }
+ nfsmout_if(error);
+ } else {
+ nfs_node_unlock(np);
+ }
+
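+ /* re-do the lookup of the name in the parent directory to update the node's file handle */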
+ NFS_VNOP_DBG("Looking up %s\n", name);
+ error = nfs_lookitup(dnp, name, namelen, ctx, &np);
+ if (error) {
+ NFS_VNOP_DBG("nfs_lookitup returned %d\n", error);
+ }
+
+nfsmout:
+ nfs_node_lock_force(np);
+ wanted = ISSET(np->n_flag, NREFRESHWANT);
+ CLR(np->n_flag, NREFRESH | NREFRESHWANT);
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+
+ if (error == 0) {
+ NFS_VNOP_DBG("%s refreshed file handle\n", name);
+ }
+
+ FREE(name, M_TEMP);
+ FREE(fhp, M_TEMP);
+
+ return error ? ESTALE : 0;
+}
+
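+/*
+ * nfs_getattr: getattr wrapper that, on ESTALE, attempts to refresh the
+ * node's file handle and retries the getattr if the refresh succeeds.
+ */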
+int
+nfs_getattr(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
+{
+ int error;
+
+retry:
+ error = nfs_getattr_internal(np, nvap, ctx, flags);
+ if (error == ESTALE) {
+ error = nfs_refresh_fh(np, ctx);
+ if (!error) {
+ goto retry;
+ }
+ }
+ return error;
+}
+
+int
+nfs_getattr_internal(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods;
+ struct nfs_vattr nvattr;
+ struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
+ u_int64_t xid;
+
+ FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);
+
+ nmp = NFSTONMP(np);
+
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ if (!nvap) {
+ nvap = &nvattr;
+ }
+ NVATTR_INIT(nvap);
+
+ /* Update local times for special files. */
+ if (np->n_flag & (NACC | NUPD)) {
+ nfs_node_lock_force(np);
+ np->n_flag |= NCHG;
+ nfs_node_unlock(np);
+ }
+ /* Update size, if necessary */
+ if (ISSET(np->n_flag, NUPDATESIZE)) {
+ nfs_data_update_size(np, 0);
+ }
+
+ error = nfs_node_lock(np);
+ nfsmout_if(error);
+ if (!(flags & (NGA_UNCACHED | NGA_MONITOR)) || ((nfsvers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))) {
+ /*
+ * Use the cache or wait for any getattr in progress if:
+ * - it's a cached request, or
+ * - we have a delegation, or
+ * - the server isn't responding
+ */
+ while (1) {
+ error = nfs_getattrcache(np, nvap, flags);
+ if (!error || (error != ENOENT)) {
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ error = 0;
+ if (!ISSET(np->n_flag, NGETATTRINPROG)) {
+ break;
+ }
+ if (flags & NGA_MONITOR) {
+ /* no need to wait if a request is pending */
+ error = EINPROGRESS;
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ SET(np->n_flag, NGETATTRWANT);
+ msleep(np, &np->n_lock, PZERO - 1, "nfsgetattrwant", &ts);
+ if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ }
+ SET(np->n_flag, NGETATTRINPROG);
+ inprogset = 1;
+ } else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
+ SET(np->n_flag, NGETATTRINPROG);
+ inprogset = 1;
+ } else if (flags & NGA_MONITOR) {
+ /* no need to make a request if one is pending */
+ error = EINPROGRESS;
+ }
+ nfs_node_unlock(np);
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ }
+ if (error) {
+ goto nfsmout;
+ }
+
+ /*
+ * Return cached attributes if they are valid, the server isn't
+ * responding, and this is some softened-up style of mount.
+ */
+ if (NATTRVALID(np) && nfs_use_cache(nmp)) {
+ flags |= NGA_SOFT;
+ }
+
+ /*
+ * We might want to try to get both the attributes and access info by
+ * making an ACCESS call and seeing if it returns updated attributes.
+ * But don't bother if we aren't caching access info or if the
+ * attributes returned wouldn't be cached.
+ */
+ if (!(flags & NGA_ACL) && (nfsvers != NFS_VER2) && nfs_access_for_getattr && (nfs_access_cache_timeout > 0)) {
+ if (nfs_attrcachetimeout(np) > 0) {
+ /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
+ u_int32_t access = NFS_ACCESS_ALL;
+ int rpcflags = 0;
+
+ /* Return cached attrs if server doesn't respond */
+ if (flags & NGA_SOFT) {
+ rpcflags |= R_SOFT;
+ }
+
+ error = nmp->nm_funcs->nf_access_rpc(np, &access, rpcflags, ctx);
+
+ if (error == ETIMEDOUT) {
+ goto returncached;
+ }
+
+ if (error) {
+ goto nfsmout;
+ }
+ nfs_node_lock_force(np);
+ error = nfs_getattrcache(np, nvap, flags);
+ nfs_node_unlock(np);
+ if (!error || (error != ENOENT)) {
+ goto nfsmout;
+ }
+ /* Well, that didn't work... just do a getattr... */
+ error = 0;
+ }
+ }
+
+ avoidfloods = 0;
+
+tryagain:
+ error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid);
+ if (!error) {
+ nfs_node_lock_force(np);
+ error = nfs_loadattrcache(np, nvap, &xid, 0);
+ nfs_node_unlock(np);
+ }
+
+ /*
+ * If the server didn't respond, return cached attributes.
+ */
+returncached:
+ if ((flags & NGA_SOFT) && (error == ETIMEDOUT)) {
+ nfs_node_lock_force(np);
+ error = nfs_getattrcache(np, nvap, flags);
+ if (!error || (error != ENOENT)) {
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ nfs_node_unlock(np);
+ }
+ nfsmout_if(error);
+
+ if (!xid) { /* out-of-order rpc - attributes were dropped */
+ FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
+ if (avoidfloods++ < 20) {
+ goto tryagain;
+ }
+ /* avoidfloods > 1 is bizarre; at 20, pull the plug */
+ /* just return the last attributes we got */
+ }
+nfsmout:
+ nfs_node_lock_force(np);
+ if (inprogset) {
+ wanted = ISSET(np->n_flag, NGETATTRWANT);
+ CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT));
+ }
+ if (!error) {
+ /* check if the node changed on us */
+ vnode_t vp = NFSTOV(np);
+ enum vtype vtype = vnode_vtype(vp);
+ if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) {
+ FSDBG(513, -1, np, 0, np);
+ np->n_flag &= ~NNEGNCENTRIES;
+ cache_purge(vp);
+ np->n_ncgen++;
+ NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
+ NFS_VNOP_DBG("Purge directory 0x%llx\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(vp));
+ }
+ if (NFS_CHANGED(nfsvers, np, nvap)) {
+ FSDBG(513, -1, np, -1, np);
+ if (vtype == VDIR) {
+ NFS_VNOP_DBG("Invalidate directory 0x%llx\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(vp));
+ nfs_invaldir(np);
+ }
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ FSDBG(513, -1, np, -2, error);
+ if (!error) {
+ nfs_node_lock_force(np);
+ NFS_CHANGED_UPDATE(nfsvers, np, nvap);
+ nfs_node_unlock(np);
+ }
+ } else {
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+ }
+ } else {
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+ }
+
+ if (nvap == &nvattr) {
+ NVATTR_CLEANUP(nvap);
+ } else if (!(flags & NGA_ACL)) {
+ /* make sure we don't return an ACL if it wasn't asked for */
+ NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
+ if (nvap->nva_acl) {
+ kauth_acl_free(nvap->nva_acl);
+ nvap->nva_acl = NULL;
+ }
+ }
+ FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
+ return error;
+}
+
+
+/*
+ * NFS getattr call from vfs.
+ */
+
+/*
+ * The attributes we support over the wire.
+ * We also get fsid, but the vfs layer gets it out of the mount
+ * structure after calling us, so there's no need to return it.
+ * Also, Finder expects to be able to call getattrlist just looking
+ * for the FSID without hanging on a non-responsive server.
+ */
+#define NFS3_SUPPORTED_VATTRS \
+ (VNODE_ATTR_va_rdev | \
+ VNODE_ATTR_va_nlink | \
+ VNODE_ATTR_va_data_size | \
+ VNODE_ATTR_va_data_alloc | \
+ VNODE_ATTR_va_uid | \
+ VNODE_ATTR_va_gid | \
+ VNODE_ATTR_va_mode | \
+ VNODE_ATTR_va_modify_time | \
+ VNODE_ATTR_va_change_time | \
+ VNODE_ATTR_va_access_time | \
+ VNODE_ATTR_va_fileid | \
+ VNODE_ATTR_va_type)
+
+
+int
+nfs3_vnop_getattr(
+ struct vnop_getattr_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct vnode_attr *a_vap;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ int error;
+ nfsnode_t np;
+ uint64_t supported_attrs;
+ struct nfs_vattr nva;
+ struct vnode_attr *vap = ap->a_vap;
+ struct nfsmount *nmp;
+ dev_t rdev;
+
+ nmp = VTONMP(ap->a_vp);
+
+ /*
+ * Let's not go over the wire if we don't support any of the requested attributes.
+ * Just fall through and let the VFS layer cons up what it needs.
+ */
+ /* Return the io size no matter what, since we don't go over the wire for this */
+ VATTR_RETURN(vap, va_iosize, nfs_iosize);
+
+ supported_attrs = NFS3_SUPPORTED_VATTRS;
+
+ if ((vap->va_active & supported_attrs) == 0) {
+ return 0;
+ }
+
+ if (VATTR_IS_ACTIVE(ap->a_vap, va_name)) {
+ NFS_VNOP_DBG("Getting attrs for 0x%llx, vname is %s\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
+ ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
+ }
+
+ /*
+ * We should not go over the wire if only the fileid was requested and the attribute cache has ever been populated.
+ */
+ if ((vap->va_active & supported_attrs) == VNODE_ATTR_va_fileid) {
+ np = VTONFS(ap->a_vp);
+ if (np->n_attrstamp) {
+ VATTR_RETURN(vap, va_fileid, np->n_vattr.nva_fileid);
+ return 0;
+ }
+ }
+
+ error = nfs_getattr(VTONFS(ap->a_vp), &nva, ap->a_context, NGA_CACHED);
+ if (error) {
+ return error;
+ }
+
+ /* copy nva to *a_vap */
+ VATTR_RETURN(vap, va_type, nva.nva_type);
+ VATTR_RETURN(vap, va_mode, nva.nva_mode);
+ rdev = makedev(nva.nva_rawdev.specdata1, nva.nva_rawdev.specdata2);
+ VATTR_RETURN(vap, va_rdev, rdev);
+ VATTR_RETURN(vap, va_uid, nva.nva_uid);
+ VATTR_RETURN(vap, va_gid, nva.nva_gid);
+ VATTR_RETURN(vap, va_nlink, nva.nva_nlink);
+ VATTR_RETURN(vap, va_fileid, nva.nva_fileid);
+ VATTR_RETURN(vap, va_data_size, nva.nva_size);
+ VATTR_RETURN(vap, va_data_alloc, nva.nva_bytes);
+ vap->va_access_time.tv_sec = nva.nva_timesec[NFSTIME_ACCESS];
+ vap->va_access_time.tv_nsec = nva.nva_timensec[NFSTIME_ACCESS];
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ vap->va_modify_time.tv_sec = nva.nva_timesec[NFSTIME_MODIFY];
+ vap->va_modify_time.tv_nsec = nva.nva_timensec[NFSTIME_MODIFY];
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ vap->va_change_time.tv_sec = nva.nva_timesec[NFSTIME_CHANGE];
+ vap->va_change_time.tv_nsec = nva.nva_timensec[NFSTIME_CHANGE];
+ VATTR_SET_SUPPORTED(vap, va_change_time);
+
+
+ // VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
+ return error;
+}
+
+/*
+ * NFS setattr call.
+ */
+int
+nfs_vnop_setattr(
+ struct vnop_setattr_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct vnode_attr *a_vap;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t vp = ap->a_vp;
+ nfsnode_t np = VTONFS(vp);
+ struct nfsmount *nmp;
+ struct vnode_attr *vap = ap->a_vap;
+ int error = 0;
+ int biosize, nfsvers, namedattrs;
+ u_quad_t origsize, vapsize;
+ struct nfs_dulookup dul;
+ nfsnode_t dnp = NULL;
+ int dul_in_progress = 0;
+ vnode_t dvp = NULL;
+ const char *vname = NULL;
+#if CONFIG_NFS4
+ struct nfs_open_owner *noop = NULL;
+ struct nfs_open_file *nofp = NULL;
+#endif
+ nmp = VTONMP(vp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+ biosize = nmp->nm_biosize;
+
+ /* Disallow write attempts if the filesystem is mounted read-only. */
+ if (vnode_vfsisrdonly(vp)) {
+ return EROFS;
+ }
+
+ origsize = np->n_size;
+ if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+ switch (vnode_vtype(vp)) {
+ case VDIR:
+ return EISDIR;
+ case VCHR:
+ case VBLK:
+ case VSOCK:
+ case VFIFO:
+ if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
+ !VATTR_IS_ACTIVE(vap, va_access_time) &&
+ !VATTR_IS_ACTIVE(vap, va_mode) &&
+ !VATTR_IS_ACTIVE(vap, va_uid) &&
+ !VATTR_IS_ACTIVE(vap, va_gid)) {
+ return 0;
+ }
+ VATTR_CLEAR_ACTIVE(vap, va_data_size);
+ break;
+ default:
+ /*
+ * Disallow write attempts if the filesystem is
+ * mounted read-only.
+ */
+ if (vnode_vfsisrdonly(vp)) {
+ return EROFS;
+ }
+ FSDBG_TOP(512, np->n_size, vap->va_data_size,
+ np->n_vattr.nva_size, np->n_flag);
+ /* clear NNEEDINVALIDATE, if set */
+ if ((error = nfs_node_lock(np))) {
+ return error;
+ }
+ if (np->n_flag & NNEEDINVALIDATE) {
+ np->n_flag &= ~NNEEDINVALIDATE;
+ }
+ nfs_node_unlock(np);
+ /* flush everything */
+ error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0), ctx, 1);
+ if (error) {
+ NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
+ FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
+ return error;
+ }
+#if CONFIG_NFS4
+ if (nfsvers >= NFS_VER4) {
+ /* setting file size requires having the file open for write access */
+ if (np->n_flag & NREVOKE) {
+ return EIO;
+ }
+ noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+ if (!noop) {
+ return ENOMEM;
+ }
+restart:
+ error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+ if (error) {
+ return error;
+ }
+ if (np->n_flag & NREVOKE) {
+ nfs_mount_state_in_use_end(nmp, 0);
+ return EIO;
+ }
+ error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
+ if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+ error = EIO;
+ }
+ if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+ nfs_mount_state_in_use_end(nmp, 0);
+ error = nfs4_reopen(nofp, vfs_context_thread(ctx));
+ nofp = NULL;
+ if (!error) {
+ goto restart;
+ }
+ }
+ if (!error) {
+ error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+ }
+ if (error) {
+ nfs_open_owner_rele(noop);
+ return error;
+ }
+ if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
+ /* we don't have the file open for write access, so open it */
+ error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
+ if (!error) {
+ nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
+ }
+ if (nfs_mount_state_error_should_restart(error)) {
+ nfs_open_file_clear_busy(nofp);
+ nofp = NULL;
+ if (nfs_mount_state_in_use_end(nmp, error)) {
+ goto restart;
+ }
+ }
+ }
+ }
+#endif
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ if (np->n_size > vap->va_data_size) { /* shrinking? */
+ daddr64_t obn, bn;
+ int neweofoff, mustwrite;
+ struct nfsbuf *bp;
+
+ obn = (np->n_size - 1) / biosize;
+ bn = vap->va_data_size / biosize;
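+ /* walk any in-core buffers from the old EOF block down to the new EOF block */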
+ for (; obn >= bn; obn--) {
+ if (!nfs_buf_is_incore(np, obn)) {
+ continue;
+ }
+ error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
+ if (error) {
+ continue;
+ }
+ if (obn != bn) {
+ FSDBG(512, bp, bp->nb_flags, 0, obn);
+ SET(bp->nb_flags, NB_INVAL);
+ nfs_buf_release(bp, 1);
+ continue;
+ }
+ mustwrite = 0;
+ neweofoff = vap->va_data_size - NBOFF(bp);
+ /* check for any dirty data before the new EOF */
+ if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
+ /* clip dirty range to EOF */
+ if (bp->nb_dirtyend > neweofoff) {
+ bp->nb_dirtyend = neweofoff;
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
+ mustwrite++;
+ }
+ }
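+ /* drop dirty-page bits for pages entirely beyond the new EOF */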
+ bp->nb_dirty &= (1 << round_page_32(neweofoff) / PAGE_SIZE) - 1;
+ if (bp->nb_dirty) {
+ mustwrite++;
+ }
+ if (!mustwrite) {
+ FSDBG(512, bp, bp->nb_flags, 0, obn);
+ SET(bp->nb_flags, NB_INVAL);
+ nfs_buf_release(bp, 1);
+ continue;
+ }
+ /* gotta write out dirty data before invalidating */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ /* (NB_NOCACHE indicates buffer should be discarded) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
+ SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
+ if (!IS_VALID_CRED(bp->nb_wcred)) {
+ kauth_cred_t cred = vfs_context_ucred(ctx);
+ kauth_cred_ref(cred);
+ bp->nb_wcred = cred;
+ }
+ error = nfs_buf_write(bp);
+ // Note: bp has been released
+ if (error) {
+ FSDBG(512, bp, 0xd00dee, 0xbad, error);
+ nfs_node_lock_force(np);
+ np->n_error = error;
+ np->n_flag |= NWRITEERR;
+ /*
+ * There was a write error and we need to
+ * invalidate attrs and flush buffers in
+ * order to sync up with the server.
+ * (if this write was extending the file,
+ * we may no longer know the correct size)
+ */
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ error = 0;
+ }
+ }
+ }
+ if (vap->va_data_size != np->n_size) {
+ ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
+ }
+ origsize = np->n_size;
+ np->n_size = np->n_vattr.nva_size = vap->va_data_size;
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ nfs_node_unlock(np);
+ FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ }
+ } else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
+ VATTR_IS_ACTIVE(vap, va_access_time) ||
+ (vap->va_vaflags & VA_UTIMES_NULL)) {
+ if ((error = nfs_node_lock(np))) {
+ return error;
+ }
+ if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
+ nfs_node_unlock(np);
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ if (error == EINTR) {
+ return error;
+ }
+ } else {
+ nfs_node_unlock(np);
+ }
+ }
+ if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
+ VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
+ !(error = nfs_node_lock(np))) {
+ NACCESSINVALIDATE(np);
+ nfs_node_unlock(np);
+ if (!namedattrs) {
+ dvp = vnode_getparent(vp);
+ vname = vnode_getname(vp);
+ dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
+ if (dnp) {
+ if (nfs_node_set_busy(dnp, vfs_context_thread(ctx))) {
+ vnode_put(dvp);
+ vnode_putname(vname);
+ } else {
+ nfs_dulookup_init(&dul, dnp, vname, strlen(vname), ctx);
+ nfs_dulookup_start(&dul, dnp, ctx);
+ dul_in_progress = 1;
+ }
+ } else {
+ if (dvp) {
+ vnode_put(dvp);
+ }
+ if (vname) {
+ vnode_putname(vname);
+ }
+ }
+ }
+ }
+
+ if (!error) {
+ error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
+ }
+
+ if (dul_in_progress) {
+ nfs_dulookup_finish(&dul, dnp, ctx);
+ nfs_node_clear_busy(dnp);
+ vnode_put(dvp);
+ vnode_putname(vname);
+ }
+
+ FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
+ if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+ if (error && (origsize != np->n_size) &&
+ ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
+ /* make every effort to resync file size w/ server... */
+ /* (don't bother if we'll be restarting the operation) */
+ int err; /* preserve "error" for return */
+ np->n_size = np->n_vattr.nva_size = origsize;
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ nfs_node_unlock(np);
+ FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+ vapsize = vap->va_data_size;
+ vap->va_data_size = origsize;
+ err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
+ if (err) {
+ NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
+ }
+ vap->va_data_size = vapsize;
+ }
+ nfs_node_lock_force(np);
+ /*
+ * The size was just set. If the size is already marked for update, don't
+ * trust the newsize (it may have been set while the setattr was in progress).
+ * Clear the update flag and make sure we fetch new attributes so we are sure
+ * we have the latest size.
+ */
+ if (ISSET(np->n_flag, NUPDATESIZE)) {
+ CLR(np->n_flag, NUPDATESIZE);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
+ } else {
+ nfs_node_unlock(np);
+ }
+ nfs_data_unlock(np);
+#if CONFIG_NFS4
+ if (nfsvers >= NFS_VER4) {
+ if (nofp) {
+ /* don't close our setattr open if we'll be restarting... */
+ if (!nfs_mount_state_error_should_restart(error) &&
+ (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
+ int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
+ if (err) {
+ NP(np, "nfs_vnop_setattr: close error: %d", err);
+ }
+ nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
+ }
+ nfs_open_file_clear_busy(nofp);
+ nofp = NULL;
+ }
+ if (nfs_mount_state_in_use_end(nmp, error)) {
+ goto restart;
+ }
+ nfs_open_owner_rele(noop);
+ }
+#endif
+ }
+ return error;
+}
+
+/*
+ * Do an NFS setattr RPC.
+ */
+int
+nfs3_setattr_rpc(
+ nfsnode_t np,
+ struct vnode_attr *vap,
+ vfs_context_t ctx)
+{
+ struct nfsmount *nmp = NFSTONMP(np);
+ int error = 0, lockerror = ENOENT, status, wccpostattr = 0, nfsvers;
+ u_int64_t xid, nextxid;
+ struct nfsm_chain nmreq, nmrep;
+
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+
+
+ if (VATTR_IS_ACTIVE(vap, va_flags)
+ ) {
+ if (vap->va_flags) { /* we don't support setting flags */
+ if (vap->va_active & ~VNODE_ATTR_va_flags) {
+ return EINVAL; /* return EINVAL if other attributes also set */
+ } else {
+ return ENOTSUP; /* return ENOTSUP for chflags(2) */
+ }
+ }
+ /* no flags set, so we'll just ignore it */
+ if (!(vap->va_active & ~VNODE_ATTR_va_flags)) {
+ return 0; /* no (other) attributes to set, so nothing to do */
+ }
+ }
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
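+ /* NFSv3 sattr3: each field is preceded by a discriminator saying whether (and how) to set it */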
+ if (nfsvers == NFS_VER3) {
+ if (VATTR_IS_ACTIVE(vap, va_mode)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_32(error, &nmreq, vap->va_mode);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_uid)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_32(error, &nmreq, vap->va_uid);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_gid)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_32(error, &nmreq, vap->va_gid);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (vap->va_vaflags & VA_UTIMES_NULL) {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
+ } else {
+ if (VATTR_IS_ACTIVE(vap, va_access_time)) {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
+ nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
+ nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
+ }
+ }
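+ /* no guard check (sattrguard3.check == FALSE) */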
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ } else {
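+ /* NFSv2 sattr: fields that should not be changed are sent as -1 */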
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
+ vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
+ vap->va_uid : (uint32_t)-1);
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
+ vap->va_gid : (uint32_t)-1);
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
+ vap->va_data_size : (uint32_t)-1);
+ if (VATTR_IS_ACTIVE(vap, va_access_time)) {
+ nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
+ ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, -1);
+ nfsm_chain_add_32(error, &nmreq, -1);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
+ nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
+ ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, -1);
+ nfsm_chain_add_32(error, &nmreq, -1);
+ }
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
+ if ((lockerror = nfs_node_lock(np))) {
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
+ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
+ nfsmout_if(error);
+ /* if file hadn't changed, update cached mtime */
+ if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
+ }
+ /* if directory hadn't changed, update namecache mtime */
+ if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
+ nfstimespeccmp(&np->n_ncmtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
+ }
+ if (!wccpostattr) {
+ NATTRINVALIDATE(np);
+ }
+ error = status;
+ } else {
+ if (!error) {
+ error = status;
+ }
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+ }
+ /*
+ * We just changed the attributes and we want to make sure that we
+ * see the latest attributes. Get the next XID. If it's not the
+ * next XID after the SETATTR XID, then it's possible that another
+ * RPC was in flight at the same time and it might put stale attributes
+ * in the cache. In that case, we invalidate the attributes and set
+ * the attribute cache XID to guarantee that newer attributes will
+ * get loaded next.
+ */
+ nextxid = 0;
+ nfs_get_xid(&nextxid);
+ if (nextxid != (xid + 1)) {
+ np->n_xid = nextxid;
+ NATTRINVALIDATE(np);
+ }
+nfsmout:
+ if (!lockerror) {
+ nfs_node_unlock(np);
+ }
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+}
+
+/*
+ * NFS lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the RPC
+ */
+int
+nfs_vnop_lookup(
+ struct vnop_lookup_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_dvp;
+ * vnode_t *a_vpp;
+ * struct componentname *a_cnp;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ struct componentname *cnp = ap->a_cnp;
+ vnode_t dvp = ap->a_dvp;
+ vnode_t *vpp = ap->a_vpp;
+ int flags = cnp->cn_flags;
+ vnode_t newvp;
+ nfsnode_t dnp, np;
+ struct nfsmount *nmp;
+ mount_t mp;
+ int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache;
+ u_int64_t xid;
+ struct nfs_vattr nvattr;
+ int ngflags;
+ struct vnop_access_args naa;
+ fhandle_t fh;
+ struct nfsreq rq, *req = &rq;
+
+ *vpp = NULLVP;
+
+ dnp = VTONFS(dvp);
+ NVATTR_INIT(&nvattr);
+
+ mp = vnode_mount(dvp);
+ nmp = VFSTONFS(mp);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ goto error_return;
+ }
+ nfsvers = nmp->nm_vers;
+ negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
+
+ if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)))) {
+ goto error_return;
+ }
+ /* nfs_getattr() will check changed and purge caches */
+ if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED))) {
+ goto error_return;
+ }
+
+ error = cache_lookup(dvp, vpp, cnp);
+ switch (error) {
+ case ENOENT:
+ /* negative cache entry */
+ goto error_return;
+ case 0:
+ /* cache miss */
+ if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
+ /* if rdirplus, try dir buf cache lookup */
+ error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0);
+ if (!error && np) {
+ /* dir buf cache hit */
+ *vpp = NFSTOV(np);
+ error = -1;
+ }
+ }
+ if (error != -1) { /* cache miss */
+ break;
+ }
+ /* FALLTHROUGH */
+ case -1:
+ /* cache hit, not really an error */
+ OSAddAtomic64(1, &nfsstats.lookupcache_hits);
+
+ nfs_node_clear_busy(dnp);
+ busyerror = ENOENT;
+
+ /* check for directory access */
+ naa.a_desc = &vnop_access_desc;
+ naa.a_vp = dvp;
+ naa.a_action = KAUTH_VNODE_SEARCH;
+ naa.a_context = ctx;
+
+ /* compute actual success/failure based on accessibility */
+ error = nfs_vnop_access(&naa);
+ /* FALLTHROUGH */
+ default:
+ /* unexpected error from cache_lookup */
+ goto error_return;
+ }
+
+ /* skip lookup, if we know who we are: "." or ".." */
+ isdot = isdotdot = 0;
+ if (cnp->cn_nameptr[0] == '.') {
+ if (cnp->cn_namelen == 1) {
+ isdot = 1;
+ }
+ if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.')) {
+ isdotdot = 1;
+ }
+ }
+ if (isdotdot || isdot) {
+ fh.fh_len = 0;
+ goto found;
+ }
+#if CONFIG_NFS4
+ if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
+ /* we should never be looking things up in a trigger directory, return nothing */
+ error = ENOENT;
+ goto error_return;
+ }
+#endif
+
+ /* do we know this name is too long? */
+ nmp = VTONMP(dvp);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ goto error_return;
+ }
+ if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
+ (cnp->cn_namelen > (int)nmp->nm_fsattr.nfsa_maxname)) {
+ error = ENAMETOOLONG;
+ goto error_return;
+ }
+
+ error = 0;
+ newvp = NULLVP;
+
+ OSAddAtomic64(1, &nfsstats.lookupcache_misses);
+
+ error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
+ nfsmout_if(error);
+ error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, &fh, &nvattr);
+ nfsmout_if(error);
+
+ /* is the file handle the same as this directory's file handle? */
+ isdot = NFS_CMPFH(dnp, fh.fh_data, fh.fh_len);
+
+found:
+ if (flags & ISLASTCN) {
+ switch (cnp->cn_nameiop) {
+ case DELETE:
+ cnp->cn_flags &= ~MAKEENTRY;
+ break;
+ case RENAME:
+ cnp->cn_flags &= ~MAKEENTRY;
+ if (isdot) {
+ error = EISDIR;
+ goto error_return;
+ }
+ break;
+ }
+ }
+
+ if (isdotdot) {
+ newvp = vnode_getparent(dvp);
+ if (!newvp) {
+ error = ENOENT;
+ goto error_return;
+ }
+ } else if (isdot) {
+ error = vnode_get(dvp);
+ if (error) {
+ goto error_return;
+ }
+ newvp = dvp;
+ nfs_node_lock_force(dnp);
+ if (fh.fh_len && (dnp->n_xid <= xid)) {
+ nfs_loadattrcache(dnp, &nvattr, &xid, 0);
+ }
+ nfs_node_unlock(dnp);
+ } else {
+ ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
+ error = nfs_nget(mp, dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, ngflags, &np);
+ if (error) {
+ goto error_return;
+ }
+ newvp = NFSTOV(np);
+ nfs_node_unlock(np);
+ }
+ *vpp = newvp;
+
+nfsmout:
+ if (error) {
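+ /*
+ * ENOENT on the last component of a CREATE or RENAME is expected:
+ * return EJUSTRETURN so the operation can proceed (or EROFS if the
+ * file system is mounted read-only).
+ */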
+ if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
+ (flags & ISLASTCN) && (error == ENOENT)) {
+ if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp)) {
+ error = EROFS;
+ } else {
+ error = EJUSTRETURN;
+ }
+ }
+ }
+ if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != CREATE) && negnamecache) {
+ /* add a negative entry in the name cache */
+ nfs_node_lock_force(dnp);
+ cache_enter(dvp, NULL, cnp);
+ dnp->n_flag |= NNEGNCENTRIES;
+ nfs_node_unlock(dnp);
+ }
+error_return:
+ NVATTR_CLEANUP(&nvattr);
+ if (!busyerror) {
+ nfs_node_clear_busy(dnp);
+ }
+ if (error && *vpp) {
+ vnode_put(*vpp);
+ *vpp = NULLVP;
+ }
+ return error;
+}
+
+int nfs_readlink_nocache = DEFAULT_READLINK_NOCACHE;
+
+/*
+ * NFS readlink call
+ */
+int
+nfs_vnop_readlink(
+ struct vnop_readlink_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct uio *a_uio;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ nfsnode_t np = VTONFS(ap->a_vp);
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ uint32_t buflen;
+ uio_t uio = ap->a_uio;
+ struct nfsbuf *bp = NULL;
+ struct timespec ts;
+ int timeo;
+
+ if (vnode_vtype(ap->a_vp) != VLNK) {
+ return EPERM;
+ }
+
+ if (uio_resid(uio) == 0) {
+ return 0;
+ }
+ if (uio_offset(uio) < 0) {
+ return EINVAL;
+ }
+
+ nmp = VTONMP(ap->a_vp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+
+ /* nfs_getattr() will check changed and purge caches */
+ if ((error = nfs_getattr(np, NULL, ctx, nfs_readlink_nocache ? NGA_UNCACHED : NGA_CACHED))) {
+ FSDBG(531, np, 0xd1e0001, 0, error);
+ return error;
+ }
+
+ if (nfs_readlink_nocache) {
+ timeo = nfs_attrcachetimeout(np);
+ nanouptime(&ts);
+ }
+
+retry:
+ OSAddAtomic64(1, &nfsstats.biocache_readlinks);
+ error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_META, &bp);
+ if (error) {
+ FSDBG(531, np, 0xd1e0002, 0, error);
+ return error;
+ }
+
+ if (nfs_readlink_nocache) {
+ NFS_VNOP_DBG("timeo = %d ts.tv_sec = %ld need refresh = %d cached = %d\n", timeo, ts.tv_sec,
+ (np->n_rltim.tv_sec + timeo) < ts.tv_sec || nfs_readlink_nocache > 1,
+ ISSET(bp->nb_flags, NB_CACHE) == NB_CACHE);
+ /* n_rltim is synchronized by the associated nfs buf */
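+ /* invalidate the cached link text if it has aged past the attribute cache timeout (or if readlink caching is fully disabled) */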
+ if (ISSET(bp->nb_flags, NB_CACHE) && ((nfs_readlink_nocache > 1) || ((np->n_rltim.tv_sec + timeo) < ts.tv_sec))) {
+ SET(bp->nb_flags, NB_INVAL);
+ nfs_buf_release(bp, 0);
+ goto retry;
+ }
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE)) {
+readagain:
+ OSAddAtomic64(1, &nfsstats.readlink_bios);
+ buflen = bp->nb_bufsize;
+ error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
+ if (error) {
+ if (error == ESTALE) {
+ NFS_VNOP_DBG("Stale FH from readlink rpc\n");
+ error = nfs_refresh_fh(np, ctx);
+ if (error == 0) {
+ goto readagain;
+ }
+ }
+ SET(bp->nb_flags, NB_ERROR);
+ bp->nb_error = error;
+ NFS_VNOP_DBG("readlink failed %d\n", error);
+ } else {
+ bp->nb_validoff = 0;
+ bp->nb_validend = buflen;
+ np->n_rltim = ts;
+ NFS_VNOP_DBG("readlink of %.*s\n", bp->nb_validend, (char *)bp->nb_data);
+ }
+ } else {
+ NFS_VNOP_DBG("got cached link of %.*s\n", bp->nb_validend, (char *)bp->nb_data);
+ }
+
+ if (!error && (bp->nb_validend > 0)) {
+ error = uiomove(bp->nb_data, bp->nb_validend, uio);
+ }
+ FSDBG(531, np, bp->nb_validend, 0, error);
+ nfs_buf_release(bp, 1);
+ return error;
+}
+
+/*
+ * Do a readlink RPC.
+ */
+int
+nfs3_readlink_rpc(nfsnode_t np, char *buf, uint32_t *buflenp, vfs_context_t ctx)
+{
+ struct nfsmount *nmp;
+ int error = 0, lockerror = ENOENT, nfsvers, status;
+ uint32_t len;
+ u_int64_t xid;
+ struct nfsm_chain nmreq, nmrep;
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
+ if ((lockerror = nfs_node_lock(np))) {
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
+ }
+ if (!error) {
+ error = status;
+ }
+ nfsm_chain_get_32(error, &nmrep, len);
+ nfsmout_if(error);
+ if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
+ error = EBADRPC;
+ goto nfsmout;
+ }
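+ /* clamp the link length to fit the caller's buffer, preferring the known file size when it is smaller */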
+ if (len >= *buflenp) {
+ if (np->n_size && (np->n_size < *buflenp)) {
+ len = np->n_size;
+ } else {
+ len = *buflenp - 1;
+ }
+ }
+ nfsm_chain_get_opaque(error, &nmrep, len, buf);
+ if (!error) {
+ *buflenp = len;
+ }
+nfsmout:
+ if (!lockerror) {
+ nfs_node_unlock(np);
+ }
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+}
+
+/*
+ * NFS read RPC call
+ * Ditto above
+ */
+int
+nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers, eof = 0;
+ size_t nmrsize, len, retlen;
+ user_ssize_t tsiz;
+ off_t txoffset;
+ struct nfsreq rq, *req = &rq;
+#if CONFIG_NFS4
+ uint32_t stategenid = 0, restart = 0;
+#endif
+ FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ nmrsize = nmp->nm_rsize;
+
+ txoffset = uio_offset(uio);
+ tsiz = uio_resid(uio);
+ if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
+ FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
+ return EFBIG;
+ }
+
+ while (tsiz > 0) {
+ len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
+ FSDBG(536, np, txoffset, len, 0);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ break;
+ }
+#if CONFIG_NFS4
+ if (nmp->nm_vers >= NFS_VER4) {
+ stategenid = nmp->nm_stategenid;
+ }
+#endif
+ error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
+ if (!error) {
+ error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
+ }
+#if CONFIG_NFS4
+ if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+ (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+ lck_mtx_lock(&nmp->nm_lock);
+ if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+ NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
+ nfs_need_recover(nmp, error);
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ } else {
+ if (error == NFSERR_GRACE) {
+ tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
+ }
+ if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+ continue;
+ }
+ }
+ }
+#endif
+ if (error) {
+ break;
+ }
+ txoffset += retlen;
+ tsiz -= retlen;
+ if (nfsvers != NFS_VER2) {
+ if (eof || (retlen == 0)) {
+ tsiz = 0;
+ }
+ } else if (retlen < len) {
+ tsiz = 0;
+ }
+ }
+
+ FSDBG_BOT(536, np, eof, uio_resid(uio), error);
+ return error;
+}
+
+int
+nfs3_read_rpc_async(
+ nfsnode_t np,
+ off_t offset,
+ size_t len,
+ thread_t thd,
+ kauth_cred_t cred,
+ struct nfsreq_cbinfo *cb,
+ struct nfsreq **reqp)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ struct nfsm_chain nmreq;
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_add_64(error, &nmreq, offset);
+ nfsm_chain_add_32(error, &nmreq, len);
+ } else {
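+ /* NFSv2 READ args: offset, count, totalcount (unused) */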
+ nfsm_chain_add_32(error, &nmreq, offset);
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsm_chain_add_32(error, &nmreq, 0);
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ return error;
+}
+
+int
+nfs3_read_rpc_async_finish(
+ nfsnode_t np,
+ struct nfsreq *req,
+ uio_t uio,
+ size_t *lenp,
+ int *eofp)
+{
+ int error = 0, lockerror, nfsvers, status, eof = 0;
+ size_t retlen = 0;
+ uint64_t xid;
+ struct nfsmount *nmp;
+ struct nfsm_chain nmrep;
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ nfs_request_async_cancel(req);
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmrep);
+
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ if (error == EINPROGRESS) { /* async request restarted */
+ return error;
+ }
+
+ if ((lockerror = nfs_node_lock(np))) {
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
+ }
+ if (!error) {
+ error = status;
+ }
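+ /* v3 reply: skip the count, pick up the eof flag; the opaque data length is read below */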
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
+ nfsm_chain_get_32(error, &nmrep, eof);
+ } else {
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+ }
+ if (!lockerror) {
+ nfs_node_unlock(np);
+ }
+ nfsm_chain_get_32(error, &nmrep, retlen);
+ if ((nfsvers == NFS_VER2) && (retlen > *lenp)) {
+ error = EBADRPC;
+ }
+ nfsmout_if(error);
+ error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio);
+ if (eofp) {
+ if (nfsvers == NFS_VER3) {
+ if (!eof && !retlen) {
+ eof = 1;
+ }
+ } else if (retlen < *lenp) {
+ eof = 1;
+ }
+ *eofp = eof;
+ }
+ *lenp = MIN(retlen, *lenp);
+nfsmout:
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+}
+
+/*
+ * NFS write call
+ */
+int
+nfs_vnop_write(
+ struct vnop_write_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct uio *a_uio;
+ * int a_ioflag;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ uio_t uio = ap->a_uio;
+ vnode_t vp = ap->a_vp;
+ nfsnode_t np = VTONFS(vp);
+ int ioflag = ap->a_ioflag;
+ struct nfsbuf *bp;
+ struct nfsmount *nmp = VTONMP(vp);
+ daddr64_t lbn;
+ int biosize;
+ int n, on, error = 0;
+ off_t boff, start, end;
+ uio_t auio;
+ char auio_buf[UIO_SIZEOF(1)];
+ thread_t thd;
+ kauth_cred_t cred;
+
+ FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
+
+ if (vnode_vtype(vp) != VREG) {
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
+ return EIO;
+ }
+
+ thd = vfs_context_thread(ctx);
+ cred = vfs_context_ucred(ctx);
+
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+
+ if ((error = nfs_node_lock(np))) {
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return error;
+ }
+ np->n_wrbusy++;
+
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+ if (np->n_flag & NNEEDINVALIDATE) {
+ np->n_flag &= ~NNEEDINVALIDATE;
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+ } else {
+ nfs_node_unlock(np);
+ }
+ if (error) {
+ goto out;
+ }
+
+ biosize = nmp->nm_biosize;
+
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ nfs_node_lock_force(np);
+ if (np->n_flag & NMODIFIED) {
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
+ goto out;
+ }
+ } else {
+ nfs_node_unlock(np);
+ }
+ if (ioflag & IO_APPEND) {
+ nfs_data_unlock(np);
+ /* nfs_getattr() will check changed and purge caches */
+ error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
+ /* we'll be extending the file, so take the data lock exclusive */
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
+ goto out;
+ }
+ uio_setoffset(uio, np->n_size);
+ }
+ }
+ if (uio_offset(uio) < 0) {
+ error = EINVAL;
+ FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
+ goto out;
+ }
+ if (uio_resid(uio) == 0) {
+ goto out;
+ }
+
+ if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
+ /*
+ * It looks like we'll be extending the file, so take the data lock exclusive.
+ */
+ nfs_data_unlock(np);
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+
+ /*
+ * Also, if the write begins after the previous EOF buffer, make sure to zero
+ * and validate the new bytes in that buffer.
+ */
+ struct nfsbuf *eofbp = NULL;
+ daddr64_t eofbn = np->n_size / biosize;
+ int eofoff = np->n_size % biosize;
+ lbn = uio_offset(uio) / biosize;
+
+ if (eofoff && (eofbn < lbn)) {
+ if ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE | NBLK_ONLYVALID, &eofbp))) {
+ goto out;
+ }
+ np->n_size += (biosize - eofoff);
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ if (eofbp) {
+ /*
+ * For the old last page, don't zero bytes if there
+ * are invalid bytes in that page (i.e. the page isn't
+ * currently valid).
+ * For pages after the old last page, zero them and
+ * mark them as valid.
+ */
+ char *d;
+ int i;
+ if (ioflag & IO_NOCACHE) {
+ SET(eofbp->nb_flags, NB_NOCACHE);
+ }
+ NFS_BUF_MAP(eofbp);
+ FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
+ d = eofbp->nb_data;
+ i = eofoff / PAGE_SIZE;
+ while (eofoff < biosize) {
+ int poff = eofoff & PAGE_MASK;
+ if (!poff || NBPGVALID(eofbp, i)) {
+ bzero(d + eofoff, PAGE_SIZE - poff);
+ NBPGVALID_SET(eofbp, i);
+ }
+ eofoff += PAGE_SIZE - poff;
+ i++;
+ }
+ nfs_buf_release(eofbp, 1);
+ }
+ }
+ }
+
+ do {
+ OSAddAtomic64(1, &nfsstats.biocache_writes);
+ lbn = uio_offset(uio) / biosize;
+ on = uio_offset(uio) % biosize;
+ n = biosize - on;
+ if (uio_resid(uio) < n) {
+ n = uio_resid(uio);
+ }
+again:
+ /*
+ * Get a cache block for writing. The range to be written is
+ * (off..off+n) within the block. We ensure that the block
+ * either has no dirty region or that the given range is
+ * contiguous with the existing dirty region.
+ */
+ error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
+ if (error) {
+ goto out;
+ }
+ /* map the block because we know we're going to write to it */
+ NFS_BUF_MAP(bp);
+
+ if (ioflag & IO_NOCACHE) {
+ SET(bp->nb_flags, NB_NOCACHE);
+ }
+
+ if (!IS_VALID_CRED(bp->nb_wcred)) {
+ kauth_cred_ref(cred);
+ bp->nb_wcred = cred;
+ }
+
+ /*
+ * If there's already a dirty range AND dirty pages in this block we
+ * need to send a commit AND write the dirty pages before continuing.
+ *
+ * If there's already a dirty range OR dirty pages in this block
+ * and the new write range is not contiguous with the existing range,
+ * then force the buffer to be written out now.
+ * (We used to just extend the dirty range to cover the valid,
+ * but unwritten, data in between also. But writing ranges
+ * of data that weren't actually written by an application
+ * risks overwriting some other client's data with stale data
+ * that's just masquerading as new written data.)
+ */
+ if (bp->nb_dirtyend > 0) {
+ if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ goto again;
+ }
+ } else if (bp->nb_dirty) {
+ int firstpg, lastpg;
+ u_int32_t pagemask;
+ /* calculate write range pagemask */
+ firstpg = on / PAGE_SIZE;
+ lastpg = (on + n - 1) / PAGE_SIZE;
+ pagemask = ((1 << (lastpg + 1)) - 1) & ~((1 << firstpg) - 1);
+ /* check if there are dirty pages outside the write range */
+ if (bp->nb_dirty & ~pagemask) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ goto again;
+ }
+ /* if the first or last pages are already dirty */
+ /* make sure that the dirty range encompasses those pages */
+ if (NBPGDIRTY(bp, firstpg) || NBPGDIRTY(bp, lastpg)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
+ bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
+ if (NBPGDIRTY(bp, lastpg)) {
+ bp->nb_dirtyend = (lastpg + 1) * PAGE_SIZE;
+ /* clip to EOF */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ } else {
+ bp->nb_dirtyend = on + n;
+ }
+ }
+ }
+
+ /*
+ * Are we extending the size of the file with this write?
+ * If so, update file size now that we have the block.
+ * If there was a partial buf at the old eof, validate
+ * and zero the new bytes.
+ */
+ if ((uio_offset(uio) + n) > (off_t)np->n_size) {
+ daddr64_t eofbn = np->n_size / biosize;
+ int neweofoff = (uio_offset(uio) + n) % biosize;
+
+ FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
+
+ /* if we're extending within the same last block */
+ /* and the block is flagged as being cached... */
+ if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
+ /* ...check that all pages in buffer are valid */
+ int endpg = ((neweofoff ? neweofoff : biosize) - 1) / PAGE_SIZE;
+ u_int32_t pagemask;
+ /* pagemask only has to extend to last page being written to */
+ pagemask = (1 << (endpg + 1)) - 1;
+ FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
+ if ((bp->nb_valid & pagemask) != pagemask) {
+ /* zerofill any hole */
+ if (on > bp->nb_validend) {
+ int i;
+ for (i = bp->nb_validend / PAGE_SIZE; i <= (on - 1) / PAGE_SIZE; i++) {
+ NBPGVALID_SET(bp, i);
+ }
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
+ bzero((char *)bp->nb_data + bp->nb_validend,
+ on - bp->nb_validend);
+ }
+ /* zerofill any trailing data in the last page */
+ if (neweofoff) {
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
+ bzero((char *)bp->nb_data + neweofoff,
+ PAGE_SIZE - (neweofoff & PAGE_MASK));
+ }
+ }
+ }
+ np->n_size = uio_offset(uio) + n;
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ }
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ /*
+ * UBC doesn't handle partial pages, so we need to make sure
+ * that any pages left in the page cache are completely valid.
+ *
+ * Writes that are smaller than a block are delayed if they
+ * don't extend to the end of the block.
+ *
+ * If the block isn't (completely) cached, we may need to read
+ * in some parts of pages that aren't covered by the write.
+ * If the write offset (on) isn't page aligned, we'll need to
+ * read the start of the first page being written to. Likewise,
+ * if the offset of the end of the write (on+n) isn't page aligned,
+ * we'll need to read the end of the last page being written to.
+ *
+ * Notes:
+ * We don't want to read anything we're just going to write over.
+ * We don't want to read anything we're just going to drop when the
+ * I/O is complete (i.e. don't do reads for NOCACHE requests).
+ * We don't want to issue multiple I/Os if we don't have to
+ * (because they're synchronous rpcs).
+ * We don't want to read anything we already have modified in the
+ * page cache.
+ */
+ if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+ int firstpg, lastpg, dirtypg;
+ int firstpgoff, lastpgoff;
+ start = end = -1;
+ firstpg = on / PAGE_SIZE;
+ firstpgoff = on & PAGE_MASK;
+ lastpg = (on + n - 1) / PAGE_SIZE;
+ lastpgoff = (on + n) & PAGE_MASK;
+ if (firstpgoff && !NBPGVALID(bp, firstpg)) {
+ /* need to read start of first page */
+ start = firstpg * PAGE_SIZE;
+ end = start + firstpgoff;
+ }
+ if (lastpgoff && !NBPGVALID(bp, lastpg)) {
+ /* need to read end of last page */
+ if (start < 0) {
+ start = (lastpg * PAGE_SIZE) + lastpgoff;
+ }
+ end = (lastpg + 1) * PAGE_SIZE;
+ }
+ if (ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /*
+ * For nocache writes, if there is any partial page at the
+ * start or end of the write range, then we do the write
+ * synchronously to make sure that we can drop the data
+ * from the cache as soon as the WRITE finishes. Normally,
+ * we would do an unstable write and not drop the data until
+ * it was committed. But doing that here would risk allowing
+ * invalid data to be read from the cache between the WRITE
+ * and the COMMIT.
+ * (NB_STABLE indicates that data writes should be FILESYNC)
+ */
+ if (end > start) {
+ SET(bp->nb_flags, NB_STABLE);
+ }
+ goto skipread;
+ }
+ if (end > start) {
+ /* need to read the data in range: start...end-1 */
+
+ /* first, check for dirty pages in between */
+ /* if there are, we'll have to do two reads because */
+ /* we don't want to overwrite the dirty pages. */
+ for (dirtypg = start / PAGE_SIZE; dirtypg <= (end - 1) / PAGE_SIZE; dirtypg++) {
+ if (NBPGDIRTY(bp, dirtypg)) {
+ break;
+ }
+ }
+
+ /* if start is at beginning of page, try */
+ /* to get any preceding pages as well. */
+ if (!(start & PAGE_MASK)) {
+ /* stop at next dirty/valid page or start of block */
+ for (; start > 0; start -= PAGE_SIZE) {
+ if (NBPGVALID(bp, ((start - 1) / PAGE_SIZE))) {
+ break;
+ }
+ }
+ }
+
+ NFS_BUF_MAP(bp);
+ /* setup uio for read(s) */
+ boff = NBOFF(bp);
+ auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+ &auio_buf, sizeof(auio_buf));
+
+ if (dirtypg <= (end - 1) / PAGE_SIZE) {
+ /* there's a dirty page in the way, so just do two reads */
+ /* we'll read the preceding data here */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
+ bp->nb_validoff = start;
+ }
+ if ((bp->nb_validend < 0) || (bp->nb_validend < on)) {
+ bp->nb_validend = on;
+ }
+ if ((off_t)np->n_size > boff + bp->nb_validend) {
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ }
+ /* validate any pages before the write offset */
+ for (; start < on / PAGE_SIZE; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ }
+ }
+ /* adjust start to read any trailing data */
+ start = on + n;
+ }
+
+ /* if end is at end of page, try to */
+ /* get any following pages as well. */
+ if (!(end & PAGE_MASK)) {
+ /* stop at next valid page or end of block */
+ for (; end < biosize; end += PAGE_SIZE) {
+ if (NBPGVALID(bp, end / PAGE_SIZE)) {
+ break;
+ }
+ }
+ }
+
+ if (((boff + start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ bzero(bp->nb_data + start, end - start);
+ error = 0;
+ } else {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
+ bp->nb_validoff = start;
+ }
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end)) {
+ bp->nb_validend = end;
+ }
+ if ((off_t)np->n_size > boff + bp->nb_validend) {
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ }
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_32(on); start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ }
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_32(on + n - 1); end -= PAGE_SIZE) {
+ NBPGVALID_SET(bp, (end - 1) / PAGE_SIZE);
+ }
+ }
+ /* Note: pages being written to will be validated when written */
+ }
+ }
+skipread:
+
+ if (ISSET(bp->nb_flags, NB_ERROR)) {
+ error = bp->nb_error;
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
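+ /* the file is about to be modified, so make sure NMODIFIED is set on the node */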
+ nfs_node_lock_force(np);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+
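+ /* copy the caller's data from the uio into the buffer at the write offset */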
+ NFS_BUF_MAP(bp);
+ error = uiomove((char *)bp->nb_data + on, n, uio);
+ if (error) {
+ SET(bp->nb_flags, NB_ERROR);
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ /* validate any pages written to */
+ start = on & ~PAGE_MASK;
+ for (; start < on + n; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ /*
+ * This may seem a little weird, but we don't actually set the
+ * dirty bits for writes. This is because we keep the dirty range
+ * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
+ * delayed writes, when we give the pages back to the VM we don't
+ * want to keep them marked dirty, because when we later write the
+ * buffer we won't be able to tell which pages were written dirty
+ * and which pages were mmapped and dirtied.
+ */
+ }
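+ /* extend (or establish) the buffer's dirty range to cover the bytes just written */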
+ if (bp->nb_dirtyend > 0) {
+ bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
+ bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
+ } else {
+ bp->nb_dirtyoff = on;
+ bp->nb_dirtyend = on + n;
+ }
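+ /* if the existing valid range doesn't overlap the dirty range, replace it; otherwise merge the two */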
+ if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
+ bp->nb_validoff > bp->nb_dirtyend) {
+ bp->nb_validoff = bp->nb_dirtyoff;
+ bp->nb_validend = bp->nb_dirtyend;
+ } else {
+ bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
+ bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE)) {
+ nfs_buf_normalize_valid_range(np, bp);
+ }
+
+ /*
+ * Since this block is being modified, it must be written
+ * again and not just committed.
+ */
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ nfs_node_lock_force(np);
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ np->n_needcommitcnt--;
+ CHECK_NEEDCOMMITCNT(np);
+ }
+ CLR(bp->nb_flags, NB_NEEDCOMMIT);
+ nfs_node_unlock(np);
+ }
+
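+ /*
+ * Push the buffer according to the write mode: synchronously for IO_SYNC,
+ * asynchronously when the block is full, appending, or nocache, and as a
+ * delayed write otherwise.
+ */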
+ if (ioflag & IO_SYNC) {
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
+ (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
+ SET(bp->nb_flags, NB_ASYNC);
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ } else {
+ /* If the block wasn't already delayed: charge for the write */
+ if (!ISSET(bp->nb_flags, NB_DELWRI)) {
+ proc_t p = vfs_context_proc(ctx);
+ if (p && p->p_stats) {
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
+ }
+ }
+ nfs_buf_write_delayed(bp);
+ }
+
+
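+ /* if a lot of buffers are waiting on commits, push the commits out now */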
+ if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS) {
+ nfs_flushcommits(np, 1);
+ }
+ } while (uio_resid(uio) > 0 && n > 0);
+
+out:
+ nfs_node_lock_force(np);
+ np->n_wrbusy--;
+ if ((ioflag & IO_SYNC) && !np->n_wrbusy && !np->n_numoutput) {
+ np->n_flag &= ~NMODIFIED;
+ }
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return error;
+}
+
+
+/*
+ * NFS write call
+ */
+int
+nfs_write_rpc(
+ nfsnode_t np,
+ uio_t uio,
+ vfs_context_t ctx,
+ int *iomodep,
+ uint64_t *wverfp)
+{
+ return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
+}
+
+int
+nfs_write_rpc2(
+ nfsnode_t np,
+ uio_t uio,
+ thread_t thd,
+ kauth_cred_t cred,
+ int *iomodep,
+ uint64_t *wverfp)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ int wverfset, commit, committed;
+ uint64_t wverf = 0, wverf2;
+ size_t nmwsize, totalsize, tsiz, len, rlen;
+ struct nfsreq rq, *req = &rq;
+#if CONFIG_NFS4
+ uint32_t stategenid = 0, restart = 0;
+#endif
+ uint32_t vrestart = 0;
+ uio_t uio_save = NULL;
+
+#if DIAGNOSTIC
+ /* XXX limitation based on need to back up uio on short write */
+ if (uio_iovcnt(uio) != 1) {
+ panic("nfs3_write_rpc: iovcnt > 1");
+ }
+#endif
+ FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ nmwsize = nmp->nm_wsize;
+
+ wverfset = 0;
+ committed = NFS_WRITE_FILESYNC;
+
+ totalsize = tsiz = uio_resid(uio);
+ if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
+ FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
+ return EFBIG;
+ }
+
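+ /* keep a copy of the uio so it can be rewound after a short write or a verifier change */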
+ uio_save = uio_duplicate(uio);
+ if (uio_save == NULL) {
+ return EIO;
+ }
+
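+ /* issue write RPCs of at most nm_wsize bytes each until all the data has been sent */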
+ while (tsiz > 0) {
+ len = (tsiz > nmwsize) ? nmwsize : tsiz;
+ FSDBG(537, np, uio_offset(uio), len, 0);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ break;
+ }
+#if CONFIG_NFS4
+ if (nmp->nm_vers >= NFS_VER4) {
+ stategenid = nmp->nm_stategenid;
+ }
+#endif
+ error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
+ if (!error) {
+ error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
+ }
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ }
+#if CONFIG_NFS4
+ if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+ (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+ lck_mtx_lock(&nmp->nm_lock);
+ if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+ NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
+ nfs_need_recover(nmp, error);
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ } else {
+ if (error == NFSERR_GRACE) {
+ tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
+ }
+ if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+ continue;
+ }
+ }
+ }
+#endif
+ if (error) {
+ break;
+ }
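+ /* NFSv2 write replies carry no commit level or verifier, so skip those checks */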
+ if (nfsvers == NFS_VER2) {
+ tsiz -= len;
+ continue;
+ }
+
+ /* check for a short write */
+ if (rlen < len) {
+ /* Reset the uio to reflect the actual transfer */
+ *uio = *uio_save;
+ uio_update(uio, totalsize - (tsiz - rlen));
+ len = rlen;
+ }
+
+ /* return lowest commit level returned */
+ if (commit < committed) {
+ committed = commit;
+ }
+
+ tsiz -= len;
+
+ /* check write verifier */
+ if (!wverfset) {
+ wverf = wverf2;
+ wverfset = 1;
+ } else if (wverf != wverf2) {
+ /* verifier changed, so we need to restart all the writes */
+ if (++vrestart > 100) {
+ /* give up after too many restarts */
+ error = EIO;
+ break;
+ }
+ *uio = *uio_save; /* Reset the uio back to the start */
+ committed = NFS_WRITE_FILESYNC;
+ wverfset = 0;
+ tsiz = totalsize;
+ }
+ }
+ if (uio_save) {
+ uio_free(uio_save);
+ }
+ if (wverfset && wverfp) {
+ *wverfp = wverf;
+ }
+ *iomodep = committed;
+ if (error) {
+ uio_setresid(uio, tsiz);
+ }
+ FSDBG_BOT(537, np, committed, uio_resid(uio), error);
+ return error;
+}
+
+int
+nfs3_write_rpc_async(
+ nfsnode_t np,
+ uio_t uio,
+ size_t len,
+ thread_t thd,