+ }
+ return error;
+}
+/*
+ * Get the attributes of an NFS node, from the attribute cache when that is
+ * allowed, otherwise from the server.
+ *
+ * np    - node whose attributes are wanted
+ * nvap  - where to return the attributes; if NULL, a temporary nfs_vattr is
+ *         allocated here and freed again before returning
+ * ctx   - caller's VFS context
+ * flags - NGA_* flags; NGA_UNCACHED, NGA_MONITOR, NGA_ACL and NGA_SOFT are
+ *         the ones examined in this function
+ *
+ * Only one thread at a time fetches attributes for a node: the fetcher sets
+ * NGETATTRINPROG, cached requests sleep (setting NGETATTRWANT) until it is
+ * cleared, and NGA_MONITOR requests give up with EINPROGRESS instead of
+ * waiting.
+ *
+ * On success the fresh attributes are compared with the change information
+ * recorded in the node (NFS_CHANGED_NC / NFS_CHANGED); on a mismatch the
+ * name cache is purged (directories) and/or the node's buffers are
+ * invalidated.  Unless NGA_ACL was passed, an ACL present in caller-supplied
+ * attributes is freed and its bitmap bit cleared before returning.
+ *
+ * Returns 0 on success, ENXIO if the mount is gone, EINPROGRESS as described
+ * above, or an error from the lock, cache, RPC or invalidation routines.
+ */
+int
+nfs_getattr_internal(nfsnode_t np, struct nfs_vattr *nvap, vfs_context_t ctx, int flags)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers, inprogset = 0, wanted = 0, avoidfloods = 0;
+ struct nfs_vattr *nvattr = NULL;
+ struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 }; /* msleep() timeout while waiting for an in-progress getattr */
+ u_int64_t xid = 0;
+
+ FSDBG_TOP(513, np->n_size, np, np->n_vattr.nva_size, np->n_flag);
+
+ nmp = NFSTONMP(np);
+
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ if (!nvap) {
+ MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);
+ nvap = nvattr;
+ }
+ NVATTR_INIT(nvap);
+
+ /* Update local times for special files. */
+ if (np->n_flag & (NACC | NUPD)) {
+ nfs_node_lock_force(np);
+ np->n_flag |= NCHG;
+ nfs_node_unlock(np);
+ }
+ /* Update size, if necessary */
+ if (ISSET(np->n_flag, NUPDATESIZE)) {
+ nfs_data_update_size(np, 0);
+ }
+
+ error = nfs_node_lock(np);
+ nfsmout_if(error);
+ if (!(flags & (NGA_UNCACHED | NGA_MONITOR)) || ((nfsvers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))) {
+ /*
+ * Use the cache or wait for any getattr in progress if:
+ * - it's a cached request, or
+ * - we have a delegation, or
+ * - the server isn't responding
+ */
+ while (1) {
+ error = nfs_getattrcache(np, nvap, flags);
+ if (!error || (error != ENOENT)) { /* anything but ENOENT (presumably a cache miss) is final; "!error ||" is redundant */
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ error = 0;
+ if (!ISSET(np->n_flag, NGETATTRINPROG)) {
+ break;
+ }
+ if (flags & NGA_MONITOR) {
+ /* no need to wait if a request is pending */
+ error = EINPROGRESS;
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ SET(np->n_flag, NGETATTRWANT);
+ msleep(np, &np->n_lock, PZERO - 1, "nfsgetattrwant", &ts);
+ if ((error = nfs_sigintr(NFSTONMP(np), NULL, vfs_context_thread(ctx), 0))) {
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ }
+ SET(np->n_flag, NGETATTRINPROG);
+ inprogset = 1;
+ } else if (!ISSET(np->n_flag, NGETATTRINPROG)) {
+ SET(np->n_flag, NGETATTRINPROG);
+ inprogset = 1;
+ } else if (flags & NGA_MONITOR) {
+ /* no need to make a request if one is pending */
+ error = EINPROGRESS;
+ }
+ nfs_node_unlock(np);
+
+ nmp = NFSTONMP(np); /* re-fetch and re-check the mount before going over the wire */
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ }
+ if (error) {
+ goto nfsmout;
+ }
+
+ /*
+ * Return cached attributes if they are valid,
+ * if the server doesn't respond, and this is
+ * some softened up style of mount.
+ */
+ if (NATTRVALID(np) && nfs_use_cache(nmp)) {
+ flags |= NGA_SOFT;
+ }
+
+ /*
+ * We might want to try to get both the attributes and access info by
+ * making an ACCESS call and seeing if it returns updated attributes.
+ * But don't bother if we aren't caching access info or if the
+ * attributes returned wouldn't be cached.
+ */
+ if (!(flags & NGA_ACL) && (nfsvers != NFS_VER2) && nfs_access_for_getattr && (nfs_access_cache_timeout > 0)) {
+ if (nfs_attrcachetimeout(np) > 0) {
+ /* OSAddAtomic(1, &nfsstats.accesscache_misses); */
+ u_int32_t access = NFS_ACCESS_ALL;
+ int rpcflags = 0;
+
+ /* Return cached attrs if server doesn't respond */
+ if (flags & NGA_SOFT) {
+ rpcflags |= R_SOFT;
+ }
+
+ error = nmp->nm_funcs->nf_access_rpc(np, &access, rpcflags, ctx);
+
+ if (error == ETIMEDOUT) {
+ goto returncached;
+ }
+
+ if (error) {
+ goto nfsmout;
+ }
+ nfs_node_lock_force(np);
+ error = nfs_getattrcache(np, nvap, flags);
+ nfs_node_unlock(np);
+ if (!error || (error != ENOENT)) {
+ goto nfsmout;
+ }
+ /* Well, that didn't work... just do a getattr... */
+ error = 0;
+ }
+ }
+
+ avoidfloods = 0; /* retry counter for the out-of-order reply case below */
+
+tryagain:
+ error = nmp->nm_funcs->nf_getattr_rpc(np, NULL, np->n_fhp, np->n_fhsize, flags, ctx, nvap, &xid);
+ if (!error) {
+ nfs_node_lock_force(np);
+ error = nfs_loadattrcache(np, nvap, &xid, 0);
+ nfs_node_unlock(np);
+ }
+
+ /*
+ * If the server didn't respond, return cached attributes.
+ */
+returncached:
+ if ((flags & NGA_SOFT) && (error == ETIMEDOUT)) {
+ nfs_node_lock_force(np);
+ error = nfs_getattrcache(np, nvap, flags);
+ if (!error || (error != ENOENT)) {
+ nfs_node_unlock(np);
+ goto nfsmout;
+ }
+ nfs_node_unlock(np);
+ }
+ nfsmout_if(error);
+
+ if (!xid) { /* out-of-order rpc - attributes were dropped */
+ FSDBG(513, -1, np, np->n_xid >> 32, np->n_xid);
+ if (avoidfloods++ < 20) {
+ goto tryagain;
+ }
+ /* avoidfloods>1 is bizarre. at 20 pull the plug */
+ /* just return the last attributes we got */
+ }
+nfsmout:
+ nfs_node_lock_force(np);
+ if (inprogset) { /* we set NGETATTRINPROG: clear it and remember whether anyone is waiting */
+ wanted = ISSET(np->n_flag, NGETATTRWANT);
+ CLR(np->n_flag, (NGETATTRINPROG | NGETATTRWANT));
+ }
+ if (!error) {
+ /* check if the node changed on us */
+ vnode_t vp = NFSTOV(np);
+ enum vtype vtype = vnode_vtype(vp);
+ if ((vtype == VDIR) && NFS_CHANGED_NC(nfsvers, np, nvap)) {
+ FSDBG(513, -1, np, 0, np);
+ np->n_flag &= ~NNEGNCENTRIES;
+ cache_purge(vp);
+ np->n_ncgen++;
+ NFS_CHANGED_UPDATE_NC(nfsvers, np, nvap);
+ NFS_VNOP_DBG("Purge directory 0x%llx\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(vp));
+ }
+ if (NFS_CHANGED(nfsvers, np, nvap)) {
+ FSDBG(513, -1, np, -1, np);
+ if (vtype == VDIR) {
+ NFS_VNOP_DBG("Invalidate directory 0x%llx\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(vp));
+ nfs_invaldir(np);
+ }
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ FSDBG(513, -1, np, -2, error);
+ if (!error) { /* record the new change info only once the buffers were invalidated */
+ nfs_node_lock_force(np);
+ NFS_CHANGED_UPDATE(nfsvers, np, nvap);
+ nfs_node_unlock(np);
+ }
+ } else {
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+ }
+ } else {
+ nfs_node_unlock(np);
+ if (wanted) {
+ wakeup(np);
+ }
+ }
+
+ if (nvattr != NULL) { /* attributes were only wanted for their caching side effects */
+ NVATTR_CLEANUP(nvap);
+ FREE(nvattr, M_TEMP);
+ } else if (!(flags & NGA_ACL)) {
+ /* make sure we don't return an ACL if it wasn't asked for */
+ NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
+ if (nvap->nva_acl) {
+ kauth_acl_free(nvap->nva_acl);
+ nvap->nva_acl = NULL;
+ }
+ }
+ FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
+ return error;
+}
+
+
+/*
+ * NFS getattr call from vfs.
+ */
+
+/*
+ * The attributes we support over the wire.
+ * We also get the fsid, but the vfs layer gets it out of the mount
+ * structure after calling us, so there's no need to return it,
+ * and Finder expects to call getattrlist just looking for the FSID
+ * without hanging on a non-responsive server.
+ */
+#define NFS3_SUPPORTED_VATTRS \
+ (VNODE_ATTR_va_rdev | \
+ VNODE_ATTR_va_nlink | \
+ VNODE_ATTR_va_data_size | \
+ VNODE_ATTR_va_data_alloc | \
+ VNODE_ATTR_va_uid | \
+ VNODE_ATTR_va_gid | \
+ VNODE_ATTR_va_mode | \
+ VNODE_ATTR_va_modify_time | \
+ VNODE_ATTR_va_change_time | \
+ VNODE_ATTR_va_access_time | \
+ VNODE_ATTR_va_fileid | \
+ VNODE_ATTR_va_type)
+
+
+/*
+ * NFS getattr vnode operation (nfs3_ flavor).
+ *
+ * Fills in ap->a_vap from the node's attributes, obtained through
+ * nfs_getattr() with NGA_CACHED.  va_iosize is always returned.  If none of
+ * the NFS3_SUPPORTED_VATTRS attributes is requested nothing else is done,
+ * and a request for only the file ID is answered from the node's cached
+ * attributes once they have ever been loaded (n_attrstamp non-zero).
+ *
+ * Returns 0 on success, or the error from nfs_getattr().
+ */
+int
+nfs3_vnop_getattr(
+	struct vnop_getattr_args /* {
+	                          *  struct vnodeop_desc *a_desc;
+	                          *  vnode_t a_vp;
+	                          *  struct vnode_attr *a_vap;
+	                          *  vfs_context_t a_context;
+	                          *  } */*ap)
+{
+	int error;
+	nfsnode_t np = VTONFS(ap->a_vp);
+	uint64_t supported_attrs;
+	struct nfs_vattr *nva;
+	struct vnode_attr *vap = ap->a_vap;
+	dev_t rdev;
+
+	/*
+	 * Lets don't go over the wire if we don't support any of the attributes.
+	 * Just fall through at the VFS layer and let it cons up what it needs.
+	 */
+	/* Return the io size no matter what, since we don't go over the wire for this */
+	VATTR_RETURN(vap, va_iosize, nfs_iosize);
+
+	supported_attrs = NFS3_SUPPORTED_VATTRS;
+
+	if ((vap->va_active & supported_attrs) == 0) {
+		return 0;
+	}
+
+	if (VATTR_IS_ACTIVE(ap->a_vap, va_name)) {
+		NFS_VNOP_DBG("Getting attrs for 0x%llx, vname is %s\n",
+		    (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
+		    ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
+	}
+
+	/*
+	 * We should not go over the wire if only fileid was requested and has ever been populated.
+	 */
+	if ((vap->va_active & supported_attrs) == VNODE_ATTR_va_fileid) {
+		if (np->n_attrstamp) {
+			VATTR_RETURN(vap, va_fileid, np->n_vattr.nva_fileid);
+			return 0;
+		}
+	}
+
+	/* the attribute structure is heap-allocated rather than kept on the stack */
+	MALLOC(nva, struct nfs_vattr *, sizeof(*nva), M_TEMP, M_WAITOK);
+	error = nfs_getattr(np, nva, ap->a_context, NGA_CACHED);
+	if (error) {
+		goto out;
+	}
+
+	/* copy nva to *a_vap */
+	VATTR_RETURN(vap, va_type, nva->nva_type);
+	VATTR_RETURN(vap, va_mode, nva->nva_mode);
+	rdev = makedev(nva->nva_rawdev.specdata1, nva->nva_rawdev.specdata2);
+	VATTR_RETURN(vap, va_rdev, rdev);
+	VATTR_RETURN(vap, va_uid, nva->nva_uid);
+	VATTR_RETURN(vap, va_gid, nva->nva_gid);
+	VATTR_RETURN(vap, va_nlink, nva->nva_nlink);
+	VATTR_RETURN(vap, va_fileid, nva->nva_fileid);
+	VATTR_RETURN(vap, va_data_size, nva->nva_size);
+	VATTR_RETURN(vap, va_data_alloc, nva->nva_bytes);
+	/* the timestamps are copied field by field, then marked supported */
+	vap->va_access_time.tv_sec = nva->nva_timesec[NFSTIME_ACCESS];
+	vap->va_access_time.tv_nsec = nva->nva_timensec[NFSTIME_ACCESS];
+	VATTR_SET_SUPPORTED(vap, va_access_time);
+	vap->va_modify_time.tv_sec = nva->nva_timesec[NFSTIME_MODIFY];
+	vap->va_modify_time.tv_nsec = nva->nva_timensec[NFSTIME_MODIFY];
+	VATTR_SET_SUPPORTED(vap, va_modify_time);
+	vap->va_change_time.tv_sec = nva->nva_timesec[NFSTIME_CHANGE];
+	vap->va_change_time.tv_nsec = nva->nva_timensec[NFSTIME_CHANGE];
+	VATTR_SET_SUPPORTED(vap, va_change_time);
+
+
+	// VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
+out:
+	FREE(nva, M_TEMP);
+	return error;
+}
+
+/*
+ * NFS setattr call.
+ *
+ * Applies the attributes marked active in ap->a_vap to the file on the
+ * server through the mount's nf_setattr_rpc routine.
+ *
+ * When the data size is being changed on anything other than a directory or
+ * a special file, the node's buffers are flushed/invalidated first, (NFSv4)
+ * the file is opened for write access if it is not already, and the local
+ * size (n_size, nva_size, UBC) is updated before the RPC.  If the RPC then
+ * fails, the original size is restored locally and re-sent to the server on
+ * a best-effort basis.
+ *
+ * While the RPC is in flight, a lookup helper (nfs_dulookup_*) is run
+ * against the parent directory when mode/owner/ACL attributes are changing
+ * and the mount has no named-attribute support.
+ *
+ * Returns 0 on success or an errno value, among them ENXIO (mount gone),
+ * EROFS (read-only mount), EISDIR (size change on a directory), EIO (NFSv4
+ * state revoked or open file lost) and ENOMEM (no open owner), or an error
+ * from the flush, open or setattr RPC.
+ */
+int
+nfs_vnop_setattr(
+	struct vnop_setattr_args /* {
+	                          *  struct vnodeop_desc *a_desc;
+	                          *  vnode_t a_vp;
+	                          *  struct vnode_attr *a_vap;
+	                          *  vfs_context_t a_context;
+	                          *  } */*ap)
+{
+	vfs_context_t ctx = ap->a_context;
+	vnode_t vp = ap->a_vp;
+	nfsnode_t np = VTONFS(vp);
+	struct nfsmount *nmp;
+	struct vnode_attr *vap = ap->a_vap;
+	int error = 0;
+	int biosize, nfsvers, namedattrs;
+	u_quad_t origsize, vapsize;
+	struct nfs_dulookup *dul;
+	nfsnode_t dnp = NULL;
+	int dul_in_progress = 0;
+	vnode_t dvp = NULL;
+	const char *vname = NULL;
+#if CONFIG_NFS4
+	struct nfs_open_owner *noop = NULL;
+	struct nfs_open_file *nofp = NULL;
+#endif
+	nmp = VTONMP(vp);
+	if (nfs_mount_gone(nmp)) {
+		return ENXIO;
+	}
+	nfsvers = nmp->nm_vers;
+	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+	biosize = nmp->nm_biosize;
+
+	/* Disallow write attempts if the filesystem is mounted read-only. */
+	if (vnode_vfsisrdonly(vp)) {
+		return EROFS;
+	}
+
+	origsize = np->n_size;
+	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+		switch (vnode_vtype(vp)) {
+		case VDIR:
+			return EISDIR;
+		case VCHR:
+		case VBLK:
+		case VSOCK:
+		case VFIFO:
+			/* size is meaningless for special files: drop it, and finish if nothing else is requested */
+			if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
+			    !VATTR_IS_ACTIVE(vap, va_access_time) &&
+			    !VATTR_IS_ACTIVE(vap, va_mode) &&
+			    !VATTR_IS_ACTIVE(vap, va_uid) &&
+			    !VATTR_IS_ACTIVE(vap, va_gid)) {
+				return 0;
+			}
+			VATTR_CLEAR_ACTIVE(vap, va_data_size);
+			break;
+		default:
+			/*
+			 * Disallow write attempts if the filesystem is
+			 * mounted read-only.
+			 */
+			if (vnode_vfsisrdonly(vp)) {
+				return EROFS;
+			}
+			FSDBG_TOP(512, np->n_size, vap->va_data_size,
+			    np->n_vattr.nva_size, np->n_flag);
+			/* clear NNEEDINVALIDATE, if set */
+			if ((error = nfs_node_lock(np))) {
+				return error;
+			}
+			if (np->n_flag & NNEEDINVALIDATE) {
+				np->n_flag &= ~NNEEDINVALIDATE;
+			}
+			nfs_node_unlock(np);
+			/* flush everything */
+			error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0), ctx, 1);
+			if (error) {
+				NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
+				FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
+				return error;
+			}
+#if CONFIG_NFS4
+			if (nfsvers >= NFS_VER4) {
+				/* setting file size requires having the file open for write access */
+				if (np->n_flag & NREVOKE) {
+					return EIO;
+				}
+				noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
+				if (!noop) {
+					return ENOMEM;
+				}
+restart:
+				/*
+				 * From here on we hold a reference on noop (also when we
+				 * get here via "goto restart"), so every exit below must
+				 * drop it with nfs_open_owner_rele().
+				 */
+				error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
+				if (error) {
+					nfs_open_owner_rele(noop);
+					return error;
+				}
+				if (np->n_flag & NREVOKE) {
+					nfs_mount_state_in_use_end(nmp, 0);
+					nfs_open_owner_rele(noop);
+					return EIO;
+				}
+				error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
+				if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
+					error = EIO;
+				}
+				if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
+					error = nfs4_reopen(nofp, vfs_context_thread(ctx));
+					nofp = NULL;
+					if (!error) {
+						nfs_mount_state_in_use_end(nmp, 0);
+						goto restart;
+					}
+				}
+				if (!error) {
+					error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
+				}
+				if (error) {
+					nfs_mount_state_in_use_end(nmp, 0);
+					nfs_open_owner_rele(noop);
+					return error;
+				}
+				if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
+					/* we don't have the file open for write access, so open it */
+					error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
+					if (!error) {
+						/* remember that this open must be closed again once the setattr is done */
+						nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
+					}
+					if (nfs_mount_state_error_should_restart(error)) {
+						nfs_open_file_clear_busy(nofp);
+						nofp = NULL;
+						nfs_mount_state_in_use_end(nmp, error);
+						goto restart;
+					}
+				}
+			}
+#endif
+			nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+			if (np->n_size > vap->va_data_size) { /* shrinking? */
+				daddr64_t obn, bn;
+				int mustwrite;
+				off_t neweofoff;
+				struct nfsbuf *bp;
+				nfsbufpgs pagemask;
+
+				/* walk the cached blocks from the old last block down to the block holding the new EOF */
+				obn = (np->n_size - 1) / biosize;
+				bn = vap->va_data_size / biosize;
+				for (; obn >= bn; obn--) {
+					if (!nfs_buf_is_incore(np, obn)) {
+						continue;
+					}
+					error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
+					if (error) {
+						continue;
+					}
+					if (obn != bn) {
+						/* block lies entirely beyond the new EOF: just toss it */
+						FSDBG(512, bp, bp->nb_flags, 0, obn);
+						SET(bp->nb_flags, NB_INVAL);
+						nfs_buf_release(bp, 1);
+						continue;
+					}
+					mustwrite = 0;
+					neweofoff = vap->va_data_size - NBOFF(bp);
+					/* check for any dirty data before the new EOF */
+					if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
+						/* clip dirty range to EOF */
+						if (bp->nb_dirtyend > neweofoff) {
+							bp->nb_dirtyend = neweofoff;
+							if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+								bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+							}
+						}
+						if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
+							mustwrite++;
+						}
+					}
+					/* keep only the dirty-page bits for pages up to the new EOF */
+					nfs_buf_pgs_get_page_mask(&pagemask, round_page_64(neweofoff) / PAGE_SIZE);
+					nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &bp->nb_dirty);
+					if (nfs_buf_pgs_is_set(&bp->nb_dirty)) {
+						mustwrite++;
+					}
+					if (!mustwrite) {
+						FSDBG(512, bp, bp->nb_flags, 0, obn);
+						SET(bp->nb_flags, NB_INVAL);
+						nfs_buf_release(bp, 1);
+						continue;
+					}
+					/* gotta write out dirty data before invalidating */
+					/* (NB_STABLE indicates that data writes should be FILESYNC) */
+					/* (NB_NOCACHE indicates buffer should be discarded) */
+					CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
+					SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
+					if (!IS_VALID_CRED(bp->nb_wcred)) {
+						kauth_cred_t cred = vfs_context_ucred(ctx);
+						kauth_cred_ref(cred);
+						bp->nb_wcred = cred;
+					}
+					error = nfs_buf_write(bp);
+					// Note: bp has been released
+					if (error) {
+						FSDBG(512, bp, 0xd00dee, 0xbad, error);
+						nfs_node_lock_force(np);
+						np->n_error = error;
+						np->n_flag |= NWRITEERR;
+						/*
+						 * There was a write error and we need to
+						 * invalidate attrs and flush buffers in
+						 * order to sync up with the server.
+						 * (if this write was extending the file,
+						 * we may no longer know the correct size)
+						 */
+						NATTRINVALIDATE(np);
+						nfs_node_unlock(np);
+						nfs_data_unlock(np);
+						nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
+						nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+						error = 0;
+					}
+				}
+			}
+			if (vap->va_data_size != np->n_size) {
+				ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
+			}
+			/* remember the old size so it can be restored if the RPC fails */
+			origsize = np->n_size;
+			np->n_size = np->n_vattr.nva_size = vap->va_data_size;
+			nfs_node_lock_force(np);
+			CLR(np->n_flag, NUPDATESIZE);
+			nfs_node_unlock(np);
+			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+		}
+	} else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
+	    VATTR_IS_ACTIVE(vap, va_access_time) ||
+	    (vap->va_vaflags & VA_UTIMES_NULL)) {
+		/*
+		 * Times are changing: push out any modified data of a regular
+		 * file first.
+		 *
+		 * NOTE(review): nfs_mount_state_in_use_start() is only called
+		 * in the size-change path above, which is not taken on this
+		 * branch, so the nfs_mount_state_in_use_end() calls below look
+		 * unbalanced -- confirm against nfs_mount_state_in_use_end().
+		 */
+		if ((error = nfs_node_lock(np))) {
+#if CONFIG_NFS4
+			if (nfsvers >= NFS_VER4) {
+				nfs_mount_state_in_use_end(nmp, 0);
+			}
+#endif
+			return error;
+		}
+		if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
+			nfs_node_unlock(np);
+			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+			if (error == EINTR) {
+#if CONFIG_NFS4
+				if (nfsvers >= NFS_VER4) {
+					nfs_mount_state_in_use_end(nmp, 0);
+				}
+#endif
+				return error;
+			}
+		} else {
+			nfs_node_unlock(np);
+		}
+	}
+
+	MALLOC(dul, struct nfs_dulookup *, sizeof(*dul), M_TEMP, M_WAITOK);
+
+	if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
+	    VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
+	    !(error = nfs_node_lock(np))) {
+		/* permissions/ownership are changing, so cached access results are no longer valid */
+		NACCESSINVALIDATE(np);
+		nfs_node_unlock(np);
+		if (!namedattrs) {
+			dvp = vnode_getparent(vp);
+			vname = vnode_getname(vp);
+			dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
+			if (dnp) {
+				if (nfs_node_set_busy(dnp, vfs_context_thread(ctx))) {
+					vnode_put(dvp);
+					vnode_putname(vname);
+				} else {
+					nfs_dulookup_init(dul, dnp, vname, NFS_STRLEN_INT(vname), ctx);
+					nfs_dulookup_start(dul, dnp, ctx);
+					dul_in_progress = 1;
+				}
+			} else {
+				if (dvp) {
+					vnode_put(dvp);
+				}
+				if (vname) {
+					vnode_putname(vname);
+				}
+			}
+		}
+	}
+
+	if (!error) {
+		error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
+	}
+
+	if (dul_in_progress) {
+		nfs_dulookup_finish(dul, dnp, ctx);
+		nfs_node_clear_busy(dnp);
+		vnode_put(dvp);
+		vnode_putname(vname);
+	}
+
+	FREE(dul, M_TEMP);
+	FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
+	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+		if (error && (origsize != np->n_size) &&
+		    ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
+			/* make every effort to resync file size w/ server... */
+			/* (don't bother if we'll be restarting the operation) */
+			int err; /* preserve "error" for return */
+			np->n_size = np->n_vattr.nva_size = origsize;
+			nfs_node_lock_force(np);
+			CLR(np->n_flag, NUPDATESIZE);
+			nfs_node_unlock(np);
+			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
+			ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
+			vapsize = vap->va_data_size;
+			vap->va_data_size = origsize;
+			err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
+			if (err) {
+				NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
+			}
+			vap->va_data_size = vapsize;
+		}
+		nfs_node_lock_force(np);
+		/*
+		 * The size was just set.  If the size is already marked for update, don't
+		 * trust the newsize (it may have been set while the setattr was in progress).
+		 * Clear the update flag and make sure we fetch new attributes so we are sure
+		 * we have the latest size.
+		 */
+		if (ISSET(np->n_flag, NUPDATESIZE)) {
+			CLR(np->n_flag, NUPDATESIZE);
+			NATTRINVALIDATE(np);
+			nfs_node_unlock(np);
+			nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
+		} else {
+			nfs_node_unlock(np);
+		}
+		nfs_data_unlock(np);
+#if CONFIG_NFS4
+		if (nfsvers >= NFS_VER4) {
+			if (nofp) {
+				/* don't close our setattr open if we'll be restarting... */
+				if (!nfs_mount_state_error_should_restart(error) &&
+				    (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
+					int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
+					if (err) {
+						NP(np, "nfs_vnop_setattr: close error: %d", err);
+					}
+					nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
+				}
+				nfs_open_file_clear_busy(nofp);
+				nofp = NULL;
+			}
+			/* the open owner reference is kept across a restart and dropped only when we are done */
+			if (nfs_mount_state_in_use_end(nmp, error)) {
+				goto restart;
+			}
+			nfs_open_owner_rele(noop);
+		}
+#endif
+	}
+	return error;
+}
+
+/*
+ * Do an NFS setattr RPC.
+ *
+ * Builds and sends a SETATTR request (NFSPROC_SETATTR) for np carrying the
+ * attributes that are active in vap, using the NFSv3 encoding when the mount
+ * version is NFS_VER3 and the fixed-layout encoding otherwise, then
+ * processes the attributes returned in the reply.
+ *
+ * Setting va_flags is not supported: non-zero flags fail with ENOTSUP (flags
+ * only) or EINVAL (flags together with other attributes); zero flags are
+ * ignored, and if nothing else is active the call succeeds without an RPC.
+ *
+ * Returns 0 on success, ENXIO if the mount is gone, or an error from
+ * building/sending the request, from locking the node, or the status
+ * returned for the request.
+ */
+int
+nfs3_setattr_rpc(
+ nfsnode_t np,
+ struct vnode_attr *vap,
+ vfs_context_t ctx)
+{
+ struct nfsmount *nmp = NFSTONMP(np);
+ int error = 0, lockerror = ENOENT, status = 0, wccpostattr = 0, nfsvers; /* lockerror stays non-zero until the node lock is actually held (see nfsmout) */
+ u_int64_t xid, nextxid; /* NOTE(review): xid is not initialized here; presumably nfs_request() always sets it -- confirm */
+ struct nfsm_chain nmreq, nmrep;
+
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+
+
+ if (VATTR_IS_ACTIVE(vap, va_flags)
+ ) {
+ if (vap->va_flags) { /* we don't support setting flags */
+ if (vap->va_active & ~VNODE_ATTR_va_flags) {
+ return EINVAL; /* return EINVAL if other attributes also set */
+ } else {
+ return ENOTSUP; /* return ENOTSUP for chflags(2) */
+ }
+ }
+ /* no flags set, so we'll just ignore it */
+ if (!(vap->va_active & ~VNODE_ATTR_va_flags)) {
+ return 0; /* no (other) attributes to set, so nothing to do */
+ }
+ }
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+ if (nfsvers == NFS_VER3) { /* v3: each attribute is a TRUE/FALSE "set it" flag followed by the value when TRUE */
+ if (VATTR_IS_ACTIVE(vap, va_mode)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_32(error, &nmreq, vap->va_mode);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_uid)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_32(error, &nmreq, vap->va_uid);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_gid)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_32(error, &nmreq, vap->va_gid);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_data_size)) {
+ nfsm_chain_add_32(error, &nmreq, TRUE);
+ nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, FALSE);
+ }
+ if (vap->va_vaflags & VA_UTIMES_NULL) { /* both times are set to the server's time */
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
+ } else {
+ if (VATTR_IS_ACTIVE(vap, va_access_time)) {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
+ nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
+ nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
+ }
+ }
+ nfsm_chain_add_32(error, &nmreq, FALSE); /* presumably the SETATTR guard "check" flag: no guard -- confirm against the protocol spec */
+ } else { /* pre-v3: fixed layout, -1 in a field means "not being set" */
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
+ vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
+ vap->va_uid : (uint32_t)-1);
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
+ vap->va_gid : (uint32_t)-1);
+ nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
+ vap->va_data_size : (uint32_t)-1);
+ if (VATTR_IS_ACTIVE(vap, va_access_time)) {
+ nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
+ ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff); /* nanoseconds are sent as microseconds */
+ } else {
+ nfsm_chain_add_32(error, &nmreq, -1);
+ nfsm_chain_add_32(error, &nmreq, -1);
+ }
+ if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
+ nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
+ nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
+ ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, -1);
+ nfsm_chain_add_32(error, &nmreq, -1);
+ }
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
+ if ((lockerror = nfs_node_lock(np))) { /* a lock failure replaces the RPC result */
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
+ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
+ nfsmout_if(error);
+ /* if file hadn't changed, update cached mtime */
+ if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
+ }
+ /* if directory hadn't changed, update namecache mtime */
+ if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
+ nfstimespeccmp(&np->n_ncmtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
+ }
+ if (!wccpostattr) { /* no post-op attributes in the reply: cached attributes can't be trusted */
+ NATTRINVALIDATE(np);
+ }
+ error = status;
+ } else {
+ if (!error) {
+ error = status;
+ }
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+ }
+ /*
+ * We just changed the attributes and we want to make sure that we
+ * see the latest attributes. Get the next XID. If it's not the
+ * next XID after the SETATTR XID, then it's possible that another
+ * RPC was in flight at the same time and it might put stale attributes
+ * in the cache. In that case, we invalidate the attributes and set
+ * the attribute cache XID to guarantee that newer attributes will
+ * get loaded next.
+ */
+ nextxid = 0;
+ nfs_get_xid(&nextxid);
+ if (nextxid != (xid + 1)) {
+ np->n_xid = nextxid;
+ NATTRINVALIDATE(np);
+ }
+nfsmout:
+ if (!lockerror) {
+ nfs_node_unlock(np);
+ }
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+}
+
+/*
+ * NFS lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the RPC
+ *
+ * Looks up the component ap->a_cnp in directory ap->a_dvp and, on success,
+ * returns the resulting vnode in *ap->a_vpp (which is NULLVP on failure).
+ *
+ * The directory's attributes are refreshed first (nfs_getattr), then the
+ * name cache is consulted and, for mounts with RDIRPLUS set, the directory
+ * buffer cache.  A cache hit is still subject to a KAUTH_VNODE_SEARCH access
+ * check on the directory.  "." and ".." are resolved locally; anything else
+ * goes to the server through the mount's nf_lookup_rpc_async routines.
+ *
+ * An ENOENT result for the last component of a CREATE or RENAME is turned
+ * into EJUSTRETURN (or EROFS on a read-only mount).  Other ENOENT results
+ * are entered into the name cache as negative entries when MAKEENTRY is set,
+ * the operation is not CREATE, and negative name caching is enabled.
+ */
+int
+nfs_vnop_lookup(
+ struct vnop_lookup_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_dvp;
+ * vnode_t *a_vpp;
+ * struct componentname *a_cnp;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ struct componentname *cnp = ap->a_cnp;
+ vnode_t dvp = ap->a_dvp;
+ vnode_t *vpp = ap->a_vpp;
+ int flags = cnp->cn_flags;
+ vnode_t newvp;
+ nfsnode_t dnp, np;
+ struct nfsmount *nmp;
+ mount_t mp;
+ int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache; /* busyerror is 0 only while we hold dnp busy */
+ u_int64_t xid = 0;
+ struct nfs_vattr *nvattr;
+ int ngflags, skipdu = 0;
+ struct vnop_access_args naa;
+ fhandle_t *fh;
+ struct nfsreq *req;
+
+ *vpp = NULLVP;
+
+ dnp = VTONFS(dvp);
+
+ fh = zalloc(nfs_fhandle_zone);
+ req = zalloc_flags(nfs_req_zone, Z_WAITOK);
+ MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);
+ NVATTR_INIT(nvattr);
+
+ mp = vnode_mount(dvp);
+ nmp = VFSTONFS(mp);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ goto error_return;
+ }
+ nfsvers = nmp->nm_vers;
+ negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);
+
+ if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)))) {
+ goto error_return;
+ }
+ /* nfs_getattr() will check changed and purge caches */
+ if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED))) {
+ goto error_return;
+ }
+
+ error = cache_lookup(dvp, vpp, cnp);
+ switch (error) {
+ case ENOENT:
+ /* negative cache entry */
+ goto error_return;
+ case 0:
+ /* cache miss */
+ if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
+ /* if rdirplus, try dir buf cache lookup */
+ error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0, &skipdu);
+ if (!error && np) {
+ /* dir buf cache hit */
+ *vpp = NFSTOV(np);
+ error = -1; /* treat like a name cache hit and fall into case -1 below */
+ } else if (skipdu) {
+ /* Skip lookup for du files */
+ error = ENOENT;
+ goto error_return;
+ }
+ }
+ if (error != -1) { /* cache miss */
+ break;
+ }
+ OS_FALLTHROUGH;
+ case -1:
+ /* cache hit, not really an error */
+ OSAddAtomic64(1, &nfsstats.lookupcache_hits);
+
+ nfs_node_clear_busy(dnp);
+ busyerror = ENOENT;
+
+ /* check for directory access */
+ naa.a_desc = &vnop_access_desc;
+ naa.a_vp = dvp;
+ naa.a_action = KAUTH_VNODE_SEARCH;
+ naa.a_context = ctx;
+
+ /* compute actual success/failure based on accessibility */
+ error = nfs_vnop_access(&naa);
+ OS_FALLTHROUGH;
+ default:
+ /* unexpected error from cache_lookup */
+ goto error_return;
+ }
+
+ /* skip lookup, if we know who we are: "." or ".." */
+ isdot = isdotdot = 0;
+ if (cnp->cn_nameptr[0] == '.') {
+ if (cnp->cn_namelen == 1) {
+ isdot = 1;
+ }
+ if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.')) {
+ isdotdot = 1;
+ }
+ }
+ if (isdotdot || isdot) {
+ fh->fh_len = 0; /* no file handle (and no attributes) were fetched for "." or ".." */
+ goto found;
+ }
+#if CONFIG_NFS4
+ if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
+ /* we should never be looking things up in a trigger directory, return nothing */
+ error = ENOENT;
+ goto error_return;
+ }
+#endif
+
+ /* do we know this name is too long? */
+ nmp = VTONMP(dvp);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ goto error_return;
+ }
+ if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
+ (cnp->cn_namelen > nmp->nm_fsattr.nfsa_maxname)) {
+ error = ENAMETOOLONG;
+ goto error_return;
+ }
+
+ error = 0;
+ newvp = NULLVP;
+
+ OSAddAtomic64(1, &nfsstats.lookupcache_misses);
+
+ error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
+ nfsmout_if(error);
+ error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, fh, nvattr);
+ nfsmout_if(error);
+
+ /* is the file handle the same as this directory's file handle? */
+ isdot = NFS_CMPFH(dnp, fh->fh_data, fh->fh_len);
+
+found:
+ if (flags & ISLASTCN) {
+ switch (cnp->cn_nameiop) {
+ case DELETE:
+ cnp->cn_flags &= ~MAKEENTRY;
+ break;
+ case RENAME:
+ cnp->cn_flags &= ~MAKEENTRY;
+ if (isdot) {
+ error = EISDIR;
+ goto error_return;
+ }
+ break;
+ }
+ }
+
+ if (isdotdot) {
+ newvp = vnode_getparent(dvp);
+ if (!newvp) {
+ error = ENOENT;
+ goto error_return;
+ }
+ } else if (isdot) {
+ error = vnode_get(dvp);
+ if (error) {
+ goto error_return;
+ }
+ newvp = dvp;
+ nfs_node_lock_force(dnp);
+ if (fh->fh_len && (dnp->n_xid <= xid)) { /* only load attributes that came from an RPC and are not older than the cached ones */
+ nfs_loadattrcache(dnp, nvattr, &xid, 0);
+ }
+ nfs_node_unlock(dnp);
+ } else {
+ ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
+ error = nfs_nget(mp, dnp, cnp, fh->fh_data, fh->fh_len, nvattr, &xid, req->r_auth, ngflags, &np);
+ if (error) {
+ goto error_return;
+ }
+ newvp = NFSTOV(np);
+ nfs_node_unlock(np);
+ }
+ *vpp = newvp;
+
+nfsmout:
+ if (error) {
+ if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
+ (flags & ISLASTCN) && (error == ENOENT)) {
+ if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp)) {
+ error = EROFS;
+ } else {
+ error = EJUSTRETURN;
+ }
+ }
+ }
+ if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != CREATE) && negnamecache) {
+ /* add a negative entry in the name cache */
+ nfs_node_lock_force(dnp);
+ cache_enter(dvp, NULL, cnp);
+ dnp->n_flag |= NNEGNCENTRIES;
+ nfs_node_unlock(dnp);
+ }
+error_return:
+ NVATTR_CLEANUP(nvattr);
+ NFS_ZFREE(nfs_fhandle_zone, fh);
+ NFS_ZFREE(nfs_req_zone, req);
+ FREE(nvattr, M_TEMP);
+ if (!busyerror) {
+ nfs_node_clear_busy(dnp);
+ }
+ if (error && *vpp) { /* don't hand back a referenced vnode together with an error */
+ vnode_put(*vpp);
+ *vpp = NULLVP;
+ }
+ return error;
+}
+
+int nfs_readlink_nocache = DEFAULT_READLINK_NOCACHE; /* readlink caching policy used by nfs_vnop_readlink(): non-zero = fetch attributes uncached and re-read a cached link older than the attribute cache timeout; greater than 1 = always re-read */
+
+/*
+ * NFS readlink call
+ *
+ * Copies the target of the symbolic link ap->a_vp into ap->a_uio.
+ *
+ * The link text lives in block 0 of the node's buffer cache (NBLK_META).
+ * If that buffer is not marked NB_CACHE, it is filled from the server with
+ * the mount's nf_readlink_rpc routine; an ESTALE reply triggers a file
+ * handle refresh and another attempt.  When nfs_readlink_nocache is set, a
+ * cached buffer is invalidated and re-read if it is older than the
+ * attribute cache timeout (or unconditionally when the value is > 1).
+ *
+ * Returns 0 on success (also when the uio has no room), EPERM if the vnode
+ * is not a symbolic link, EINVAL for a negative uio offset, ENXIO if the
+ * mount is gone, or an error from nfs_getattr(), nfs_buf_get(), the
+ * readlink RPC or uiomove().
+ */
+int
+nfs_vnop_readlink(
+ struct vnop_readlink_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct uio *a_uio;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ nfsnode_t np = VTONFS(ap->a_vp);
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ size_t buflen;
+ uio_t uio = ap->a_uio;
+ struct nfsbuf *bp = NULL;
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 }; /* stays zero unless nfs_readlink_nocache is set */
+ long timeo = 0;
+
+ if (vnode_vtype(ap->a_vp) != VLNK) {
+ return EPERM;
+ }
+
+ if (uio_resid(uio) == 0) {
+ return 0;
+ }
+ if (uio_offset(uio) < 0) {
+ return EINVAL;
+ }
+
+ nmp = VTONMP(ap->a_vp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+
+ /* nfs_getattr() will check changed and purge caches */
+ if ((error = nfs_getattr(np, NULL, ctx, nfs_readlink_nocache ? NGA_UNCACHED : NGA_CACHED))) {
+ FSDBG(531, np, 0xd1e0001, 0, error);
+ return error;
+ }
+
+ if (nfs_readlink_nocache) {
+ timeo = nfs_attrcachetimeout(np);
+ nanouptime(&ts);
+ }
+
+retry:
+ OSAddAtomic64(1, &nfsstats.biocache_readlinks);
+ error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_META, &bp);
+ if (error) {
+ FSDBG(531, np, 0xd1e0002, 0, error);
+ return error;
+ }
+
+ if (nfs_readlink_nocache) {
+ NFS_VNOP_DBG("timeo = %ld ts.tv_sec = %ld need refresh = %d cached = %d\n", timeo, ts.tv_sec,
+ (np->n_rltim.tv_sec + timeo) < ts.tv_sec || nfs_readlink_nocache > 1,
+ ISSET(bp->nb_flags, NB_CACHE) == NB_CACHE);
+ /* n_rltim is synchronized by the associated nfs buf */
+ if (ISSET(bp->nb_flags, NB_CACHE) && ((nfs_readlink_nocache > 1) || ((np->n_rltim.tv_sec + timeo) < ts.tv_sec))) {
+ SET(bp->nb_flags, NB_INVAL); /* cached link text is too old (or caching is off): drop it and get a fresh buffer */
+ nfs_buf_release(bp, 0);
+ goto retry;
+ }
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE)) {
+readagain:
+ OSAddAtomic64(1, &nfsstats.readlink_bios);
+ buflen = bp->nb_bufsize;
+ error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
+ if (error) {
+ if (error == ESTALE) {
+ NFS_VNOP_DBG("Stale FH from readlink rpc\n");
+ error = nfs_refresh_fh(np, ctx);
+ if (error == 0) {
+ goto readagain;
+ }
+ }
+ SET(bp->nb_flags, NB_ERROR);
+ bp->nb_error = error;
+ NFS_VNOP_DBG("readlink failed %d\n", error);
+ } else {
+ bp->nb_validoff = 0;
+ bp->nb_validend = buflen;
+ np->n_rltim = ts; /* remember when the link text was fetched */
+ NFS_VNOP_DBG("readlink of %.*s\n", (int32_t)bp->nb_validend, (char *)bp->nb_data);
+ }
+ } else {
+ NFS_VNOP_DBG("got cached link of %.*s\n", (int32_t)bp->nb_validend, (char *)bp->nb_data);
+ }
+
+ if (!error && (bp->nb_validend > 0)) {
+ int validend32 = bp->nb_validend > INT_MAX ? INT_MAX : (int)bp->nb_validend; /* the length is passed to uiomove() as an int, so copy in up to two pieces */
+ error = uiomove(bp->nb_data, validend32, uio);
+ if (!error && bp->nb_validend > validend32) {
+ error = uiomove(bp->nb_data + validend32, (int)(bp->nb_validend - validend32), uio);
+ }
+ }
+ FSDBG(531, np, bp->nb_validend, 0, error);
+ nfs_buf_release(bp, 1);
+ return error;
+}
+
+/*
+ * Do a readlink RPC.
+ *
+ * Sends a READLINK request for np and copies the returned link text into
+ * buf.  On entry *buflenp is the capacity of buf; on success it is updated
+ * to the number of bytes stored.
+ *
+ * If the server's reported length does not fit (length >= *buflenp), an
+ * NFSv2 reply that is strictly larger than the buffer is rejected with
+ * EBADRPC; otherwise the copy is shortened to the node's n_size (when that
+ * is nonzero and smaller than the buffer) or to *buflenp - 1.
+ *
+ * Returns 0 on success, ENXIO if the mount is gone, or the error from
+ * building/sending the request, locking the node, or parsing the reply.
+ */
+int
+nfs3_readlink_rpc(nfsnode_t np, char *buf, size_t *buflenp, vfs_context_t ctx)
+{
+	struct nfsm_chain nmreq, nmrep;
+	struct nfsmount *nmp = NFSTONMP(np);
+	u_int64_t xid;
+	size_t linklen;
+	int nfsvers, status;
+	int error = 0;
+	/* nonzero until the node lock is actually taken, so cleanup knows not to unlock */
+	int lockerror = ENOENT;
+
+	if (nfs_mount_gone(nmp)) {
+		return ENXIO;
+	}
+	nfsvers = nmp->nm_vers;
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_null(&nmrep);
+
+	/* the request carries only the file handle */
+	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsmout_if(error);
+
+	error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
+
+	/* the attribute update below is done with the node locked */
+	lockerror = nfs_node_lock(np);
+	if (lockerror) {
+		error = lockerror;
+	}
+	if (nfsvers == NFS_VER3) {
+		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
+	}
+	if (!error) {
+		error = status;
+	}
+	nfsm_chain_get_32(error, &nmrep, linklen);
+	nfsmout_if(error);
+
+	if (linklen >= *buflenp) {
+		/* link text does not fit in the caller's buffer */
+		if ((nfsvers == NFS_VER2) && (linklen != *buflenp)) {
+			error = EBADRPC;
+			goto nfsmout;
+		}
+		linklen = (np->n_size && (np->n_size < *buflenp)) ?
+		    (size_t)np->n_size : *buflenp - 1;
+	}
+	nfsm_chain_get_opaque(error, &nmrep, linklen, buf);
+	if (!error) {
+		*buflenp = linklen;
+	}
+nfsmout:
+	if (!lockerror) {
+		nfs_node_unlock(np);
+	}
+	nfsm_chain_cleanup(&nmreq);
+	nfsm_chain_cleanup(&nmrep);
+	return error;
+}
+
+/*
+ * NFS read RPC call
+ *
+ * Synchronously read the range described by uio (its offset and residual
+ * count) from the server into uio.  The range is split into requests of at
+ * most nm_rsize bytes, each issued through the mount's nf_read_rpc_async
+ * routine and completed with nf_read_rpc_async_finish.
+ *
+ * The loop ends early when a request returns an error, when the node is
+ * flagged NREVOKE (EIO), on EOF or a zero-length reply for non-NFSv2 mounts,
+ * or on a short reply for NFSv2.  With CONFIG_NFS4, errors that
+ * nfs_mount_state_error_should_restart() accepts cause the same chunk to be
+ * retried after recovery, up to nfs_mount_state_max_restarts() times.
+ *
+ * Returns 0 on success, ENXIO if the mount is gone, EFBIG if an NFSv2
+ * request would extend past a 32-bit offset, EIO for a revoked node, or the
+ * error from the underlying RPC.
+ */
+int
+nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
+{
+	struct nfsmount *nmp;
+	int error = 0, nfsvers, eof = 0;
+	size_t nmrsize, len, retlen;
+	user_ssize_t tsiz;
+	off_t txoffset;
+	struct nfsreq *req;
+#if CONFIG_NFS4
+	uint32_t stategenid = 0, restart = 0;
+#endif
+	FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
+	nmp = NFSTONMP(np);
+	if (nfs_mount_gone(nmp)) {
+		/* close the trace interval opened above, as the EFBIG exit below does */
+		FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), ENXIO);
+		return ENXIO;
+	}
+	nfsvers = nmp->nm_vers;
+	nmrsize = nmp->nm_rsize;
+
+	txoffset = uio_offset(uio);
+	tsiz = uio_resid(uio);
+	if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
+		FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
+		return EFBIG;
+	}
+
+	/* one request structure is reused for every chunk */
+	req = zalloc_flags(nfs_req_zone, Z_WAITOK);
+	while (tsiz > 0) {
+		/* ask for the remaining bytes, capped at the mount's read size */
+		len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
+		FSDBG(536, np, txoffset, len, 0);
+		if (np->n_flag & NREVOKE) {
+			error = EIO;
+			break;
+		}
+#if CONFIG_NFS4
+		if (nmp->nm_vers >= NFS_VER4) {
+			/* remember the state generation so we only start recovery once per generation */
+			stategenid = nmp->nm_stategenid;
+		}
+#endif
+		error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
+		    vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
+		if (!error) {
+			error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
+		}
+#if CONFIG_NFS4
+		if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+		    (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+			lck_mtx_lock(&nmp->nm_lock);
+			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+				NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
+				nfs_need_recover(nmp, error);
+			}
+			lck_mtx_unlock(&nmp->nm_lock);
+			if (np->n_flag & NREVOKE) {
+				error = EIO;
+			} else {
+				if (error == NFSERR_GRACE) {
+					tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
+				}
+				if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+					/* recovery finished: retry the same chunk */
+					continue;
+				}
+			}
+		}
+#endif
+		if (error) {
+			break;
+		}
+		txoffset += retlen;
+		tsiz -= retlen;
+		if (nfsvers != NFS_VER2) {
+			if (eof || (retlen == 0)) {
+				tsiz = 0;
+			}
+		} else if (retlen < len) {
+			/* NFSv2: a short reply ends the transfer */
+			tsiz = 0;
+		}
+	}
+
+	NFS_ZFREE(nfs_req_zone, req);
+	FSDBG_BOT(536, np, eof, uio_resid(uio), error);
+	return error;
+}
+
+/*
+ * Build an NFSv2/v3 READ request for `len` bytes at `offset` of np and hand
+ * it to nfs_request_async() with the given thread, credential, callback
+ * info and request pointer.
+ *
+ * Returns ENXIO if the mount is gone, the error from building the request,
+ * or whatever nfs_request_async() returns.
+ */
+int
+nfs3_read_rpc_async(
+	nfsnode_t np,
+	off_t offset,
+	size_t len,
+	thread_t thd,
+	kauth_cred_t cred,
+	struct nfsreq_cbinfo *cb,
+	struct nfsreq **reqp)
+{
+	struct nfsm_chain nmreq;
+	struct nfsmount *nmp = NFSTONMP(np);
+	int nfsvers;
+	int error = 0;
+
+	if (nfs_mount_gone(nmp)) {
+		return ENXIO;
+	}
+	nfsvers = nmp->nm_vers;
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+
+	/* the offset is a 64-bit field for v3 and a 32-bit field otherwise */
+	if (nfsvers == NFS_VER3) {
+		nfsm_chain_add_64(error, &nmreq, offset);
+	} else {
+		nfsm_chain_add_32(error, &nmreq, offset);
+	}
+	/* the requested length follows in both encodings */
+	nfsm_chain_add_32(error, &nmreq, len);
+	if (nfsvers != NFS_VER3) {
+		/* the non-v3 request ends with one extra word, always sent as zero */
+		nfsm_chain_add_32(error, &nmreq, 0);
+	}
+	nfsm_chain_build_done(error, &nmreq);
+	nfsmout_if(error);
+
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	return error;
+}
+
+/*
+ * Complete an asynchronous NFSv2/v3 READ started on `req`.
+ *
+ * Collects the reply, updates the node's attributes from it (under the node
+ * lock), and copies the returned data into uio.  On entry *lenp is the
+ * number of bytes that were requested; when the reply parses it is updated
+ * to the smaller of that and the length the server reported.  If eofp is
+ * non-NULL, *eofp is set to the EOF indication: for NFSv3 the flag from the
+ * reply (forced on for a zero-length reply), otherwise whether the reply was
+ * shorter than requested.
+ *
+ * Returns ENXIO (after cancelling the request) if the mount is gone,
+ * EINPROGRESS if the request was restarted, EBADRPC if an NFSv2 reply
+ * claims more data than requested, or the error from the RPC, the node lock,
+ * reply parsing, or the copy into uio.
+ */
+int
+nfs3_read_rpc_async_finish(
+	nfsnode_t np,
+	struct nfsreq *req,
+	uio_t uio,
+	size_t *lenp,
+	int *eofp)
+{
+	struct nfsm_chain nmrep;
+	struct nfsmount *nmp = NFSTONMP(np);
+	uint64_t xid;
+	uint32_t retlen = 0;
+	size_t copylen;
+	int error = 0, status = 0, eof = 0;
+	int lockerror, nfsvers;
+
+	if (nfs_mount_gone(nmp)) {
+		nfs_request_async_cancel(req);
+		return ENXIO;
+	}
+	nfsvers = nmp->nm_vers;
+
+	nfsm_chain_null(&nmrep);
+
+	error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+	if (error == EINPROGRESS) {
+		/* the async request was restarted; there is no reply to parse yet */
+		return error;
+	}
+
+	/* attribute handling is done with the node locked */
+	lockerror = nfs_node_lock(np);
+	if (lockerror) {
+		error = lockerror;
+	}
+	if (nfsvers == NFS_VER3) {
+		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
+		if (!error) {
+			error = status;
+		}
+		/* skip one 32-bit word (presumably the reply's count field), then read the eof flag */
+		nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
+		nfsm_chain_get_32(error, &nmrep, eof);
+	} else {
+		if (!error) {
+			error = status;
+		}
+		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+	}
+	if (!lockerror) {
+		nfs_node_unlock(np);
+	}
+
+	nfsm_chain_get_32(error, &nmrep, retlen);
+	if ((nfsvers == NFS_VER2) && (retlen > *lenp)) {
+		error = EBADRPC;
+	}
+	nfsmout_if(error);
+
+	/* never copy more than the caller asked for */
+	copylen = MIN(retlen, *lenp);
+	error = nfsm_chain_get_uio(&nmrep, copylen, uio);
+	if (eofp) {
+		int at_end = (nfsvers == NFS_VER3) ? (!eof && !retlen) : (retlen < *lenp);
+
+		if (at_end) {
+			eof = 1;
+		}
+		*eofp = eof;
+	}
+	*lenp = copylen;
+nfsmout:
+	nfsm_chain_cleanup(&nmrep);
+	return error;
+}
+
+/*
+ * NFS write call
+ */
+int
+nfs_vnop_write(
+ struct vnop_write_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct uio *a_uio;
+ * int a_ioflag;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ uio_t uio = ap->a_uio;
+ vnode_t vp = ap->a_vp;
+ nfsnode_t np = VTONFS(vp);
+ int ioflag = ap->a_ioflag;
+ struct nfsbuf *bp;
+ struct nfsmount *nmp = VTONMP(vp);
+ daddr64_t lbn;
+ uint32_t biosize;
+ int error = 0;
+ off_t n, on;
+ int n32;
+ off_t boff, start, end;
+ uio_t auio;
+ char auio_buf[UIO_SIZEOF(1)];
+ thread_t thd;
+ kauth_cred_t cred;
+
+ FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
+
+ if (vnode_vtype(vp) != VREG) {
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
+ return EIO;
+ }
+
+ thd = vfs_context_thread(ctx);
+ cred = vfs_context_ucred(ctx);
+
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+
+ if ((error = nfs_node_lock(np))) {
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return error;
+ }
+ np->n_wrbusy++;
+
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+ if (np->n_flag & NNEEDINVALIDATE) {
+ np->n_flag &= ~NNEEDINVALIDATE;
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+ } else {
+ nfs_node_unlock(np);
+ }
+ if (error) {
+ goto out;
+ }
+
+ biosize = nmp->nm_biosize;
+
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ nfs_node_lock_force(np);
+ if (np->n_flag & NMODIFIED) {
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
+ goto out;
+ }
+ } else {
+ nfs_node_unlock(np);
+ }
+ if (ioflag & IO_APPEND) {
+ nfs_data_unlock(np);
+ /* nfs_getattr() will check changed and purge caches */
+ error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
+ /* we'll be extending the file, so take the data lock exclusive */
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
+ goto out;
+ }
+ uio_setoffset(uio, np->n_size);
+ }
+ }
+ if (uio_offset(uio) < 0) {
+ error = EINVAL;
+ FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
+ goto out;
+ }
+ if (uio_resid(uio) == 0) {
+ goto out;
+ }
+
+ if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
+ /*
+ * It looks like we'll be extending the file, so take the data lock exclusive.
+ */
+ nfs_data_unlock(np);
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+
+ /*
+ * Also, if the write begins after the previous EOF buffer, make sure to zero
+ * and validate the new bytes in that buffer.
+ */
+ struct nfsbuf *eofbp = NULL;
+ daddr64_t eofbn = np->n_size / biosize;
+ uint32_t eofoff = np->n_size % biosize;
+ lbn = uio_offset(uio) / biosize;
+
+ if (eofoff && (eofbn < lbn)) {
+ if ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE | NBLK_ONLYVALID, &eofbp))) {
+ goto out;
+ }
+ np->n_size += (biosize - eofoff);
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ if (eofbp) {
+ /*
+ * For the old last page, don't zero bytes if there
+ * are invalid bytes in that page (i.e. the page isn't
+ * currently valid).
+ * For pages after the old last page, zero them and
+ * mark them as valid.
+ */
+ char *d;
+ int i;
+ if (ioflag & IO_NOCACHE) {
+ SET(eofbp->nb_flags, NB_NOCACHE);
+ }
+ NFS_BUF_MAP(eofbp);
+ FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
+ d = eofbp->nb_data;
+ i = eofoff / PAGE_SIZE;
+ while (eofoff < biosize) {
+ int poff = eofoff & PAGE_MASK;
+ if (!poff || NBPGVALID(eofbp, i)) {
+ bzero(d + eofoff, PAGE_SIZE - poff);
+ NBPGVALID_SET(eofbp, i);
+ }
+ eofoff += PAGE_SIZE - poff;
+ i++;
+ }
+ nfs_buf_release(eofbp, 1);
+ }
+ }
+ }
+
+ do {
+ OSAddAtomic64(1, &nfsstats.biocache_writes);
+ lbn = uio_offset(uio) / biosize;
+ on = uio_offset(uio) % biosize;
+ n = biosize - on;
+ if (uio_resid(uio) < n) {
+ n = uio_resid(uio);
+ }
+again:
+ /*
+ * Get a cache block for writing. The range to be written is
+ * (off..off+n) within the block. We ensure that the block
+ * either has no dirty region or that the given range is
+ * contiguous with the existing dirty region.
+ */
+ error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
+ if (error) {
+ goto out;
+ }
+ /* map the block because we know we're going to write to it */
+ NFS_BUF_MAP(bp);
+
+ if (ioflag & IO_NOCACHE) {
+ SET(bp->nb_flags, NB_NOCACHE);
+ }
+
+ if (!IS_VALID_CRED(bp->nb_wcred)) {
+ kauth_cred_ref(cred);
+ bp->nb_wcred = cred;
+ }
+
+ /*
+ * If there's already a dirty range AND dirty pages in this block we
+ * need to send a commit AND write the dirty pages before continuing.
+ *
+ * If there's already a dirty range OR dirty pages in this block
+ * and the new write range is not contiguous with the existing range,
+ * then force the buffer to be written out now.
+ * (We used to just extend the dirty range to cover the valid,
+ * but unwritten, data in between also. But writing ranges
+ * of data that weren't actually written by an application
+ * risks overwriting some other client's data with stale data
+ * that's just masquerading as new written data.)
+ */
+ if (bp->nb_dirtyend > 0) {
+ if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || nfs_buf_pgs_is_set(&bp->nb_dirty)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ goto again;
+ }
+ } else if (nfs_buf_pgs_is_set(&bp->nb_dirty)) {
+ off_t firstpg = 0, lastpg = 0;
+ nfsbufpgs pagemask, pagemaskand;
+ /* calculate write range pagemask */
+ if (n > 0) {
+ firstpg = on / PAGE_SIZE;
+ lastpg = (on + n - 1) / PAGE_SIZE;
+ nfs_buf_pgs_set_pages_between(&pagemask, firstpg, lastpg + 1);
+ } else {
+ NBPGS_ERASE(&pagemask);
+ }
+ /* check if there are dirty pages outside the write range */
+ nfs_buf_pgs_bit_not(&pagemask);
+ nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &pagemaskand);
+ if (nfs_buf_pgs_is_set(&pagemaskand)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ goto again;
+ }
+ /* if the first or last pages are already dirty */
+ /* make sure that the dirty range encompasses those pages */
+ if (NBPGDIRTY(bp, firstpg) || NBPGDIRTY(bp, lastpg)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
+ bp->nb_dirtyoff = MIN(on, firstpg * PAGE_SIZE);
+ if (NBPGDIRTY(bp, lastpg)) {
+ bp->nb_dirtyend = (lastpg + 1) * PAGE_SIZE;
+ /* clip to EOF */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ } else {
+ bp->nb_dirtyend = on + n;
+ }
+ }
+ }
+
+ /*
+ * Are we extending the size of the file with this write?
+ * If so, update file size now that we have the block.
+ * If there was a partial buf at the old eof, validate
+ * and zero the new bytes.
+ */
+ if ((uio_offset(uio) + n) > (off_t)np->n_size) {
+ daddr64_t eofbn = np->n_size / biosize;
+ int neweofoff = (uio_offset(uio) + n) % biosize;
+
+ FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
+
+ /* if we're extending within the same last block */
+ /* and the block is flagged as being cached... */
+ if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
+ /* ...check that all pages in buffer are valid */
+ int endpg = ((neweofoff ? neweofoff : biosize) - 1) / PAGE_SIZE;
+ nfsbufpgs pagemask, pagemaskand;
+ /* pagemask only has to extend to last page being written to */
+ nfs_buf_pgs_get_page_mask(&pagemask, endpg + 1);
+ FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
+ nfs_buf_pgs_bit_and(&bp->nb_valid, &pagemask, &pagemaskand);
+ if (!NBPGS_IS_EQUAL(&pagemaskand, &pagemask)) {
+ /* zerofill any hole */
+ if (on > bp->nb_validend) {
+ for (off_t i = bp->nb_validend / PAGE_SIZE; i <= (on - 1) / PAGE_SIZE; i++) {
+ NBPGVALID_SET(bp, i);
+ }
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
+ NFS_BZERO((char *)bp->nb_data + bp->nb_validend, on - bp->nb_validend);
+ }
+ /* zerofill any trailing data in the last page */
+ if (neweofoff) {
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
+ bzero((char *)bp->nb_data + neweofoff,
+ PAGE_SIZE - (neweofoff & PAGE_MASK));
+ }
+ }
+ }
+ np->n_size = uio_offset(uio) + n;
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ }
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ /*
+ * UBC doesn't handle partial pages, so we need to make sure
+ * that any pages left in the page cache are completely valid.
+ *
+ * Writes that are smaller than a block are delayed if they
+ * don't extend to the end of the block.
+ *
+ * If the block isn't (completely) cached, we may need to read
+ * in some parts of pages that aren't covered by the write.
+ * If the write offset (on) isn't page aligned, we'll need to
+ * read the start of the first page being written to. Likewise,
+ * if the offset of the end of the write (on+n) isn't page aligned,
+ * we'll need to read the end of the last page being written to.
+ *
+ * Notes:
+ * We don't want to read anything we're just going to write over.
+ * We don't want to read anything we're just going to drop when the
+ * I/O is complete (i.e. don't do reads for NOCACHE requests).
+ * We don't want to issue multiple I/Os if we don't have to
+ * (because they're synchronous rpcs).
+ * We don't want to read anything we already have modified in the
+ * page cache.
+ */
+ if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+ off_t firstpgoff, lastpgoff, firstpg, lastpg, dirtypg;
+ start = end = -1;
+ firstpg = on / PAGE_SIZE;
+ firstpgoff = on & PAGE_MASK;
+ lastpg = (on + n - 1) / PAGE_SIZE;
+ lastpgoff = (on + n) & PAGE_MASK;
+ if (firstpgoff && !NBPGVALID(bp, firstpg)) {
+ /* need to read start of first page */
+ start = firstpg * PAGE_SIZE;
+ end = start + firstpgoff;
+ }
+ if (lastpgoff && !NBPGVALID(bp, lastpg)) {
+ /* need to read end of last page */
+ if (start < 0) {
+ start = (lastpg * PAGE_SIZE) + lastpgoff;
+ }
+ end = (lastpg + 1) * PAGE_SIZE;
+ }
+ if (ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /*
+ * For nocache writes, if there is any partial page at the
+ * start or end of the write range, then we do the write
+ * synchronously to make sure that we can drop the data
+ * from the cache as soon as the WRITE finishes. Normally,
+ * we would do an unstable write and not drop the data until
+ * it was committed. But doing that here would risk allowing
+ * invalid data to be read from the cache between the WRITE
+ * and the COMMIT.
+ * (NB_STABLE indicates that data writes should be FILESYNC)
+ */
+ if (end > start) {
+ SET(bp->nb_flags, NB_STABLE);
+ }
+ goto skipread;
+ }
+ if (end > start) {
+ /* need to read the data in range: start...end-1 */
+
+ /* first, check for dirty pages in between */
+ /* if there are, we'll have to do two reads because */
+ /* we don't want to overwrite the dirty pages. */
+ for (dirtypg = start / PAGE_SIZE; dirtypg <= (end - 1) / PAGE_SIZE; dirtypg++) {
+ if (NBPGDIRTY(bp, dirtypg)) {
+ break;
+ }
+ }
+
+ /* if start is at beginning of page, try */
+ /* to get any preceding pages as well. */
+ if (!(start & PAGE_MASK)) {
+ /* stop at next dirty/valid page or start of block */
+ for (; start > 0; start -= PAGE_SIZE) {
+ if (NBPGVALID(bp, ((start - 1) / PAGE_SIZE))) {
+ break;
+ }
+ }
+ }
+
+ NFS_BUF_MAP(bp);
+ /* setup uio for read(s) */
+ boff = NBOFF(bp);
+ auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+ &auio_buf, sizeof(auio_buf));
+
+ if (dirtypg <= (end - 1) / PAGE_SIZE) {
+ /* there's a dirty page in the way, so just do two reads */
+ /* we'll read the preceding data here */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ NFS_UIO_ADDIOV(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
+ bp->nb_validoff = start;
+ }
+ if ((bp->nb_validend < 0) || (bp->nb_validend < on)) {
+ bp->nb_validend = on;
+ }
+ if ((off_t)np->n_size > boff + bp->nb_validend) {
+ bp->nb_validend = MIN(np->n_size - (boff + start), biosize);
+ }
+ /* validate any pages before the write offset */
+ for (; start < on / PAGE_SIZE; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ }
+ }
+ /* adjust start to read any trailing data */
+ start = on + n;
+ }
+
+ /* if end is at end of page, try to */
+ /* get any following pages as well. */
+ if (!(end & PAGE_MASK)) {
+ /* stop at next valid page or end of block */
+ for (; end < biosize; end += PAGE_SIZE) {
+ if (NBPGVALID(bp, end / PAGE_SIZE)) {
+ break;
+ }
+ }
+ }
+
+ if (((boff + start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ NFS_BZERO(bp->nb_data + start, end - start);
+ error = 0;
+ } else {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ NFS_UIO_ADDIOV(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
+ bp->nb_validoff = start;
+ }
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end)) {
+ bp->nb_validend = end;
+ }
+ if ((off_t)np->n_size > boff + bp->nb_validend) {
+ bp->nb_validend = MIN(np->n_size - (boff + start), biosize);
+ }
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_64(on); start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ }
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_64(on + n - 1); end -= PAGE_SIZE) {
+ NBPGVALID_SET(bp, (end - 1) / PAGE_SIZE);
+ }
+ }
+ /* Note: pages being written to will be validated when written */
+ }
+ }
+skipread:
+
+ if (ISSET(bp->nb_flags, NB_ERROR)) {
+ error = bp->nb_error;
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ nfs_node_lock_force(np);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+
+ NFS_BUF_MAP(bp);
+ if (n < 0) {
+ error = EINVAL;
+ } else {
+ n32 = n > INT_MAX ? INT_MAX : (int)n;
+ error = uiomove(bp->nb_data + on, n32, uio);
+ if (!error && n > n32) {
+ error = uiomove(bp->nb_data + on + n32, (int)(n - n32), uio);
+ }
+ }
+ if (error) {
+ SET(bp->nb_flags, NB_ERROR);
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ /* validate any pages written to */
+ start = on & ~PAGE_MASK;
+ for (; start < on + n; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ /*
+ * This may seem a little weird, but we don't actually set the
+ * dirty bits for writes. This is because we keep the dirty range
+ * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
+ * delayed writes, when we give the pages back to the VM we don't
+ * want to keep them marked dirty, because when we later write the
+ * buffer we won't be able to tell which pages were written dirty
+ * and which pages were mmapped and dirtied.
+ */
+ }
+ if (bp->nb_dirtyend > 0) {
+ bp->nb_dirtyoff = MIN(on, bp->nb_dirtyoff);
+ bp->nb_dirtyend = MAX((on + n), bp->nb_dirtyend);