+ if (nvattr != NULL) {
+ NVATTR_CLEANUP(nvap);
+ FREE(nvattr, M_TEMP);
+ } else if (!(flags & NGA_ACL)) {
+ /* make sure we don't return an ACL if it wasn't asked for */
+ NFS_BITMAP_CLR(nvap->nva_bitmap, NFS_FATTR_ACL);
+ if (nvap->nva_acl) {
+ kauth_acl_free(nvap->nva_acl);
+ nvap->nva_acl = NULL;
+ }
+ }
+ FSDBG_BOT(513, np->n_size, error, np->n_vattr.nva_size, np->n_flag);
+ return error;
+}
+
+
+/*
+ * NFS getattr call from vfs.
+ */
+
+/*
+ * The attributes we support over the wire.
+ * We also get fsid but the vfs layer gets it out of the mount
+ * structure after this calling us so there's no need to return it,
+ * and Finder expects to call getattrlist just looking for the FSID
+ * with out hanging on a non responsive server.
+ */
/* NB: va_fsid is intentionally absent -- the VFS layer supplies fsid from the mount structure. */
#define NFS3_SUPPORTED_VATTRS \
	(VNODE_ATTR_va_rdev |           \
	VNODE_ATTR_va_nlink |           \
	VNODE_ATTR_va_data_size |       \
	VNODE_ATTR_va_data_alloc |      \
	VNODE_ATTR_va_uid |             \
	VNODE_ATTR_va_gid |             \
	VNODE_ATTR_va_mode |            \
	VNODE_ATTR_va_modify_time |     \
	VNODE_ATTR_va_change_time |     \
	VNODE_ATTR_va_access_time |     \
	VNODE_ATTR_va_fileid |          \
	VNODE_ATTR_va_type)
+
+
+int
+nfs3_vnop_getattr(
+ struct vnop_getattr_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct vnode_attr *a_vap;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ int error;
+ nfsnode_t np;
+ uint64_t supported_attrs;
+ struct nfs_vattr *nva;
+ struct vnode_attr *vap = ap->a_vap;
+ struct nfsmount *nmp;
+ dev_t rdev;
+
+ nmp = VTONMP(ap->a_vp);
+
+ /*
+ * Lets don't go over the wire if we don't support any of the attributes.
+ * Just fall through at the VFS layer and let it cons up what it needs.
+ */
+ /* Return the io size no matter what, since we don't go over the wire for this */
+ VATTR_RETURN(vap, va_iosize, nfs_iosize);
+
+ supported_attrs = NFS3_SUPPORTED_VATTRS;
+
+ if ((vap->va_active & supported_attrs) == 0) {
+ return 0;
+ }
+
+ if (VATTR_IS_ACTIVE(ap->a_vap, va_name)) {
+ NFS_VNOP_DBG("Getting attrs for 0x%llx, vname is %s\n",
+ (uint64_t)VM_KERNEL_ADDRPERM(ap->a_vp),
+ ap->a_vp->v_name ? ap->a_vp->v_name : "empty");
+ }
+
+ /*
+ * We should not go over the wire if only fileid was requested and has ever been populated.
+ */
+ if ((vap->va_active & supported_attrs) == VNODE_ATTR_va_fileid) {
+ np = VTONFS(ap->a_vp);
+ if (np->n_attrstamp) {
+ VATTR_RETURN(vap, va_fileid, np->n_vattr.nva_fileid);
+ return 0;
+ }
+ }
+
+ MALLOC(nva, struct nfs_vattr *, sizeof(*nva), M_TEMP, M_WAITOK);
+ error = nfs_getattr(VTONFS(ap->a_vp), nva, ap->a_context, NGA_CACHED);
+ if (error) {
+ goto out;
+ }
+
+ /* copy nva to *a_vap */
+ VATTR_RETURN(vap, va_type, nva->nva_type);
+ VATTR_RETURN(vap, va_mode, nva->nva_mode);
+ rdev = makedev(nva->nva_rawdev.specdata1, nva->nva_rawdev.specdata2);
+ VATTR_RETURN(vap, va_rdev, rdev);
+ VATTR_RETURN(vap, va_uid, nva->nva_uid);
+ VATTR_RETURN(vap, va_gid, nva->nva_gid);
+ VATTR_RETURN(vap, va_nlink, nva->nva_nlink);
+ VATTR_RETURN(vap, va_fileid, nva->nva_fileid);
+ VATTR_RETURN(vap, va_data_size, nva->nva_size);
+ VATTR_RETURN(vap, va_data_alloc, nva->nva_bytes);
+ vap->va_access_time.tv_sec = nva->nva_timesec[NFSTIME_ACCESS];
+ vap->va_access_time.tv_nsec = nva->nva_timensec[NFSTIME_ACCESS];
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ vap->va_modify_time.tv_sec = nva->nva_timesec[NFSTIME_MODIFY];
+ vap->va_modify_time.tv_nsec = nva->nva_timensec[NFSTIME_MODIFY];
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ vap->va_change_time.tv_sec = nva->nva_timesec[NFSTIME_CHANGE];
+ vap->va_change_time.tv_nsec = nva->nva_timensec[NFSTIME_CHANGE];
+ VATTR_SET_SUPPORTED(vap, va_change_time);
+
+
+ // VATTR_RETURN(vap, va_encoding, 0xffff /* kTextEncodingUnknown */);
+out:
+ FREE(nva, M_TEMP);
+ return error;
+}
+
+/*
+ * NFS setattr call.
+ */
/*
 * nfs_vnop_setattr: VNOP_SETATTR handler (shared by NFS v2/v3/v4).
 *
 * Handles truncation specially: flushes/clips dirty buffers below the new
 * EOF (holding the node's data lock exclusive), and for NFSv4 ensures the
 * file is open for write access first.  Other attribute changes are sent
 * via the per-version setattr RPC.  On a failed truncation it attempts to
 * restore the original size on the server.
 *
 * Returns 0 on success or an errno.
 */
int
nfs_vnop_setattr(
	struct vnop_setattr_args /* {
	 *  struct vnodeop_desc *a_desc;
	 *  vnode_t a_vp;
	 *  struct vnode_attr *a_vap;
	 *  vfs_context_t a_context;
	 *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	vnode_t vp = ap->a_vp;
	nfsnode_t np = VTONFS(vp);
	struct nfsmount *nmp;
	struct vnode_attr *vap = ap->a_vap;
	int error = 0;
	int biosize, nfsvers, namedattrs;
	u_quad_t origsize, vapsize;
	struct nfs_dulookup *dul;
	nfsnode_t dnp = NULL;
	int dul_in_progress = 0;
	vnode_t dvp = NULL;
	const char *vname = NULL;
#if CONFIG_NFS4
	struct nfs_open_owner *noop = NULL;
	struct nfs_open_file *nofp = NULL;
#endif
	nmp = VTONMP(vp);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;
	namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
	biosize = nmp->nm_biosize;

	/* Disallow write attempts if the filesystem is mounted read-only. */
	if (vnode_vfsisrdonly(vp)) {
		return EROFS;
	}

	origsize = np->n_size;
	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
		/* size changes only make sense for regular files */
		switch (vnode_vtype(vp)) {
		case VDIR:
			return EISDIR;
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			/* ignore size for special files; done if nothing else to set */
			if (!VATTR_IS_ACTIVE(vap, va_modify_time) &&
			    !VATTR_IS_ACTIVE(vap, va_access_time) &&
			    !VATTR_IS_ACTIVE(vap, va_mode) &&
			    !VATTR_IS_ACTIVE(vap, va_uid) &&
			    !VATTR_IS_ACTIVE(vap, va_gid)) {
				return 0;
			}
			VATTR_CLEAR_ACTIVE(vap, va_data_size);
			break;
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 * (NOTE(review): already checked above -- redundant but harmless.)
			 */
			if (vnode_vfsisrdonly(vp)) {
				return EROFS;
			}
			FSDBG_TOP(512, np->n_size, vap->va_data_size,
			    np->n_vattr.nva_size, np->n_flag);
			/* clear NNEEDINVALIDATE, if set */
			if ((error = nfs_node_lock(np))) {
				return error;
			}
			if (np->n_flag & NNEEDINVALIDATE) {
				np->n_flag &= ~NNEEDINVALIDATE;
			}
			nfs_node_unlock(np);
			/* flush everything */
			error = nfs_vinvalbuf(vp, (vap->va_data_size ? V_SAVE : 0), ctx, 1);
			if (error) {
				NP(np, "nfs_setattr: nfs_vinvalbuf %d", error);
				FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, -1);
				return error;
			}
#if CONFIG_NFS4
			if (nfsvers >= NFS_VER4) {
				/* setting file size requires having the file open for write access */
				if (np->n_flag & NREVOKE) {
					return EIO;
				}
				noop = nfs_open_owner_find(nmp, vfs_context_ucred(ctx), 1);
				if (!noop) {
					return ENOMEM;
				}
restart:
				error = nfs_mount_state_in_use_start(nmp, vfs_context_thread(ctx));
				if (error) {
					/*
					 * NOTE(review): 'noop' holds a reference from
					 * nfs_open_owner_find(); returning here without
					 * nfs_open_owner_rele(noop) looks like a reference
					 * leak -- compare with the error path below. Confirm.
					 */
					return error;
				}
				if (np->n_flag & NREVOKE) {
					nfs_mount_state_in_use_end(nmp, 0);
					/* NOTE(review): same possible 'noop' reference leak here. */
					return EIO;
				}
				error = nfs_open_file_find(np, noop, &nofp, 0, 0, 1);
				if (!error && (nofp->nof_flags & NFS_OPEN_FILE_LOST)) {
					error = EIO;
				}
				if (!error && (nofp->nof_flags & NFS_OPEN_FILE_REOPEN)) {
					/* open file needs reopening (e.g. after server reboot) */
					error = nfs4_reopen(nofp, vfs_context_thread(ctx));
					nofp = NULL;
					if (!error) {
						nfs_mount_state_in_use_end(nmp, 0);
						goto restart;
					}
				}
				if (!error) {
					error = nfs_open_file_set_busy(nofp, vfs_context_thread(ctx));
				}
				if (error) {
					nfs_mount_state_in_use_end(nmp, 0);
					nfs_open_owner_rele(noop);
					return error;
				}
				if (!(nofp->nof_access & NFS_OPEN_SHARE_ACCESS_WRITE)) {
					/* we don't have the file open for write access, so open it */
					error = nfs4_open(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
					if (!error) {
						/* remember we opened it just for this setattr, so we close it below */
						nofp->nof_flags |= NFS_OPEN_FILE_SETATTR;
					}
					if (nfs_mount_state_error_should_restart(error)) {
						nfs_open_file_clear_busy(nofp);
						nofp = NULL;
						nfs_mount_state_in_use_end(nmp, error);
						goto restart;
					}
				}
			}
#endif
			/* exclusive data lock held across the truncate; dropped near the end */
			nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
			if (np->n_size > vap->va_data_size) { /* shrinking? */
				daddr64_t obn, bn;
				int mustwrite;
				off_t neweofoff;
				struct nfsbuf *bp;
				nfsbufpgs pagemask;

				/* walk buffers from old EOF block down to the new EOF block */
				obn = (np->n_size - 1) / biosize;
				bn = vap->va_data_size / biosize;
				for (; obn >= bn; obn--) {
					if (!nfs_buf_is_incore(np, obn)) {
						continue;
					}
					error = nfs_buf_get(np, obn, biosize, NULL, NBLK_READ, &bp);
					if (error) {
						continue;
					}
					if (obn != bn) {
						/* entirely beyond the new EOF: just toss it */
						FSDBG(512, bp, bp->nb_flags, 0, obn);
						SET(bp->nb_flags, NB_INVAL);
						nfs_buf_release(bp, 1);
						continue;
					}
					/* this is the block straddling the new EOF */
					mustwrite = 0;
					neweofoff = vap->va_data_size - NBOFF(bp);
					/* check for any dirty data before the new EOF */
					if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
						/* clip dirty range to EOF */
						if (bp->nb_dirtyend > neweofoff) {
							bp->nb_dirtyend = neweofoff;
							if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
								bp->nb_dirtyoff = bp->nb_dirtyend = 0;
							}
						}
						if ((bp->nb_dirtyend > 0) && (bp->nb_dirtyoff < neweofoff)) {
							mustwrite++;
						}
					}
					/* drop dirty-page bits for pages entirely past the new EOF */
					nfs_buf_pgs_get_page_mask(&pagemask, round_page_64(neweofoff) / PAGE_SIZE);
					nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &bp->nb_dirty);
					if (nfs_buf_pgs_is_set(&bp->nb_dirty)) {
						mustwrite++;
					}
					if (!mustwrite) {
						FSDBG(512, bp, bp->nb_flags, 0, obn);
						SET(bp->nb_flags, NB_INVAL);
						nfs_buf_release(bp, 1);
						continue;
					}
					/* gotta write out dirty data before invalidating */
					/* (NB_STABLE indicates that data writes should be FILESYNC) */
					/* (NB_NOCACHE indicates buffer should be discarded) */
					CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL | NB_ASYNC | NB_READ));
					SET(bp->nb_flags, NB_STABLE | NB_NOCACHE);
					if (!IS_VALID_CRED(bp->nb_wcred)) {
						kauth_cred_t cred = vfs_context_ucred(ctx);
						kauth_cred_ref(cred);
						bp->nb_wcred = cred;
					}
					error = nfs_buf_write(bp);
					// Note: bp has been released
					if (error) {
						FSDBG(512, bp, 0xd00dee, 0xbad, error);
						nfs_node_lock_force(np);
						np->n_error = error;
						np->n_flag |= NWRITEERR;
						/*
						 * There was a write error and we need to
						 * invalidate attrs and flush buffers in
						 * order to sync up with the server.
						 * (if this write was extending the file,
						 * we may no longer know the correct size)
						 */
						NATTRINVALIDATE(np);
						nfs_node_unlock(np);
						/* must drop/retake the data lock around nfs_vinvalbuf */
						nfs_data_unlock(np);
						nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
						nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
						error = 0;
					}
				}
			}
			if (vap->va_data_size != np->n_size) {
				ubc_setsize(vp, (off_t)vap->va_data_size); /* XXX error? */
			}
			/* remember the pre-truncate size so we can try to restore it on RPC failure */
			origsize = np->n_size;
			np->n_size = np->n_vattr.nva_size = vap->va_data_size;
			nfs_node_lock_force(np);
			CLR(np->n_flag, NUPDATESIZE);
			nfs_node_unlock(np);
			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
		}
	} else if (VATTR_IS_ACTIVE(vap, va_modify_time) ||
	    VATTR_IS_ACTIVE(vap, va_access_time) ||
	    (vap->va_vaflags & VA_UTIMES_NULL)) {
		/* setting times (but not size): flush dirty data so mtime isn't re-stomped */
		if ((error = nfs_node_lock(np))) {
#if CONFIG_NFS4
			if (nfsvers >= NFS_VER4) {
				/*
				 * NOTE(review): nfs_mount_state_in_use_start() is only
				 * called in the va_data_size branch above, which is
				 * mutually exclusive with this one -- this in_use_end
				 * looks unbalanced.  Confirm against the mount-state
				 * accounting rules.
				 */
				nfs_mount_state_in_use_end(nmp, 0);
			}
#endif
			return error;
		}
		if ((np->n_flag & NMODIFIED) && (vnode_vtype(vp) == VREG)) {
			nfs_node_unlock(np);
			error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
			if (error == EINTR) {
#if CONFIG_NFS4
				if (nfsvers >= NFS_VER4) {
					/* NOTE(review): see unbalanced in_use_end note above. */
					nfs_mount_state_in_use_end(nmp, 0);
				}
#endif
				return error;
			}
		} else {
			nfs_node_unlock(np);
		}
	}

	MALLOC(dul, struct nfs_dulookup *, sizeof(*dul), M_TEMP, M_WAITOK);

	/* changing mode/ownership/ACL invalidates cached access rights */
	if ((VATTR_IS_ACTIVE(vap, va_mode) || VATTR_IS_ACTIVE(vap, va_uid) || VATTR_IS_ACTIVE(vap, va_gid) ||
	    VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid)) &&
	    !(error = nfs_node_lock(np))) {
		NACCESSINVALIDATE(np);
		nfs_node_unlock(np);
		if (!namedattrs) {
			/* kick off a "._" sibling lookup to overlap with the setattr RPC */
			dvp = vnode_getparent(vp);
			vname = vnode_getname(vp);
			dnp = (dvp && vname) ? VTONFS(dvp) : NULL;
			if (dnp) {
				if (nfs_node_set_busy(dnp, vfs_context_thread(ctx))) {
					vnode_put(dvp);
					vnode_putname(vname);
				} else {
					nfs_dulookup_init(dul, dnp, vname, NFS_STRLEN_INT(vname), ctx);
					nfs_dulookup_start(dul, dnp, ctx);
					dul_in_progress = 1;
				}
			} else {
				if (dvp) {
					vnode_put(dvp);
				}
				if (vname) {
					vnode_putname(vname);
				}
			}
		}
	}

	if (!error) {
		error = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
	}

	if (dul_in_progress) {
		nfs_dulookup_finish(dul, dnp, ctx);
		nfs_node_clear_busy(dnp);
		vnode_put(dvp);
		vnode_putname(vname);
	}

	FREE(dul, M_TEMP);
	FSDBG_BOT(512, np->n_size, vap->va_data_size, np->n_vattr.nva_size, error);
	if (VATTR_IS_ACTIVE(vap, va_data_size)) {
		if (error && (origsize != np->n_size) &&
		    ((nfsvers < NFS_VER4) || !nfs_mount_state_error_should_restart(error))) {
			/* make every effort to resync file size w/ server... */
			/* (don't bother if we'll be restarting the operation) */
			int err; /* preserve "error" for return */
			np->n_size = np->n_vattr.nva_size = origsize;
			nfs_node_lock_force(np);
			CLR(np->n_flag, NUPDATESIZE);
			nfs_node_unlock(np);
			FSDBG(512, np, np->n_size, np->n_vattr.nva_size, 0xf00d0002);
			ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
			/* temporarily swap in the original size for the resync RPC */
			vapsize = vap->va_data_size;
			vap->va_data_size = origsize;
			err = nmp->nm_funcs->nf_setattr_rpc(np, vap, ctx);
			if (err) {
				NP(np, "nfs_vnop_setattr: nfs%d_setattr_rpc %d %d", nfsvers, error, err);
			}
			vap->va_data_size = vapsize;
		}
		nfs_node_lock_force(np);
		/*
		 * The size was just set.  If the size is already marked for update, don't
		 * trust the newsize (it may have been set while the setattr was in progress).
		 * Clear the update flag and make sure we fetch new attributes so we are sure
		 * we have the latest size.
		 */
		if (ISSET(np->n_flag, NUPDATESIZE)) {
			CLR(np->n_flag, NUPDATESIZE);
			NATTRINVALIDATE(np);
			nfs_node_unlock(np);
			nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
		} else {
			nfs_node_unlock(np);
		}
		nfs_data_unlock(np);
#if CONFIG_NFS4
		if (nfsvers >= NFS_VER4) {
			if (nofp) {
				/* don't close our setattr open if we'll be restarting... */
				if (!nfs_mount_state_error_should_restart(error) &&
				    (nofp->nof_flags & NFS_OPEN_FILE_SETATTR)) {
					int err = nfs_close(np, nofp, NFS_OPEN_SHARE_ACCESS_WRITE, NFS_OPEN_SHARE_DENY_NONE, ctx);
					if (err) {
						NP(np, "nfs_vnop_setattr: close error: %d", err);
					}
					nofp->nof_flags &= ~NFS_OPEN_FILE_SETATTR;
				}
				nfs_open_file_clear_busy(nofp);
				nofp = NULL;
			}
			if (nfs_mount_state_in_use_end(nmp, error)) {
				goto restart;
			}
			nfs_open_owner_rele(noop);
		}
#endif
	}
	return error;
}
+
+/*
+ * Do an NFS setattr RPC.
+ */
/*
 * nfs3_setattr_rpc: marshal and send a SETATTR request for NFS v2/v3.
 *
 * Claims support for the attributes it knows how to send, encodes the
 * version-specific sattr body, performs the RPC, and updates the cached
 * attributes from the reply (wcc data for v3, full attrs for v2).
 *
 * Returns 0 on success or an errno / RPC status.
 */
int
nfs3_setattr_rpc(
	nfsnode_t np,
	struct vnode_attr *vap,
	vfs_context_t ctx)
{
	struct nfsmount *nmp = NFSTONMP(np);
	int error = 0, lockerror = ENOENT, status = 0, wccpostattr = 0, nfsvers;
	u_int64_t xid, nextxid;
	struct nfsm_chain nmreq, nmrep;

	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;

	/* tell the VFS layer which attributes this RPC can set */
	VATTR_SET_SUPPORTED(vap, va_mode);
	VATTR_SET_SUPPORTED(vap, va_uid);
	VATTR_SET_SUPPORTED(vap, va_gid);
	VATTR_SET_SUPPORTED(vap, va_data_size);
	VATTR_SET_SUPPORTED(vap, va_access_time);
	VATTR_SET_SUPPORTED(vap, va_modify_time);


	if (VATTR_IS_ACTIVE(vap, va_flags)) {
		if (vap->va_flags) { /* we don't support setting flags */
			if (vap->va_active & ~VNODE_ATTR_va_flags) {
				return EINVAL; /* return EINVAL if other attributes also set */
			} else {
				return ENOTSUP; /* return ENOTSUP for chflags(2) */
			}
		}
		/* no flags set, so we'll just ignore it */
		if (!(vap->va_active & ~VNODE_ATTR_va_flags)) {
			return 0; /* no (other) attributes to set, so nothing to do */
		}
	}

	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	nfsm_chain_build_alloc_init(error, &nmreq,
	    NFSX_FH(nfsvers) + NFSX_SATTR(nfsvers));
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	if (nfsvers == NFS_VER3) {
		/* v3 sattr3: each field is a discriminated union (set_it bool + value) */
		if (VATTR_IS_ACTIVE(vap, va_mode)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_32(error, &nmreq, vap->va_mode);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (VATTR_IS_ACTIVE(vap, va_uid)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_32(error, &nmreq, vap->va_uid);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (VATTR_IS_ACTIVE(vap, va_gid)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_32(error, &nmreq, vap->va_gid);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (VATTR_IS_ACTIVE(vap, va_data_size)) {
			nfsm_chain_add_32(error, &nmreq, TRUE);
			nfsm_chain_add_64(error, &nmreq, vap->va_data_size);
		} else {
			nfsm_chain_add_32(error, &nmreq, FALSE);
		}
		if (vap->va_vaflags & VA_UTIMES_NULL) {
			/* utimes(NULL): let the server stamp both times */
			nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
			nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_SERVER);
		} else {
			if (VATTR_IS_ACTIVE(vap, va_access_time)) {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
				nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
				nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_nsec);
			} else {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
			}
			if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_SET_TO_CLIENT);
				nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
				nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_nsec);
			} else {
				nfsm_chain_add_32(error, &nmreq, NFS_TIME_DONT_CHANGE);
			}
		}
		/* sattrguard3: check == FALSE (no ctime guard on this setattr) */
		nfsm_chain_add_32(error, &nmreq, FALSE);
	} else {
		/* v2 sattr: fixed 32-bit fields; (uint32_t)-1 means "don't change" */
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_mode) ?
		    vtonfsv2_mode(vnode_vtype(NFSTOV(np)), vap->va_mode) : -1);
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_uid) ?
		    vap->va_uid : (uint32_t)-1);
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_gid) ?
		    vap->va_gid : (uint32_t)-1);
		nfsm_chain_add_32(error, &nmreq, VATTR_IS_ACTIVE(vap, va_data_size) ?
		    vap->va_data_size : (uint32_t)-1);
		if (VATTR_IS_ACTIVE(vap, va_access_time)) {
			/* v2 times are sec/usec; nsec is converted (or -1 passed through) */
			nfsm_chain_add_32(error, &nmreq, vap->va_access_time.tv_sec);
			nfsm_chain_add_32(error, &nmreq, (vap->va_access_time.tv_nsec != -1) ?
			    ((uint32_t)vap->va_access_time.tv_nsec / 1000) : 0xffffffff);
		} else {
			nfsm_chain_add_32(error, &nmreq, -1);
			nfsm_chain_add_32(error, &nmreq, -1);
		}
		if (VATTR_IS_ACTIVE(vap, va_modify_time)) {
			nfsm_chain_add_32(error, &nmreq, vap->va_modify_time.tv_sec);
			nfsm_chain_add_32(error, &nmreq, (vap->va_modify_time.tv_nsec != -1) ?
			    ((uint32_t)vap->va_modify_time.tv_nsec / 1000) : 0xffffffff);
		} else {
			nfsm_chain_add_32(error, &nmreq, -1);
			nfsm_chain_add_32(error, &nmreq, -1);
		}
	}
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC_SETATTR, ctx, NULL, &nmrep, &xid, &status);
	if ((lockerror = nfs_node_lock(np))) {
		error = lockerror;
	}
	if (nfsvers == NFS_VER3) {
		struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
		nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
		nfsmout_if(error);
		/* if file hadn't changed, update cached mtime */
		if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
			NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
		}
		/* if directory hadn't changed, update namecache mtime */
		if ((vnode_vtype(NFSTOV(np)) == VDIR) &&
		    nfstimespeccmp(&np->n_ncmtime, &premtime, ==)) {
			NFS_CHANGED_UPDATE_NC(nfsvers, np, &np->n_vattr);
		}
		if (!wccpostattr) {
			NATTRINVALIDATE(np);
		}
		error = status;
	} else {
		if (!error) {
			error = status;
		}
		nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
	}
	/*
	 * We just changed the attributes and we want to make sure that we
	 * see the latest attributes.  Get the next XID.  If it's not the
	 * next XID after the SETATTR XID, then it's possible that another
	 * RPC was in flight at the same time and it might put stale attributes
	 * in the cache.  In that case, we invalidate the attributes and set
	 * the attribute cache XID to guarantee that newer attributes will
	 * get loaded next.
	 */
	nextxid = 0;
	nfs_get_xid(&nextxid);
	if (nextxid != (xid + 1)) {
		np->n_xid = nextxid;
		NATTRINVALIDATE(np);
	}
nfsmout:
	if (!lockerror) {
		nfs_node_unlock(np);
	}
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return error;
}
+
+/*
+ * NFS lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the RPC
+ */
/*
 * nfs_vnop_lookup: VNOP_LOOKUP handler.
 *
 * Tries the name cache (and, for rdirplus mounts, the directory buffer
 * cache) first; "." and ".." are resolved locally; otherwise does the
 * per-version lookup RPC and instantiates/refreshes the target nfsnode.
 *
 * Returns 0 with *a_vpp set, ENOENT (possibly cached negative), or an
 * errno; EJUSTRETURN for a missing last component of CREATE/RENAME.
 */
int
nfs_vnop_lookup(
	struct vnop_lookup_args /* {
	 *  struct vnodeop_desc *a_desc;
	 *  vnode_t a_dvp;
	 *  vnode_t *a_vpp;
	 *  struct componentname *a_cnp;
	 *  vfs_context_t a_context;
	 *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	struct componentname *cnp = ap->a_cnp;
	vnode_t dvp = ap->a_dvp;
	vnode_t *vpp = ap->a_vpp;
	int flags = cnp->cn_flags;
	vnode_t newvp;
	nfsnode_t dnp, np;
	struct nfsmount *nmp;
	mount_t mp;
	int nfsvers, error, busyerror = ENOENT, isdot, isdotdot, negnamecache;
	u_int64_t xid = 0;
	struct nfs_vattr *nvattr;
	int ngflags, skipdu = 0;
	struct vnop_access_args naa;
	fhandle_t *fh;
	struct nfsreq *req;

	*vpp = NULLVP;

	dnp = VTONFS(dvp);

	/* scratch allocations released at error_return (also the success path) */
	fh = zalloc(nfs_fhandle_zone);
	req = zalloc_flags(nfs_req_zone, Z_WAITOK);
	MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);
	NVATTR_INIT(nvattr);

	mp = vnode_mount(dvp);
	nmp = VFSTONFS(mp);
	if (nfs_mount_gone(nmp)) {
		error = ENXIO;
		goto error_return;
	}
	nfsvers = nmp->nm_vers;
	negnamecache = !NMFLAG(nmp, NONEGNAMECACHE);

	/* busyerror tracks whether error_return still needs to clear the busy state */
	if ((error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx)))) {
		goto error_return;
	}
	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(dnp, NULL, ctx, NGA_CACHED))) {
		goto error_return;
	}

	/*
	 * cache_lookup() returns: ENOENT = cached negative entry,
	 * 0 = cache miss, -1 = cache hit (*vpp referenced).
	 */
	error = cache_lookup(dvp, vpp, cnp);
	switch (error) {
	case ENOENT:
		/* negative cache entry */
		goto error_return;
	case 0:
		/* cache miss */
		if ((nfsvers > NFS_VER2) && NMFLAG(nmp, RDIRPLUS)) {
			/* if rdirplus, try dir buf cache lookup */
			error = nfs_dir_buf_cache_lookup(dnp, &np, cnp, ctx, 0, &skipdu);
			if (!error && np) {
				/* dir buf cache hit */
				*vpp = NFSTOV(np);
				error = -1;  /* treat like a name-cache hit below */
			} else if (skipdu) {
				/* Skip lookup for du files */
				error = ENOENT;
				goto error_return;
			}
		}
		if (error != -1) { /* cache miss */
			break;
		}
		OS_FALLTHROUGH;
	case -1:
		/* cache hit, not really an error */
		OSAddAtomic64(1, &nfsstats.lookupcache_hits);

		nfs_node_clear_busy(dnp);
		busyerror = ENOENT;  /* busy cleared; error_return must not clear again */

		/* check for directory access */
		naa.a_desc = &vnop_access_desc;
		naa.a_vp = dvp;
		naa.a_action = KAUTH_VNODE_SEARCH;
		naa.a_context = ctx;

		/* compute actual success/failure based on accessibility */
		error = nfs_vnop_access(&naa);
		OS_FALLTHROUGH;
	default:
		/* unexpected error from cache_lookup */
		/* (also the exit for the cache-hit path above, with error possibly 0) */
		goto error_return;
	}

	/* skip lookup, if we know who we are: "." or ".." */
	isdot = isdotdot = 0;
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			isdot = 1;
		}
		if ((cnp->cn_namelen == 2) && (cnp->cn_nameptr[1] == '.')) {
			isdotdot = 1;
		}
	}
	if (isdotdot || isdot) {
		fh->fh_len = 0;
		goto found;
	}
#if CONFIG_NFS4
	if ((nfsvers >= NFS_VER4) && (dnp->n_vattr.nva_flags & NFS_FFLAG_TRIGGER)) {
		/* we should never be looking things up in a trigger directory, return nothing */
		error = ENOENT;
		goto error_return;
	}
#endif

	/* do we know this name is too long? */
	nmp = VTONMP(dvp);
	if (nfs_mount_gone(nmp)) {
		error = ENXIO;
		goto error_return;
	}
	if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXNAME) &&
	    (cnp->cn_namelen > nmp->nm_fsattr.nfsa_maxname)) {
		error = ENAMETOOLONG;
		goto error_return;
	}

	error = 0;
	newvp = NULLVP;

	OSAddAtomic64(1, &nfsstats.lookupcache_misses);

	/* go over the wire: per-version lookup RPC */
	error = nmp->nm_funcs->nf_lookup_rpc_async(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &req);
	nfsmout_if(error);
	error = nmp->nm_funcs->nf_lookup_rpc_async_finish(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, req, &xid, fh, nvattr);
	nfsmout_if(error);

	/* is the file handle the same as this directory's file handle? */
	isdot = NFS_CMPFH(dnp, fh->fh_data, fh->fh_len);

found:
	if (flags & ISLASTCN) {
		switch (cnp->cn_nameiop) {
		case DELETE:
			/* don't cache the name of a file about to be removed/renamed */
			cnp->cn_flags &= ~MAKEENTRY;
			break;
		case RENAME:
			cnp->cn_flags &= ~MAKEENTRY;
			if (isdot) {
				error = EISDIR;
				goto error_return;
			}
			break;
		}
	}

	if (isdotdot) {
		newvp = vnode_getparent(dvp);
		if (!newvp) {
			error = ENOENT;
			goto error_return;
		}
	} else if (isdot) {
		error = vnode_get(dvp);
		if (error) {
			goto error_return;
		}
		newvp = dvp;
		nfs_node_lock_force(dnp);
		/* only load attrs if they're newer than what's cached (xid check) */
		if (fh->fh_len && (dnp->n_xid <= xid)) {
			nfs_loadattrcache(dnp, nvattr, &xid, 0);
		}
		nfs_node_unlock(dnp);
	} else {
		ngflags = (cnp->cn_flags & MAKEENTRY) ? NG_MAKEENTRY : 0;
		error = nfs_nget(mp, dnp, cnp, fh->fh_data, fh->fh_len, nvattr, &xid, req->r_auth, ngflags, &np);
		if (error) {
			goto error_return;
		}
		newvp = NFSTOV(np);
		nfs_node_unlock(np);
	}
	*vpp = newvp;

nfsmout:
	if (error) {
		if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
		    (flags & ISLASTCN) && (error == ENOENT)) {
			/* missing last component of a create/rename: let the caller proceed */
			if (vnode_mount(dvp) && vnode_vfsisrdonly(dvp)) {
				error = EROFS;
			} else {
				error = EJUSTRETURN;
			}
		}
	}
	if ((error == ENOENT) && (cnp->cn_flags & MAKEENTRY) &&
	    (cnp->cn_nameiop != CREATE) && negnamecache) {
		/* add a negative entry in the name cache */
		nfs_node_lock_force(dnp);
		cache_enter(dvp, NULL, cnp);
		dnp->n_flag |= NNEGNCENTRIES;
		nfs_node_unlock(dnp);
	}
error_return:
	NVATTR_CLEANUP(nvattr);
	NFS_ZFREE(nfs_fhandle_zone, fh);
	NFS_ZFREE(nfs_req_zone, req);
	FREE(nvattr, M_TEMP);
	if (!busyerror) {
		nfs_node_clear_busy(dnp);
	}
	if (error && *vpp) {
		vnode_put(*vpp);
		*vpp = NULLVP;
	}
	return error;
}
+
/*
 * Tunable for readlink caching (see nfs_vnop_readlink): when non-zero,
 * symlink attrs are fetched uncached and cached link text is refreshed
 * after a timeout; values > 1 force a refresh on every readlink.
 */
int nfs_readlink_nocache = DEFAULT_READLINK_NOCACHE;
+
+/*
+ * NFS readlink call
+ */
/*
 * nfs_vnop_readlink: VNOP_READLINK handler.
 *
 * Serves the link text from the node's cached meta buffer when possible;
 * otherwise issues the per-version readlink RPC (refreshing the file
 * handle once on ESTALE).  The nfs_readlink_nocache tunable controls how
 * aggressively the cached text is refreshed.
 *
 * Returns 0 on success or an errno.
 */
int
nfs_vnop_readlink(
	struct vnop_readlink_args /* {
	 *  struct vnodeop_desc *a_desc;
	 *  vnode_t a_vp;
	 *  struct uio *a_uio;
	 *  vfs_context_t a_context;
	 *  } */*ap)
{
	vfs_context_t ctx = ap->a_context;
	nfsnode_t np = VTONFS(ap->a_vp);
	struct nfsmount *nmp;
	int error = 0, nfsvers;
	size_t buflen;
	uio_t uio = ap->a_uio;
	struct nfsbuf *bp = NULL;
	struct timespec ts = { .tv_sec = 0, .tv_nsec = 0 };
	long timeo = 0;

	/* readlink only makes sense on symlinks */
	if (vnode_vtype(ap->a_vp) != VLNK) {
		return EPERM;
	}

	if (uio_resid(uio) == 0) {
		return 0;
	}
	if (uio_offset(uio) < 0) {
		return EINVAL;
	}

	nmp = VTONMP(ap->a_vp);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;


	/* nfs_getattr() will check changed and purge caches */
	if ((error = nfs_getattr(np, NULL, ctx, nfs_readlink_nocache ? NGA_UNCACHED : NGA_CACHED))) {
		FSDBG(531, np, 0xd1e0001, 0, error);
		return error;
	}

	if (nfs_readlink_nocache) {
		timeo = nfs_attrcachetimeout(np);
		nanouptime(&ts);
	}

retry:
	OSAddAtomic64(1, &nfsstats.biocache_readlinks);
	/* link text lives in a single meta buffer at logical block 0 */
	error = nfs_buf_get(np, 0, NFS_MAXPATHLEN, vfs_context_thread(ctx), NBLK_META, &bp);
	if (error) {
		FSDBG(531, np, 0xd1e0002, 0, error);
		return error;
	}

	if (nfs_readlink_nocache) {
		NFS_VNOP_DBG("timeo = %ld ts.tv_sec = %ld need refresh = %d cached = %d\n", timeo, ts.tv_sec,
		    (np->n_rltim.tv_sec + timeo) < ts.tv_sec || nfs_readlink_nocache > 1,
		    ISSET(bp->nb_flags, NB_CACHE) == NB_CACHE);
		/* n_rltim is synchronized by the associated nfs buf */
		/* stale (or nocache > 1): toss the cached text and re-read */
		if (ISSET(bp->nb_flags, NB_CACHE) && ((nfs_readlink_nocache > 1) || ((np->n_rltim.tv_sec + timeo) < ts.tv_sec))) {
			SET(bp->nb_flags, NB_INVAL);
			nfs_buf_release(bp, 0);
			goto retry;
		}
	}
	if (!ISSET(bp->nb_flags, NB_CACHE)) {
readagain:
		OSAddAtomic64(1, &nfsstats.readlink_bios);
		buflen = bp->nb_bufsize;
		error = nmp->nm_funcs->nf_readlink_rpc(np, bp->nb_data, &buflen, ctx);
		if (error) {
			if (error == ESTALE) {
				/* server says our handle is stale: refresh it and retry once per refresh */
				NFS_VNOP_DBG("Stale FH from readlink rpc\n");
				error = nfs_refresh_fh(np, ctx);
				if (error == 0) {
					goto readagain;
				}
			}
			SET(bp->nb_flags, NB_ERROR);
			bp->nb_error = error;
			NFS_VNOP_DBG("readlink failed %d\n", error);
		} else {
			bp->nb_validoff = 0;
			bp->nb_validend = buflen;
			np->n_rltim = ts;  /* remember when we fetched the link text */
			NFS_VNOP_DBG("readlink of %.*s\n", (int32_t)bp->nb_validend, (char *)bp->nb_data);
		}
	} else {
		NFS_VNOP_DBG("got cached link of %.*s\n", (int32_t)bp->nb_validend, (char *)bp->nb_data);
	}

	if (!error && (bp->nb_validend > 0)) {
		/* uiomove takes an int; split the copy in case validend exceeds INT_MAX */
		int validend32 = bp->nb_validend > INT_MAX ? INT_MAX : (int)bp->nb_validend;
		error = uiomove(bp->nb_data, validend32, uio);
		if (!error && bp->nb_validend > validend32) {
			error = uiomove(bp->nb_data + validend32, (int)(bp->nb_validend - validend32), uio);
		}
	}
	FSDBG(531, np, bp->nb_validend, 0, error);
	nfs_buf_release(bp, 1);
	return error;
}
+
+/*
+ * Do a readlink RPC.
+ */
/*
 * nfs3_readlink_rpc: perform a READLINK RPC for NFS v2/v3.
 *
 * On entry *buflenp is the capacity of 'buf'; on success it is updated to
 * the number of bytes of link text copied in.
 *
 * Returns 0 on success or an errno / RPC status.
 */
int
nfs3_readlink_rpc(nfsnode_t np, char *buf, size_t *buflenp, vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error = 0, lockerror = ENOENT, nfsvers, status;
	size_t len;
	u_int64_t xid;
	struct nfsm_chain nmreq, nmrep;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;
	nfsm_chain_null(&nmreq);
	nfsm_chain_null(&nmrep);

	/* request is just the file handle */
	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers));
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request(np, NULL, &nmreq, NFSPROC_READLINK, ctx, NULL, &nmrep, &xid, &status);
	if ((lockerror = nfs_node_lock(np))) {
		error = lockerror;
	}
	if (nfsvers == NFS_VER3) {
		nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
	}
	if (!error) {
		error = status;
	}
	nfsm_chain_get_32(error, &nmrep, len);
	nfsmout_if(error);
	if ((nfsvers == NFS_VER2) && (len > *buflenp)) {
		error = EBADRPC;
		goto nfsmout;
	}
	if (len >= *buflenp) {
		/*
		 * Server returned more than fits: clamp to the link's known size
		 * if that's smaller, else to capacity - 1.
		 * NOTE(review): the "- 1" drops a byte of link text when the reply
		 * exactly fills the buffer -- presumably to leave room; confirm.
		 */
		if (np->n_size && (np->n_size < *buflenp)) {
			len = (size_t)np->n_size;
		} else {
			len = *buflenp - 1;
		}
	}
	nfsm_chain_get_opaque(error, &nmrep, len, buf);
	if (!error) {
		*buflenp = len;
	}
nfsmout:
	if (!lockerror) {
		nfs_node_unlock(np);
	}
	nfsm_chain_cleanup(&nmreq);
	nfsm_chain_cleanup(&nmrep);
	return error;
}
+
+/*
+ * NFS read RPC call
+ * Ditto above
+ */
/*
 * nfs_read_rpc: satisfy a read by looping over READ RPCs.
 *
 * Splits the uio's range into rsize-bounded chunks, issuing the
 * per-version async read RPC for each.  For NFSv4, recoverable state
 * errors trigger recovery and a bounded number of restarts.
 *
 * Returns 0 on success (short reads end the loop at EOF) or an errno.
 */
int
nfs_read_rpc(nfsnode_t np, uio_t uio, vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error = 0, nfsvers, eof = 0;
	size_t nmrsize, len, retlen;
	user_ssize_t tsiz;
	off_t txoffset;
	struct nfsreq *req;
#if CONFIG_NFS4
	uint32_t stategenid = 0, restart = 0;
#endif
	FSDBG_TOP(536, np, uio_offset(uio), uio_resid(uio), 0);
	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;
	nmrsize = nmp->nm_rsize;

	txoffset = uio_offset(uio);
	tsiz = uio_resid(uio);
	/* v2 offsets are 32-bit on the wire */
	if ((nfsvers == NFS_VER2) && ((uint64_t)(txoffset + tsiz) > 0xffffffffULL)) {
		FSDBG_BOT(536, np, uio_offset(uio), uio_resid(uio), EFBIG);
		return EFBIG;
	}

	req = zalloc_flags(nfs_req_zone, Z_WAITOK);
	while (tsiz > 0) {
		/* read at most rsize bytes per RPC */
		len = retlen = (tsiz > (user_ssize_t)nmrsize) ? nmrsize : (size_t)tsiz;
		FSDBG(536, np, txoffset, len, 0);
		if (np->n_flag & NREVOKE) {
			error = EIO;
			break;
		}
#if CONFIG_NFS4
		if (nmp->nm_vers >= NFS_VER4) {
			/* snapshot state generation to detect concurrent recovery */
			stategenid = nmp->nm_stategenid;
		}
#endif
		error = nmp->nm_funcs->nf_read_rpc_async(np, txoffset, len,
		    vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, &req);
		if (!error) {
			error = nmp->nm_funcs->nf_read_rpc_async_finish(np, req, uio, &retlen, &eof);
		}
#if CONFIG_NFS4
		if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
		    (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
			lck_mtx_lock(&nmp->nm_lock);
			/* only initiate recovery if no one else has since we snapshotted */
			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
				NP(np, "nfs_read_rpc: error %d, initiating recovery", error);
				nfs_need_recover(nmp, error);
			}
			lck_mtx_unlock(&nmp->nm_lock);
			if (np->n_flag & NREVOKE) {
				error = EIO;
			} else {
				if (error == NFSERR_GRACE) {
					/* server in grace period: back off before retrying */
					tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
				}
				if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
					continue;
				}
			}
		}
#endif
		if (error) {
			break;
		}
		txoffset += retlen;
		tsiz -= retlen;
		if (nfsvers != NFS_VER2) {
			/* v3+: server reports EOF; a zero-length read also ends the loop */
			if (eof || (retlen == 0)) {
				tsiz = 0;
			}
		} else if (retlen < len) {
			/* v2: a short read implies EOF */
			tsiz = 0;
		}
	}

	NFS_ZFREE(nfs_req_zone, req);
	FSDBG_BOT(536, np, eof, uio_resid(uio), error);
	return error;
}
+
/*
 * nfs3_read_rpc_async: build and dispatch an async READ request (v2/v3).
 *
 * Marshals the file handle plus offset/length (64-bit offset for v3,
 * 32-bit for v2 with a trailing totalcount word that v2 servers ignore)
 * and hands it to nfs_request_async(); *reqp receives the in-flight
 * request for nfs3_read_rpc_async_finish().
 *
 * Returns 0 on success or an errno.
 */
int
nfs3_read_rpc_async(
	nfsnode_t np,
	off_t offset,
	size_t len,
	thread_t thd,
	kauth_cred_t cred,
	struct nfsreq_cbinfo *cb,
	struct nfsreq **reqp)
{
	struct nfsmount *nmp;
	int error = 0, nfsvers;
	struct nfsm_chain nmreq;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp)) {
		return ENXIO;
	}
	nfsvers = nmp->nm_vers;

	nfsm_chain_null(&nmreq);
	nfsm_chain_build_alloc_init(error, &nmreq, NFSX_FH(nfsvers) + 3 * NFSX_UNSIGNED);
	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
	if (nfsvers == NFS_VER3) {
		nfsm_chain_add_64(error, &nmreq, offset);
		nfsm_chain_add_32(error, &nmreq, len);
	} else {
		nfsm_chain_add_32(error, &nmreq, offset);
		nfsm_chain_add_32(error, &nmreq, len);
		nfsm_chain_add_32(error, &nmreq, 0);  /* v2 totalcount (unused) */
	}
	nfsm_chain_build_done(error, &nmreq);
	nfsmout_if(error);
	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_READ, thd, cred, NULL, 0, cb, reqp);
nfsmout:
	nfsm_chain_cleanup(&nmreq);
	return error;
}
+
+ /*
+  * Finish an asynchronous NFS v2/v3 READ request:
+  * wait for the reply, update cached attributes, and copy the returned
+  * data into the caller's uio.  On return *lenp holds the number of
+  * bytes actually transferred and *eofp (if non-NULL) is set at EOF.
+  * Returns EINPROGRESS if the async request was restarted.
+  */
+ int
+ nfs3_read_rpc_async_finish(
+ nfsnode_t np,
+ struct nfsreq *req,
+ uio_t uio,
+ size_t *lenp,
+ int *eofp)
+ {
+ int error = 0, lockerror, nfsvers, status = 0, eof = 0;
+ uint32_t retlen = 0;
+ uint64_t xid;
+ struct nfsmount *nmp;
+ struct nfsm_chain nmrep;
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ nfs_request_async_cancel(req);
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmrep);
+
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ if (error == EINPROGRESS) { /* async request restarted */
+ return error;
+ }
+
+ if ((lockerror = nfs_node_lock(np))) {
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_postop_attr_update(error, &nmrep, np, &xid);
+ }
+ if (!error) {
+ error = status;
+ }
+ if (nfsvers == NFS_VER3) {
+ /* skip the v3 count word; the opaque data length is read below */
+ nfsm_chain_adv(error, &nmrep, NFSX_UNSIGNED);
+ nfsm_chain_get_32(error, &nmrep, eof);
+ } else {
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+ }
+ if (!lockerror) {
+ nfs_node_unlock(np);
+ }
+ /* retlen is the length of the opaque data in the reply */
+ nfsm_chain_get_32(error, &nmrep, retlen);
+ if ((nfsvers == NFS_VER2) && (retlen > *lenp)) {
+ /* server returned more data than we asked for: bogus reply */
+ error = EBADRPC;
+ }
+ nfsmout_if(error);
+ error = nfsm_chain_get_uio(&nmrep, MIN(retlen, *lenp), uio);
+ if (eofp) {
+ if (nfsvers == NFS_VER3) {
+ /* treat a zero-length reply without the EOF flag as EOF anyway */
+ if (!eof && !retlen) {
+ eof = 1;
+ }
+ } else if (retlen < *lenp) {
+ /* v2 has no EOF flag; infer EOF from a short read */
+ eof = 1;
+ }
+ *eofp = eof;
+ }
+ *lenp = MIN(retlen, *lenp);
+nfsmout:
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+ }
+
+/*
+ * NFS write call
+ */
+int
+nfs_vnop_write(
+ struct vnop_write_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_vp;
+ * struct uio *a_uio;
+ * int a_ioflag;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ uio_t uio = ap->a_uio;
+ vnode_t vp = ap->a_vp;
+ nfsnode_t np = VTONFS(vp);
+ int ioflag = ap->a_ioflag;
+ struct nfsbuf *bp;
+ struct nfsmount *nmp = VTONMP(vp);
+ daddr64_t lbn;
+ uint32_t biosize;
+ int error = 0;
+ off_t n, on;
+ int n32;
+ off_t boff, start, end;
+ uio_t auio;
+ char auio_buf[UIO_SIZEOF(1)];
+ thread_t thd;
+ kauth_cred_t cred;
+
+ FSDBG_TOP(515, np, uio_offset(uio), uio_resid(uio), ioflag);
+
+ if (vnode_vtype(vp) != VREG) {
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), EIO);
+ return EIO;
+ }
+
+ thd = vfs_context_thread(ctx);
+ cred = vfs_context_ucred(ctx);
+
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+
+ if ((error = nfs_node_lock(np))) {
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return error;
+ }
+ np->n_wrbusy++;
+
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+ if (np->n_flag & NNEEDINVALIDATE) {
+ np->n_flag &= ~NNEEDINVALIDATE;
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ nfs_vinvalbuf(vp, V_SAVE | V_IGNORE_WRITEERR, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+ } else {
+ nfs_node_unlock(np);
+ }
+ if (error) {
+ goto out;
+ }
+
+ biosize = nmp->nm_biosize;
+
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ nfs_node_lock_force(np);
+ if (np->n_flag & NMODIFIED) {
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ nfs_data_lock(np, NFS_DATA_LOCK_SHARED);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad01, error);
+ goto out;
+ }
+ } else {
+ nfs_node_unlock(np);
+ }
+ if (ioflag & IO_APPEND) {
+ nfs_data_unlock(np);
+ /* nfs_getattr() will check changed and purge caches */
+ error = nfs_getattr(np, NULL, ctx, NGA_UNCACHED);
+ /* we'll be extending the file, so take the data lock exclusive */
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
+ goto out;
+ }
+ uio_setoffset(uio, np->n_size);
+ }
+ }
+ if (uio_offset(uio) < 0) {
+ error = EINVAL;
+ FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
+ goto out;
+ }
+ if (uio_resid(uio) == 0) {
+ goto out;
+ }
+
+ if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
+ /*
+ * It looks like we'll be extending the file, so take the data lock exclusive.
+ */
+ nfs_data_unlock(np);
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+
+ /*
+ * Also, if the write begins after the previous EOF buffer, make sure to zero
+ * and validate the new bytes in that buffer.
+ */
+ struct nfsbuf *eofbp = NULL;
+ daddr64_t eofbn = np->n_size / biosize;
+ uint32_t eofoff = np->n_size % biosize;
+ lbn = uio_offset(uio) / biosize;
+
+ if (eofoff && (eofbn < lbn)) {
+ if ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE | NBLK_ONLYVALID, &eofbp))) {
+ goto out;
+ }
+ np->n_size += (biosize - eofoff);
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ if (eofbp) {
+ /*
+ * For the old last page, don't zero bytes if there
+ * are invalid bytes in that page (i.e. the page isn't
+ * currently valid).
+ * For pages after the old last page, zero them and
+ * mark them as valid.
+ */
+ char *d;
+ int i;
+ if (ioflag & IO_NOCACHE) {
+ SET(eofbp->nb_flags, NB_NOCACHE);
+ }
+ NFS_BUF_MAP(eofbp);
+ FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
+ d = eofbp->nb_data;
+ i = eofoff / PAGE_SIZE;
+ while (eofoff < biosize) {
+ int poff = eofoff & PAGE_MASK;
+ if (!poff || NBPGVALID(eofbp, i)) {
+ bzero(d + eofoff, PAGE_SIZE - poff);
+ NBPGVALID_SET(eofbp, i);
+ }
+ eofoff += PAGE_SIZE - poff;
+ i++;
+ }
+ nfs_buf_release(eofbp, 1);
+ }
+ }
+ }
+
+ do {
+ OSAddAtomic64(1, &nfsstats.biocache_writes);
+ lbn = uio_offset(uio) / biosize;
+ on = uio_offset(uio) % biosize;
+ n = biosize - on;
+ if (uio_resid(uio) < n) {
+ n = uio_resid(uio);
+ }
+again:
+ /*
+ * Get a cache block for writing. The range to be written is
+ * (off..off+n) within the block. We ensure that the block
+ * either has no dirty region or that the given range is
+ * contiguous with the existing dirty region.
+ */
+ error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
+ if (error) {
+ goto out;
+ }
+ /* map the block because we know we're going to write to it */
+ NFS_BUF_MAP(bp);
+
+ if (ioflag & IO_NOCACHE) {
+ SET(bp->nb_flags, NB_NOCACHE);
+ }
+
+ if (!IS_VALID_CRED(bp->nb_wcred)) {
+ kauth_cred_ref(cred);
+ bp->nb_wcred = cred;
+ }
+
+ /*
+ * If there's already a dirty range AND dirty pages in this block we
+ * need to send a commit AND write the dirty pages before continuing.
+ *
+ * If there's already a dirty range OR dirty pages in this block
+ * and the new write range is not contiguous with the existing range,
+ * then force the buffer to be written out now.
+ * (We used to just extend the dirty range to cover the valid,
+ * but unwritten, data in between also. But writing ranges
+ * of data that weren't actually written by an application
+ * risks overwriting some other client's data with stale data
+ * that's just masquerading as new written data.)
+ */
+ if (bp->nb_dirtyend > 0) {
+ if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || nfs_buf_pgs_is_set(&bp->nb_dirty)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ goto again;
+ }
+ } else if (nfs_buf_pgs_is_set(&bp->nb_dirty)) {
+ off_t firstpg = 0, lastpg = 0;
+ nfsbufpgs pagemask, pagemaskand;
+ /* calculate write range pagemask */
+ if (n > 0) {
+ firstpg = on / PAGE_SIZE;
+ lastpg = (on + n - 1) / PAGE_SIZE;
+ nfs_buf_pgs_set_pages_between(&pagemask, firstpg, lastpg + 1);
+ } else {
+ NBPGS_ERASE(&pagemask);
+ }
+ /* check if there are dirty pages outside the write range */
+ nfs_buf_pgs_bit_not(&pagemask);
+ nfs_buf_pgs_bit_and(&bp->nb_dirty, &pagemask, &pagemaskand);
+ if (nfs_buf_pgs_is_set(&pagemaskand)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ goto again;
+ }
+ /* if the first or last pages are already dirty */
+ /* make sure that the dirty range encompasses those pages */
+ if (NBPGDIRTY(bp, firstpg) || NBPGDIRTY(bp, lastpg)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
+ bp->nb_dirtyoff = MIN(on, firstpg * PAGE_SIZE);
+ if (NBPGDIRTY(bp, lastpg)) {
+ bp->nb_dirtyend = (lastpg + 1) * PAGE_SIZE;
+ /* clip to EOF */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ } else {
+ bp->nb_dirtyend = on + n;
+ }
+ }
+ }
+
+ /*
+ * Are we extending the size of the file with this write?
+ * If so, update file size now that we have the block.
+ * If there was a partial buf at the old eof, validate
+ * and zero the new bytes.
+ */
+ if ((uio_offset(uio) + n) > (off_t)np->n_size) {
+ daddr64_t eofbn = np->n_size / biosize;
+ int neweofoff = (uio_offset(uio) + n) % biosize;
+
+ FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
+
+ /* if we're extending within the same last block */
+ /* and the block is flagged as being cached... */
+ if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
+ /* ...check that all pages in buffer are valid */
+ int endpg = ((neweofoff ? neweofoff : biosize) - 1) / PAGE_SIZE;
+ nfsbufpgs pagemask, pagemaskand;
+ /* pagemask only has to extend to last page being written to */
+ nfs_buf_pgs_get_page_mask(&pagemask, endpg + 1);
+ FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
+ nfs_buf_pgs_bit_and(&bp->nb_valid, &pagemask, &pagemaskand);
+ if (!NBPGS_IS_EQUAL(&pagemaskand, &pagemask)) {
+ /* zerofill any hole */
+ if (on > bp->nb_validend) {
+ for (off_t i = bp->nb_validend / PAGE_SIZE; i <= (on - 1) / PAGE_SIZE; i++) {
+ NBPGVALID_SET(bp, i);
+ }
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
+ NFS_BZERO((char *)bp->nb_data + bp->nb_validend, on - bp->nb_validend);
+ }
+ /* zerofill any trailing data in the last page */
+ if (neweofoff) {
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
+ bzero((char *)bp->nb_data + neweofoff,
+ PAGE_SIZE - (neweofoff & PAGE_MASK));
+ }
+ }
+ }
+ np->n_size = uio_offset(uio) + n;
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ }
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend) {
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ }
+ /*
+ * UBC doesn't handle partial pages, so we need to make sure
+ * that any pages left in the page cache are completely valid.
+ *
+ * Writes that are smaller than a block are delayed if they
+ * don't extend to the end of the block.
+ *
+ * If the block isn't (completely) cached, we may need to read
+ * in some parts of pages that aren't covered by the write.
+ * If the write offset (on) isn't page aligned, we'll need to
+ * read the start of the first page being written to. Likewise,
+ * if the offset of the end of the write (on+n) isn't page aligned,
+ * we'll need to read the end of the last page being written to.
+ *
+ * Notes:
+ * We don't want to read anything we're just going to write over.
+ * We don't want to read anything we're just going drop when the
+ * I/O is complete (i.e. don't do reads for NOCACHE requests).
+ * We don't want to issue multiple I/Os if we don't have to
+ * (because they're synchronous rpcs).
+ * We don't want to read anything we already have modified in the
+ * page cache.
+ */
+ if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+ off_t firstpgoff, lastpgoff, firstpg, lastpg, dirtypg;
+ start = end = -1;
+ firstpg = on / PAGE_SIZE;
+ firstpgoff = on & PAGE_MASK;
+ lastpg = (on + n - 1) / PAGE_SIZE;
+ lastpgoff = (on + n) & PAGE_MASK;
+ if (firstpgoff && !NBPGVALID(bp, firstpg)) {
+ /* need to read start of first page */
+ start = firstpg * PAGE_SIZE;
+ end = start + firstpgoff;
+ }
+ if (lastpgoff && !NBPGVALID(bp, lastpg)) {
+ /* need to read end of last page */
+ if (start < 0) {
+ start = (lastpg * PAGE_SIZE) + lastpgoff;
+ }
+ end = (lastpg + 1) * PAGE_SIZE;
+ }
+ if (ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /*
+ * For nocache writes, if there is any partial page at the
+ * start or end of the write range, then we do the write
+ * synchronously to make sure that we can drop the data
+ * from the cache as soon as the WRITE finishes. Normally,
+ * we would do an unstable write and not drop the data until
+ * it was committed. But doing that here would risk allowing
+ * invalid data to be read from the cache between the WRITE
+ * and the COMMIT.
+ * (NB_STABLE indicates that data writes should be FILESYNC)
+ */
+ if (end > start) {
+ SET(bp->nb_flags, NB_STABLE);
+ }
+ goto skipread;
+ }
+ if (end > start) {
+ /* need to read the data in range: start...end-1 */
+
+ /* first, check for dirty pages in between */
+ /* if there are, we'll have to do two reads because */
+ /* we don't want to overwrite the dirty pages. */
+ for (dirtypg = start / PAGE_SIZE; dirtypg <= (end - 1) / PAGE_SIZE; dirtypg++) {
+ if (NBPGDIRTY(bp, dirtypg)) {
+ break;
+ }
+ }
+
+ /* if start is at beginning of page, try */
+ /* to get any preceeding pages as well. */
+ if (!(start & PAGE_MASK)) {
+ /* stop at next dirty/valid page or start of block */
+ for (; start > 0; start -= PAGE_SIZE) {
+ if (NBPGVALID(bp, ((start - 1) / PAGE_SIZE))) {
+ break;
+ }
+ }
+ }
+
+ NFS_BUF_MAP(bp);
+ /* setup uio for read(s) */
+ boff = NBOFF(bp);
+ auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+ &auio_buf, sizeof(auio_buf));
+
+ if (dirtypg <= (end - 1) / PAGE_SIZE) {
+ /* there's a dirty page in the way, so just do two reads */
+ /* we'll read the preceding data here */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ NFS_UIO_ADDIOV(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
+ bp->nb_validoff = start;
+ }
+ if ((bp->nb_validend < 0) || (bp->nb_validend < on)) {
+ bp->nb_validend = on;
+ }
+ if ((off_t)np->n_size > boff + bp->nb_validend) {
+ bp->nb_validend = MIN(np->n_size - (boff + start), biosize);
+ }
+ /* validate any pages before the write offset */
+ for (; start < on / PAGE_SIZE; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ }
+ }
+ /* adjust start to read any trailing data */
+ start = on + n;
+ }
+
+ /* if end is at end of page, try to */
+ /* get any following pages as well. */
+ if (!(end & PAGE_MASK)) {
+ /* stop at next valid page or end of block */
+ for (; end < biosize; end += PAGE_SIZE) {
+ if (NBPGVALID(bp, end / PAGE_SIZE)) {
+ break;
+ }
+ }
+ }
+
+ if (((boff + start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ NFS_BZERO(bp->nb_data + start, end - start);
+ error = 0;
+ } else {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ NFS_UIO_ADDIOV(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE | NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start)) {
+ bp->nb_validoff = start;
+ }
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end)) {
+ bp->nb_validend = end;
+ }
+ if ((off_t)np->n_size > boff + bp->nb_validend) {
+ bp->nb_validend = MIN(np->n_size - (boff + start), biosize);
+ }
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_64(on); start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ }
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_64(on + n - 1); end -= PAGE_SIZE) {
+ NBPGVALID_SET(bp, (end - 1) / PAGE_SIZE);
+ }
+ }
+ /* Note: pages being written to will be validated when written */
+ }
+ }
+skipread:
+
+ if (ISSET(bp->nb_flags, NB_ERROR)) {
+ error = bp->nb_error;
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ nfs_node_lock_force(np);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+
+ NFS_BUF_MAP(bp);
+ if (n < 0) {
+ error = EINVAL;
+ } else {
+ n32 = n > INT_MAX ? INT_MAX : (int)n;
+ error = uiomove(bp->nb_data + on, n32, uio);
+ if (!error && n > n32) {
+ error = uiomove(bp->nb_data + on + n32, (int)(n - n32), uio);
+ }
+ }
+ if (error) {
+ SET(bp->nb_flags, NB_ERROR);
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ /* validate any pages written to */
+ start = on & ~PAGE_MASK;
+ for (; start < on + n; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start / PAGE_SIZE);
+ /*
+ * This may seem a little weird, but we don't actually set the
+ * dirty bits for writes. This is because we keep the dirty range
+ * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
+ * delayed writes, when we give the pages back to the VM we don't
+ * want to keep them marked dirty, because when we later write the
+ * buffer we won't be able to tell which pages were written dirty
+ * and which pages were mmapped and dirtied.
+ */
+ }
+ if (bp->nb_dirtyend > 0) {
+ bp->nb_dirtyoff = MIN(on, bp->nb_dirtyoff);
+ bp->nb_dirtyend = MAX((on + n), bp->nb_dirtyend);
+ } else {
+ bp->nb_dirtyoff = on;
+ bp->nb_dirtyend = on + n;
+ }
+ if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
+ bp->nb_validoff > bp->nb_dirtyend) {
+ bp->nb_validoff = bp->nb_dirtyoff;
+ bp->nb_validend = bp->nb_dirtyend;
+ } else {
+ bp->nb_validoff = MIN(bp->nb_validoff, bp->nb_dirtyoff);
+ bp->nb_validend = MAX(bp->nb_validend, bp->nb_dirtyend);
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE)) {
+ nfs_buf_normalize_valid_range(np, bp);
+ }
+
+ /*
+ * Since this block is being modified, it must be written
+ * again and not just committed.
+ */
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ nfs_node_lock_force(np);
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ np->n_needcommitcnt--;
+ CHECK_NEEDCOMMITCNT(np);
+ }
+ CLR(bp->nb_flags, NB_NEEDCOMMIT);
+ nfs_node_unlock(np);
+ }
+
+ if (ioflag & IO_SYNC) {
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS) {
+ nfs_flushcommits(np, 1);
+ }
+ } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
+ (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
+ SET(bp->nb_flags, NB_ASYNC);
+ error = nfs_buf_write(bp);
+ if (error) {
+ goto out;
+ }
+ } else {
+ /* If the block wasn't already delayed: charge for the write */
+ if (!ISSET(bp->nb_flags, NB_DELWRI)) {
+ proc_t p = vfs_context_proc(ctx);
+ if (p && p->p_stats) {
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
+ }
+ }
+ nfs_buf_write_delayed(bp);
+ }
+
+ } while (uio_resid(uio) > 0 && n > 0);
+
+out:
+ nfs_node_lock_force(np);
+ np->n_wrbusy--;
+ if ((ioflag & IO_SYNC) && !np->n_wrbusy && !np->n_numoutput) {
+ np->n_flag &= ~NMODIFIED;
+ }
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return error;
+}
+
+
+/*
+ * NFS write call
+ */
+ /*
+  * Synchronous NFS write entry point: pull the thread and credential
+  * out of the vfs context and hand off to nfs_write_rpc2().
+  */
+ int
+ nfs_write_rpc(
+ nfsnode_t np,
+ uio_t uio,
+ vfs_context_t ctx,
+ int *iomodep,
+ uint64_t *wverfp)
+ {
+ thread_t thd = vfs_context_thread(ctx);
+ kauth_cred_t cred = vfs_context_ucred(ctx);
+
+ return nfs_write_rpc2(np, uio, thd, cred, iomodep, wverfp);
+ }
+
+ /*
+  * Core synchronous write loop: splits the uio into nm_wsize-sized
+  * chunks and issues WRITE RPCs via the per-version async send/finish
+  * functions.  On success, *iomodep holds the lowest commit level the
+  * server returned and *wverfp (if requested and set) the server's
+  * write verifier.  Handles NFSv4 state recovery, short writes, and
+  * write-verifier changes (which force all the writes to be redone).
+  */
+ int
+ nfs_write_rpc2(
+ nfsnode_t np,
+ uio_t uio,
+ thread_t thd,
+ kauth_cred_t cred,
+ int *iomodep,
+ uint64_t *wverfp)
+ {
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ int wverfset, commit = 0, committed;
+ uint64_t wverf = 0, wverf2 = 0;
+ size_t nmwsize, totalsize, tsiz, len, rlen = 0;
+ struct nfsreq *req;
+#if CONFIG_NFS4
+ uint32_t stategenid = 0, restart = 0;
+#endif
+ uint32_t vrestart = 0;
+ uio_t uio_save = NULL;
+
+#if DIAGNOSTIC
+ /* XXX limitation based on need to back up uio on short write */
+ if (uio_iovcnt(uio) != 1) {
+ panic("nfs3_write_rpc: iovcnt > 1");
+ }
+#endif
+ FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ nmwsize = nmp->nm_wsize;
+
+ wverfset = 0;
+ committed = NFS_WRITE_FILESYNC;
+
+ totalsize = tsiz = uio_resid(uio);
+ /* NFSv2 can only address offsets that fit in 32 bits */
+ if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
+ FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
+ return EFBIG;
+ }
+
+ /* keep a copy of the uio so we can rewind on short writes/verifier changes */
+ uio_save = uio_duplicate(uio);
+ if (uio_save == NULL) {
+ return EIO;
+ }
+
+ req = zalloc_flags(nfs_req_zone, Z_WAITOK);
+ /* transfer loop: one nm_wsize-sized WRITE RPC per iteration */
+ while (tsiz > 0) {
+ len = (tsiz > nmwsize) ? nmwsize : tsiz;
+ FSDBG(537, np, uio_offset(uio), len, 0);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ break;
+ }
+#if CONFIG_NFS4
+ if (nmp->nm_vers >= NFS_VER4) {
+ stategenid = nmp->nm_stategenid;
+ }
+#endif
+ error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
+ if (!error) {
+ error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
+ }
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ }
+#if CONFIG_NFS4
+ /* on NFSv4 state errors, kick off recovery and retry (bounded) */
+ if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+ (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+ lck_mtx_lock(&nmp->nm_lock);
+ if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+ NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
+ nfs_need_recover(nmp, error);
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ } else {
+ if (error == NFSERR_GRACE) {
+ tsleep(&nmp->nm_state, (PZERO - 1), "nfsgrace", 2 * hz);
+ }
+ if (!(error = nfs_mount_state_wait_for_recovery(nmp))) {
+ continue;
+ }
+ }
+ }
+#endif
+ if (error) {
+ break;
+ }
+ /* NFSv2 has no short writes, commit levels, or verifiers to process */
+ if (nfsvers == NFS_VER2) {
+ tsiz -= len;
+ continue;
+ }
+
+ /* check for a short write */
+ if (rlen < len) {
+ /* Reset the uio to reflect the actual transfer */
+ *uio = *uio_save;
+ uio_update(uio, totalsize - (tsiz - rlen));
+ len = rlen;
+ }
+
+ /* return lowest commit level returned */
+ if (commit < committed) {
+ committed = commit;
+ }
+
+ tsiz -= len;
+
+ /* check write verifier */
+ if (!wverfset) {
+ wverf = wverf2;
+ wverfset = 1;
+ } else if (wverf != wverf2) {
+ /* verifier changed, so we need to restart all the writes */
+ if (++vrestart > 100) {
+ /* give up after too many restarts */
+ error = EIO;
+ break;
+ }
+ *uio = *uio_save; // Reset the uio back to the start
+ committed = NFS_WRITE_FILESYNC;
+ wverfset = 0;
+ tsiz = totalsize;
+ }
+ }
+ if (uio_save) {
+ uio_free(uio_save);
+ }
+ if (wverfset && wverfp) {
+ *wverfp = wverf;
+ }
+ *iomodep = committed;
+ if (error) {
+ /* on error, the residual reflects how much was left unwritten */
+ uio_setresid(uio, tsiz);
+ }
+ NFS_ZFREE(nfs_req_zone, req);
+ FSDBG_BOT(537, np, committed, uio_resid(uio), error);
+ return error;
+ }
+
+ /*
+  * Build and send an asynchronous NFS v2/v3 WRITE request,
+  * copying 'len' bytes of data from 'uio' into the request.
+  * The reply is processed by nfs3_write_rpc_async_finish().
+  */
+ int
+ nfs3_write_rpc_async(
+ nfsnode_t np,
+ uio_t uio,
+ size_t len,
+ thread_t thd,
+ kauth_cred_t cred,
+ int iomode,
+ struct nfsreq_cbinfo *cb,
+ struct nfsreq **reqp)
+ {
+ struct nfsmount *nmp;
+ mount_t mp;
+ int error = 0, nfsvers;
+ struct nfsm_chain nmreq;
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ /* for async mounts, don't bother sending sync write requests */
+ if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
+ ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) {
+ iomode = NFS_WRITE_UNSTABLE;
+ }
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+ if (nfsvers == NFS_VER3) {
+ /* WRITE3args: 64-bit offset, count, stable_how */
+ nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsm_chain_add_32(error, &nmreq, iomode);
+ } else {
+ /* v2 writeargs: beginoffset, 32-bit offset, totalcount
+  * (beginoffset/totalcount are ignored fields per NFSv2) */
+ nfsm_chain_add_32(error, &nmreq, 0);
+ nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
+ nfsm_chain_add_32(error, &nmreq, 0);
+ }
+ /* opaque data length, followed by the data itself */
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsmout_if(error);
+ error = nfsm_chain_add_uio(&nmreq, uio, len);
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, NULL, 0, cb, reqp);
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ return error;
+ }
+
+ /*
+  * Finish an asynchronous NFS v2/v3 WRITE request: wait for the reply,
+  * update cached attributes/wcc info, and return the number of bytes
+  * written (*rlenp), the commit level (*iomodep), and the server's
+  * write verifier (*wverfp).
+  * Returns EINPROGRESS if the async request was restarted.
+  */
+ int
+ nfs3_write_rpc_async_finish(
+ nfsnode_t np,
+ struct nfsreq *req,
+ int *iomodep,
+ size_t *rlenp,
+ uint64_t *wverfp)
+ {
+ struct nfsmount *nmp;
+ int error = 0, lockerror = ENOENT, nfsvers, status;
+ int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
+ u_int64_t xid, wverf;
+ mount_t mp;
+ struct nfsm_chain nmrep;
+
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ nfs_request_async_cancel(req);
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmrep);
+
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ if (error == EINPROGRESS) { /* async request restarted */
+ return error;
+ }
+ nmp = NFSTONMP(np);
+ if (nfs_mount_gone(nmp)) {
+ error = ENXIO;
+ }
+ if (!error && (lockerror = nfs_node_lock(np))) {
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
+ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
+ /* if the pre-op mtime matches our cached mtime, the only change was ours */
+ if (nfstimespeccmp(&np->n_mtime, &premtime, ==)) {
+ updatemtime = 1;
+ }
+ if (!error) {
+ error = status;
+ }
+ nfsm_chain_get_32(error, &nmrep, rlen);
+ nfsmout_if(error);
+ *rlenp = rlen;
+ if (rlen <= 0) {
+ /* a write of zero bytes "succeeding" is treated as an I/O error */
+ error = NFSERR_IO;
+ }
+ nfsm_chain_get_32(error, &nmrep, committed);
+ nfsm_chain_get_64(error, &nmrep, wverf);
+ nfsmout_if(error);
+ if (wverfp) {
+ *wverfp = wverf;
+ }
+ /* cache the write verifier (per RFC 1813 it changes when the server reboots) */
+ lck_mtx_lock(&nmp->nm_lock);
+ if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
+ nmp->nm_verf = wverf;
+ nmp->nm_state |= NFSSTA_HASWRITEVERF;
+ } else if (nmp->nm_verf != wverf) {
+ nmp->nm_verf = wverf;
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ } else {
+ if (!error) {
+ error = status;
+ }
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+ nfsmout_if(error);
+ }
+ if (updatemtime) {
+ NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
+ }
+nfsmout:
+ if (!lockerror) {
+ nfs_node_unlock(np);
+ }
+ nfsm_chain_cleanup(&nmrep);
+ /* for async mounts, report unstable writes as committed (FILESYNC) */
+ if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
+ ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC)) {
+ committed = NFS_WRITE_FILESYNC;
+ }
+ *iomodep = committed;
+ return error;
+ }
+
+/*
+ * NFS mknod vnode op
+ *
+ * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
+ * mode set to specify the file type and the size field for rdev.
+ */
+ int
+ nfs3_vnop_mknod(
+ struct vnop_mknod_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_dvp;
+ * vnode_t *a_vpp;
+ * struct componentname *a_cnp;
+ * struct vnode_attr *a_vap;
+ * vfs_context_t a_context;
+ * } */*ap)
+ {
+ vnode_t dvp = ap->a_dvp;
+ vnode_t *vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode_attr *vap = ap->a_vap;
+ vfs_context_t ctx = ap->a_context;
+ vnode_t newvp = NULL;
+ nfsnode_t np = NULL;
+ struct nfsmount *nmp;
+ nfsnode_t dnp = VTONFS(dvp);
+ struct nfs_vattr *nvattr;
+ fhandle_t *fh;
+ int error = 0, lockerror = ENOENT, busyerror = ENOENT, status = 0, wccpostattr = 0;
+ struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
+ u_int32_t rdev;
+ u_int64_t xid = 0, dxid;
+ int nfsvers, gotuid, gotgid;
+ struct nfsm_chain nmreq, nmrep;
+ struct nfsreq *req;
+
+ nmp = VTONMP(dvp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ if (!VATTR_IS_ACTIVE(vap, va_type)) {
+ return EINVAL;
+ }
+ /* determine the rdev value: real device numbers for chr/blk;
+  * fifo/sock use the v2 all-ones convention (rdev is only sent
+  * in the v2 SATTR — v3 sends major/minor explicitly below) */
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ if (!VATTR_IS_ACTIVE(vap, va_rdev)) {
+ return EINVAL;
+ }
+ rdev = vap->va_rdev;
+ } else if (vap->va_type == VFIFO || vap->va_type == VSOCK) {
+ rdev = 0xffffffff;
+ } else {
+ return ENOTSUP;
+ }
+ if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
+ return ENAMETOOLONG;
+ }
+
+ nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
+ /* claim support for the attributes we send; uid/gid are re-checked below */
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ gotuid = VATTR_IS_ACTIVE(vap, va_uid);
+ gotgid = VATTR_IS_ACTIVE(vap, va_gid);
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ fh = zalloc(nfs_fhandle_zone);
+ req = zalloc_flags(nfs_req_zone, Z_WAITOK);
+ MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+ nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
+ nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
+ nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
+ }
+ } else {
+ nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ if (!error) {
+ error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+ }
+ nfsmout_if(error);
+
+ error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKNOD,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
+ if (!error) {
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ }
+
+ if ((lockerror = nfs_node_lock(dnp))) {
+ error = lockerror;
+ }
+ /* XXX no EEXIST kludge here? */
+ /* save the xid for the directory's wcc update below */
+ dxid = xid;
+ if (!error && !status) {
+ if (dnp->n_flag & NNEGNCENTRIES) {
+ dnp->n_flag &= ~NNEGNCENTRIES;
+ cache_purge_negatives(dvp);
+ }
+ error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, fh, nvattr);
+ }
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
+ }
+ if (!error) {
+ error = status;
+ }
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+
+ if (!lockerror) {
+ dnp->n_flag |= NMODIFIED;
+ /* if directory hadn't changed, update namecache mtime */
+ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
+ }
+ nfs_node_unlock(dnp);
+ /* nfs_getattr() will check changed and purge caches */
+ nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+ }
+
+ /* get the vnode for the new node: use the returned file handle,
+  * or look the name up if the server didn't return one */
+ if (!error && fh->fh_len) {
+ error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh->fh_data, fh->fh_len, nvattr, &xid, req->r_auth, NG_MAKEENTRY, &np);
+ }
+ if (!error && !np) {
+ error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+ }
+ if (!error && np) {
+ newvp = NFSTOV(np);
+ }
+ if (!busyerror) {
+ nfs_node_clear_busy(dnp);
+ }
+
+ if (!error && (gotuid || gotgid) &&
+ (!newvp || nfs_getattrcache(np, nvattr, 0) ||
+ (gotuid && (nvattr->nva_uid != vap->va_uid)) ||
+ (gotgid && (nvattr->nva_gid != vap->va_gid)))) {
+ /* clear ID bits if server didn't use them (or we can't tell) */
+ VATTR_CLEAR_SUPPORTED(vap, va_uid);
+ VATTR_CLEAR_SUPPORTED(vap, va_gid);
+ }
+ /* NOTE(review): np appears to be returned locked by nfs_nget/nfs_lookitup;
+  * it is unlocked here on both the error and success paths — confirm */
+ if (error) {
+ if (newvp) {
+ nfs_node_unlock(np);
+ vnode_put(newvp);
+ }
+ } else {
+ *vpp = newvp;
+ nfs_node_unlock(np);
+ }
+ NFS_ZFREE(nfs_fhandle_zone, fh);
+ NFS_ZFREE(nfs_req_zone, req);
+ FREE(nvattr, M_TEMP);
+ return error;
+ }
+
+/* counter mixed into the NFSv3 exclusive-create verifier */
+static uint32_t create_verf;
+/*
+ * NFS file create call
+ *
+ * Sends a CREATE RPC to make a regular file named by cnp in directory dvp.
+ * For NFSv3 with VA_EXCLUSIVE requested, exclusive-create semantics are
+ * used: the request carries an 8-byte verifier (a local IP address plus
+ * the create_verf counter) instead of attributes, and the attributes are
+ * applied afterwards with a separate SETATTR RPC.  If the server rejects
+ * exclusive create (NFSERR_NOTSUPP), the create is retried unchecked.
+ *
+ * On success the new file's vnode is returned in *ap->a_vpp.
+ */
+int
+nfs3_vnop_create(
+ struct vnop_create_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_dvp;
+ * vnode_t *a_vpp;
+ * struct componentname *a_cnp;
+ * struct vnode_attr *a_vap;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t dvp = ap->a_dvp;
+ struct vnode_attr *vap = ap->a_vap;
+ struct componentname *cnp = ap->a_cnp;
+ struct nfs_vattr *nvattr;
+ fhandle_t *fh;
+ nfsnode_t np = NULL;
+ struct nfsmount *nmp;
+ nfsnode_t dnp = VTONFS(dvp);
+ vnode_t newvp = NULL;
+ int error = 0, lockerror = ENOENT, busyerror = ENOENT, status = 0, wccpostattr = 0, fmode = 0;
+ struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
+ int nfsvers, gotuid, gotgid;
+ u_int64_t xid = 0, dxid;
+ uint32_t val;
+ struct nfsm_chain nmreq, nmrep;
+ struct nfsreq *req;
+ struct nfs_dulookup *dul;
+ int dul_in_progress = 0;
+ int namedattrs;
+
+ nmp = VTONMP(dvp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+
+ /* NFSv2 has a hard limit on name length */
+ if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN)) {
+ return ENAMETOOLONG;
+ }
+
+ nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
+ /* claim these attributes as handled; the ID bits may be cleared again below */
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ gotuid = VATTR_IS_ACTIVE(vap, va_uid);
+ gotgid = VATTR_IS_ACTIVE(vap, va_gid);
+
+ if ((vap->va_vaflags & VA_EXCLUSIVE)
+ ) {
+ fmode |= O_EXCL;
+ /* no times supplied: let the follow-up SETATTR use "set to server time" */
+ if (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time)) {
+ vap->va_vaflags |= VA_UTIMES_NULL;
+ }
+ }
+
+ fh = zalloc(nfs_fhandle_zone);
+ req = zalloc_flags(nfs_req_zone, Z_WAITOK);
+ MALLOC(dul, struct nfs_dulookup *, sizeof(*dul), M_TEMP, M_WAITOK);
+ MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);
+
+again:
+ /*
+ * Retry point: we come back here (with O_EXCL cleared) if the server
+ * doesn't support exclusive create.
+ * NOTE(review): dul_in_progress is not reset before the retry — looks
+ * harmless since dul is re-inited below, but confirm against upstream.
+ */
+ error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+ if (!namedattrs) {
+ nfs_dulookup_init(dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+ }
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+ nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+ if (nfsvers == NFS_VER3) {
+ if (fmode & O_EXCL) {
+ /*
+ * Exclusive create: instead of attributes, send an 8-byte
+ * verifier built from a local IP address (if we have one)
+ * and the bumped create_verf counter.
+ */
+ nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
+ lck_rw_lock_shared(in_ifaddr_rwlock);
+ if (!TAILQ_EMPTY(&in_ifaddrhead)) {
+ val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
+ } else {
+ val = create_verf;
+ }
+ lck_rw_done(in_ifaddr_rwlock);
+ nfsm_chain_add_32(error, &nmreq, val);
+ ++create_verf;
+ nfsm_chain_add_32(error, &nmreq, create_verf);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
+ nfsm_chain_add_v3sattr(nmp, error, &nmreq, vap);
+ }
+ } else {
+ nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+
+ error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
+ if (!error) {
+ /* overlap a lookup of the same name while the RPC is in flight */
+ if (!namedattrs) {
+ nfs_dulookup_start(dul, dnp, ctx);
+ dul_in_progress = 1;
+ }
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ }
+
+ if ((lockerror = nfs_node_lock(dnp))) {
+ error = lockerror;
+ }
+ dxid = xid;
+ if (!error && !status) {
+ /* a new name now exists, so drop any cached negative entries */
+ if (dnp->n_flag & NNEGNCENTRIES) {
+ dnp->n_flag &= ~NNEGNCENTRIES;
+ cache_purge_negatives(dvp);
+ }
+ error = nfsm_chain_get_fh_attr(nmp, &nmrep, dnp, ctx, nfsvers, &xid, fh, nvattr);
+ }
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
+ }
+ if (!error) {
+ error = status;
+ }
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+
+ if (!lockerror) {
+ dnp->n_flag |= NMODIFIED;
+ /* if directory hadn't changed, update namecache mtime */
+ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
+ }
+ nfs_node_unlock(dnp);
+ /* nfs_getattr() will check changed and purge caches */
+ nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+ }
+
+ /* find (or create) the nfsnode/vnode for the newly created file */
+ if (!error && fh->fh_len) {
+ error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh->fh_data, fh->fh_len, nvattr, &xid, req->r_auth, NG_MAKEENTRY, &np);
+ }
+ if (!error && !np) {
+ /* reply carried no usable file handle: look the name up instead */
+ error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+ }
+ if (!error && np) {
+ newvp = NFSTOV(np);
+ }
+
+ if (dul_in_progress) {
+ nfs_dulookup_finish(dul, dnp, ctx);
+ }
+ if (!busyerror) {
+ nfs_node_clear_busy(dnp);
+ }
+
+ if (error) {
+ /* server can't do exclusive create: retry once in unchecked mode */
+ if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
+ fmode &= ~O_EXCL;
+ goto again;
+ }
+ if (newvp) {
+ nfs_node_unlock(np);
+ vnode_put(newvp);
+ }
+ } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
+ /* exclusive create carried no attributes, so set them now */
+ nfs_node_unlock(np);
+ error = nfs3_setattr_rpc(np, vap, ctx);
+ if (error && (gotuid || gotgid)) {
+ /* it's possible the server didn't like our attempt to set IDs. */
+ /* so, let's try it again without those */
+ VATTR_CLEAR_ACTIVE(vap, va_uid);
+ VATTR_CLEAR_ACTIVE(vap, va_gid);
+ error = nfs3_setattr_rpc(np, vap, ctx);
+ }
+ if (error) {
+ vnode_put(newvp);
+ } else {
+ nfs_node_lock_force(np);
+ }
+ }
+ if (!error) {
+ *ap->a_vpp = newvp;
+ }
+ if (!error && (gotuid || gotgid) &&
+ (!newvp || nfs_getattrcache(np, nvattr, 0) ||
+ (gotuid && (nvattr->nva_uid != vap->va_uid)) ||
+ (gotgid && (nvattr->nva_gid != vap->va_gid)))) {
+ /* clear ID bits if server didn't use them (or we can't tell) */
+ VATTR_CLEAR_SUPPORTED(vap, va_uid);
+ VATTR_CLEAR_SUPPORTED(vap, va_gid);
+ }
+ if (!error) {
+ nfs_node_unlock(np);
+ }
+ NFS_ZFREE(nfs_fhandle_zone, fh);
+ NFS_ZFREE(nfs_req_zone, req);
+ FREE(dul, M_TEMP);
+ FREE(nvattr, M_TEMP);
+ return error;
+}
+
+/*
+ * NFS file remove call
+ * To try and make NFS semantics closer to UFS semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If vnode_isinuse()
+ * If a rename is not already in the works
+ * call nfs_sillyrename() to set it up
+ * else
+ * do the remove RPC
+ */
+int
+nfs_vnop_remove(
+ struct vnop_remove_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_dvp;
+ * vnode_t a_vp;
+ * struct componentname *a_cnp;
+ * int a_flags;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t vp = ap->a_vp;
+ vnode_t dvp = ap->a_dvp;
+ struct componentname *cnp = ap->a_cnp;
+ nfsnode_t dnp = VTONFS(dvp);
+ nfsnode_t np = VTONFS(vp);
+ int error = 0, nfsvers, namedattrs, inuse, gotattr = 0, flushed = 0, setsize = 0;
+ struct nfs_vattr *nvattr;
+ struct nfsmount *nmp;
+ struct nfs_dulookup *dul;
+
+ /* XXX prevent removing a sillyrenamed file? */
+
+ nmp = NFSTONMP(dnp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+
+ /* directories are not removed through this vnop */
+ if (vnode_isdir(vp)) {
+ return EPERM;
+ }
+
+ nfsvers = nmp->nm_vers;
+ namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+ MALLOC(dul, struct nfs_dulookup *, sizeof(*dul), M_TEMP, M_WAITOK);
+ MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);
+
+again_relock:
+ /* retry point: everything below is dropped while flushing buffers */
+ error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
+ if (error) {
+ goto out_free;
+ }
+
+ /* lock the node while we remove the file */
+ lck_mtx_lock(nfs_node_hash_mutex);
+ while (np->n_hflag & NHLOCKED) {
+ np->n_hflag |= NHLOCKWANT;
+ msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
+ }
+ np->n_hflag |= NHLOCKED;
+ lck_mtx_unlock(nfs_node_hash_mutex);
+
+ if (!namedattrs) {
+ nfs_dulookup_init(dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+ }
+
+again:
+ inuse = vnode_isinuse(vp, 0);
+ if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
+ /* Caller requested Carbon delete semantics, but file is busy */
+ error = EBUSY;
+ goto out;
+ }
+ if (inuse && !gotattr) {
+ /* need the link count to choose between remove and sillyrename */
+ if (nfs_getattr(np, nvattr, ctx, NGA_CACHED)) {
+ nvattr->nva_nlink = 1;
+ }
+ gotattr = 1;
+ goto again;
+ }
+ /* remove outright if not in use, or if a sillyrenamed file still has other links */
+ if (!inuse || (np->n_sillyrename && (nvattr->nva_nlink > 1))) {
+ if (!inuse && !flushed) { /* flush all the buffers first */
+ /* unlock the node */
+ lck_mtx_lock(nfs_node_hash_mutex);
+ np->n_hflag &= ~NHLOCKED;
+ if (np->n_hflag & NHLOCKWANT) {
+ np->n_hflag &= ~NHLOCKWANT;
+ wakeup(np);
+ }
+ lck_mtx_unlock(nfs_node_hash_mutex);
+ nfs_node_clear_busy2(dnp, np);
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
+ flushed = 1;
+ if (error == EINTR) {
+ nfs_node_lock_force(np);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ goto out_free;
+ }
+ if (!namedattrs) {
+ nfs_dulookup_finish(dul, dnp, ctx);
+ }
+ /* locks/busy state were dropped for the flush; take it all again */
+ goto again_relock;
+ }
+#if CONFIG_NFS4
+ /* return any delegation before the file goes away */
+ if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK)) {
+ nfs4_delegation_return(np, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+ }
+#endif
+ /*
+ * Purge the name cache so that the chance of a lookup for
+ * the name succeeding while the remove is in progress is
+ * minimized.
+ */
+ nfs_name_cache_purge(dnp, np, cnp, ctx);
+
+ if (!namedattrs) {
+ nfs_dulookup_start(dul, dnp, ctx);
+ }
+
+ /* Do the rpc */
+ error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx));
+
+ /*
+ * Kludge City: If the first reply to the remove rpc is lost..
+ * the reply to the retransmitted request will be ENOENT
+ * since the file was in fact removed
+ * Therefore, we cheat and return success.
+ */
+ if (error == ENOENT) {
+ error = 0;
+ }
+
+ if (!error && !inuse && !np->n_sillyrename) {
+ /*
+ * removal succeeded, it's not in use, and not silly renamed so
+ * remove nfsnode from hash now so we can't accidentally find it
+ * again if another object gets created with the same filehandle
+ * before this vnode gets reclaimed
+ */
+ lck_mtx_lock(nfs_node_hash_mutex);
+ if (np->n_hflag & NHHASHED) {
+ LIST_REMOVE(np, n_hash);
+ np->n_hflag &= ~NHHASHED;
+ FSDBG(266, 0, np, np->n_flag, 0xb1eb1e);
+ }
+ lck_mtx_unlock(nfs_node_hash_mutex);
+ /* clear flags now: won't get nfs_vnop_inactive for recycled vnode */
+ /* clear all flags other than these */
+ nfs_node_lock_force(np);
+ np->n_flag &= (NMODIFIED);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ vnode_recycle(vp);
+ setsize = 1;
+ } else {
+ nfs_node_lock_force(np);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ }
+ } else if (!np->n_sillyrename) {
+ /* file is in use: sillyrename it so the data survives until last close */
+ if (!namedattrs) {
+ nfs_dulookup_start(dul, dnp, ctx);
+ }
+ error = nfs_sillyrename(dnp, np, cnp, ctx);
+ nfs_node_lock_force(np);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ } else {
+ /* already sillyrenamed (and no other links): nothing more to remove now */
+ nfs_node_lock_force(np);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ if (!namedattrs) {
+ nfs_dulookup_start(dul, dnp, ctx);
+ }
+ }
+
+ /* nfs_getattr() will check changed and purge caches */
+ nfs_getattr(dnp, NULL, ctx, NGA_CACHED);
+ if (!namedattrs) {
+ nfs_dulookup_finish(dul, dnp, ctx);
+ }
+out:
+ /* unlock the node */
+ lck_mtx_lock(nfs_node_hash_mutex);
+ np->n_hflag &= ~NHLOCKED;
+ if (np->n_hflag & NHLOCKWANT) {
+ np->n_hflag &= ~NHLOCKWANT;
+ wakeup(np);
+ }
+ lck_mtx_unlock(nfs_node_hash_mutex);
+ nfs_node_clear_busy2(dnp, np);
+ if (setsize) {
+ ubc_setsize(vp, 0);
+ }
+out_free:
+ FREE(dul, M_TEMP);
+ FREE(nvattr, M_TEMP);
+ return error;
+}
+
+/*
+ * NFS silly-renamed file removal function called from nfs_vnop_inactive
+ */
+int
+nfs_removeit(struct nfs_sillyrename *nsp)
+{
+ nfsnode_t dnp = nsp->nsr_dnp;
+ struct nfsmount *nmp = NFSTONMP(dnp);
+
+ /* nothing to send if the mount is gone */
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ /* no thread context here; use the credential saved with the sillyrename */
+ return nmp->nm_funcs->nf_remove_rpc(dnp, nsp->nsr_name, nsp->nsr_namlen,
+ NULL, nsp->nsr_cred);
+}
+
+/*
+ * NFS remove rpc, called from nfs_remove() and nfs_removeit().
+ */
+int
+nfs3_remove_rpc(
+ nfsnode_t dnp,
+ char *name,
+ int namelen,
+ thread_t thd,
+ kauth_cred_t cred)
+{
+ int error = 0, lockerror = ENOENT, status = 0, wccpostattr = 0;
+ struct timespec premtime = { .tv_sec = 0, .tv_nsec = 0 };
+ struct nfsmount *nmp;
+ int nfsvers;
+ u_int64_t xid;
+ struct nfsm_chain nmreq, nmrep;
+
+ nmp = NFSTONMP(dnp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+ /* NFSv2 has a hard limit on name length */
+ if ((nfsvers == NFS_VER2) && (namelen > NFS_MAXNAMLEN)) {
+ return ENAMETOOLONG;
+ }
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ /* REMOVE request is just the directory file handle plus the name */
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + NFSX_UNSIGNED + nfsm_rndup(namelen));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+ nfsm_chain_add_name(error, &nmreq, name, namelen, nmp);
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+
+ error = nfs_request2(dnp, NULL, &nmreq, NFSPROC_REMOVE, thd, cred, NULL, 0, &nmrep, &xid, &status);
+
+ if ((lockerror = nfs_node_lock(dnp))) {
+ error = lockerror;
+ }
+ if (nfsvers == NFS_VER3) {
+ /* pull the directory's wcc data from the v3 reply */
+ nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &xid);
+ }
+ nfsmout_if(error);
+ dnp->n_flag |= NMODIFIED;
+ /* if directory hadn't changed, update namecache mtime */
+ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==)) {
+ NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
+ }
+ if (!wccpostattr) {
+ /* no post-op attributes in the reply: invalidate cached attrs */
+ NATTRINVALIDATE(dnp);
+ }
+ if (!error) {
+ error = status;
+ }
+nfsmout:
+ if (!lockerror) {
+ nfs_node_unlock(dnp);
+ }
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+ return error;
+}
+
+/*
+ * NFS file rename call
+ */
+int
+nfs_vnop_rename(
+ struct vnop_rename_args /* {
+ * struct vnodeop_desc *a_desc;
+ * vnode_t a_fdvp;
+ * vnode_t a_fvp;
+ * struct componentname *a_fcnp;
+ * vnode_t a_tdvp;
+ * vnode_t a_tvp;
+ * struct componentname *a_tcnp;
+ * vfs_context_t a_context;
+ * } */*ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t fdvp = ap->a_fdvp;
+ vnode_t fvp = ap->a_fvp;
+ vnode_t tdvp = ap->a_tdvp;
+ vnode_t tvp = ap->a_tvp;
+ nfsnode_t fdnp, fnp, tdnp, tnp;
+ struct componentname *tcnp = ap->a_tcnp;
+ struct componentname *fcnp = ap->a_fcnp;
+ int error, nfsvers, inuse = 0, tvprecycle = 0, locked = 0;
+ mount_t fmp, tdmp, tmp;
+ struct nfs_vattr *nvattr;
+ struct nfsmount *nmp;
+
+ fdnp = VTONFS(fdvp);
+ fnp = VTONFS(fvp);
+ tdnp = VTONFS(tdvp);
+ tnp = tvp ? VTONFS(tvp) : NULL;
+
+ nmp = NFSTONMP(fdnp);
+ if (nfs_mount_gone(nmp)) {
+ return ENXIO;
+ }
+ nfsvers = nmp->nm_vers;
+
+ error = nfs_node_set_busy4(fdnp, fnp, tdnp, tnp, vfs_context_thread(ctx));
+ if (error) {
+ return error;
+ }
+
+ MALLOC(nvattr, struct nfs_vattr *, sizeof(*nvattr), M_TEMP, M_WAITOK);