+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend)
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ /*
+ * UBC doesn't handle partial pages, so we need to make sure
+ * that any pages left in the page cache are completely valid.
+ *
+ * Writes that are smaller than a block are delayed if they
+ * don't extend to the end of the block.
+ *
+ * If the block isn't (completely) cached, we may need to read
+ * in some parts of pages that aren't covered by the write.
+ * If the write offset (on) isn't page aligned, we'll need to
+ * read the start of the first page being written to. Likewise,
+ * if the offset of the end of the write (on+n) isn't page aligned,
+ * we'll need to read the end of the last page being written to.
+ *
+ * Notes:
+ * We don't want to read anything we're just going to write over.
+ * We don't want to read anything we're just going to drop when the
+ * I/O is complete (i.e. don't do reads for NOCACHE requests).
+ * We don't want to issue multiple I/Os if we don't have to
+ * (because they're synchronous rpcs).
+ * We don't want to read anything we already have modified in the
+ * page cache.
+ */
+ if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+ int firstpg, lastpg, dirtypg;
+ int firstpgoff, lastpgoff;
+ start = end = -1;
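+ /* determine which pages the write touches and the offsets within the first and last of them */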
+ firstpg = on/PAGE_SIZE;
+ firstpgoff = on & PAGE_MASK;
+ lastpg = (on+n-1)/PAGE_SIZE;
+ lastpgoff = (on+n) & PAGE_MASK;
+ if (firstpgoff && !NBPGVALID(bp,firstpg)) {
+ /* need to read start of first page */
+ start = firstpg * PAGE_SIZE;
+ end = start + firstpgoff;
+ }
+ if (lastpgoff && !NBPGVALID(bp,lastpg)) {
+ /* need to read end of last page */
+ if (start < 0)
+ start = (lastpg * PAGE_SIZE) + lastpgoff;
+ end = (lastpg + 1) * PAGE_SIZE;
+ }
+ if (ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /*
+ * For nocache writes, if there is any partial page at the
+ * start or end of the write range, then we do the write
+ * synchronously to make sure that we can drop the data
+ * from the cache as soon as the WRITE finishes. Normally,
+ * we would do an unstable write and not drop the data until
+ * it was committed. But doing that here would risk allowing
+ * invalid data to be read from the cache between the WRITE
+ * and the COMMIT.
+ * (NB_STABLE indicates that data writes should be FILESYNC)
+ */
+ if (end > start)
+ SET(bp->nb_flags, NB_STABLE);
+ goto skipread;
+ }
+ if (end > start) {
+ /* need to read the data in range: start...end-1 */
+
+ /* first, check for dirty pages in between */
+ /* if there are, we'll have to do two reads because */
+ /* we don't want to overwrite the dirty pages. */
+ for (dirtypg=start/PAGE_SIZE; dirtypg <= (end-1)/PAGE_SIZE; dirtypg++)
+ if (NBPGDIRTY(bp,dirtypg))
+ break;
+
+ /* if start is at beginning of page, try */
+ /* to get any preceding pages as well. */
+ if (!(start & PAGE_MASK)) {
+ /* stop at next dirty/valid page or start of block */
+ for (; start > 0; start-=PAGE_SIZE)
+ if (NBPGVALID(bp,((start-1)/PAGE_SIZE)))
+ break;
+ }
+
+ NFS_BUF_MAP(bp);
+ /* setup uio for read(s) */
+ boff = NBOFF(bp);
+ auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+ &auio_buf, sizeof(auio_buf));
+
+ if (dirtypg <= (end-1)/PAGE_SIZE) {
+ /* there's a dirty page in the way, so just do two reads */
+ /* we'll read the preceding data here */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < on))
+ bp->nb_validend = on;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset */
+ for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ }
+ /* adjust start to read any trailing data */
+ start = on+n;
+ }
+
+ /* if end is at end of page, try to */
+ /* get any following pages as well. */
+ if (!(end & PAGE_MASK)) {
+ /* stop at next valid page or end of block */
+ for (; end < biosize; end+=PAGE_SIZE)
+ if (NBPGVALID(bp,end/PAGE_SIZE))
+ break;
+ }
+
+ if (((boff+start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ bzero(bp->nb_data + start, end - start);
+ error = 0;
+ } else {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end))
+ bp->nb_validend = end;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE)
+ NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
+ }
+ /* Note: pages being written to will be validated when written */
+ }
+ }
+skipread:
+
+ if (ISSET(bp->nb_flags, NB_ERROR)) {
+ error = bp->nb_error;
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
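+ /* mark the node modified before copying the caller's data into the buffer */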
+ nfs_node_lock_force(np);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+
+ NFS_BUF_MAP(bp);
+ error = uiomove((char *)bp->nb_data + on, n, uio);
+ if (error) {
+ SET(bp->nb_flags, NB_ERROR);
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ /* validate any pages written to */
+ start = on & ~PAGE_MASK;
+ for (; start < on+n; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /*
+ * This may seem a little weird, but we don't actually set the
+ * dirty bits for writes. This is because we keep the dirty range
+ * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
+ * delayed writes, when we give the pages back to the VM we don't
+ * want to keep them marked dirty, because when we later write the
+ * buffer we won't be able to tell which pages were written dirty
+ * and which pages were mmapped and dirtied.
+ */
+ }
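+ /* extend the buffer's dirty byte range (nb_dirtyoff/nb_dirtyend) to cover this write */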
+ if (bp->nb_dirtyend > 0) {
+ bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
+ bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
+ } else {
+ bp->nb_dirtyoff = on;
+ bp->nb_dirtyend = on + n;
+ }
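+ /* if the valid range is empty or doesn't overlap the dirty range, reset it to the dirty range; */
+ /* otherwise merge the two */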
+ if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
+ bp->nb_validoff > bp->nb_dirtyend) {
+ bp->nb_validoff = bp->nb_dirtyoff;
+ bp->nb_validend = bp->nb_dirtyend;
+ } else {
+ bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
+ bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE))
+ nfs_buf_normalize_valid_range(np, bp);
+
+ /*
+ * Since this block is being modified, it must be written
+ * again and not just committed.
+ */
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ nfs_node_lock_force(np);
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ np->n_needcommitcnt--;
+ CHECK_NEEDCOMMITCNT(np);
+ }
+ CLR(bp->nb_flags, NB_NEEDCOMMIT);
+ nfs_node_unlock(np);
+ }
+
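+ /* issue the write now for IO_SYNC, asynchronously for full/append/nocache blocks, else delay it */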
+ if (ioflag & IO_SYNC) {
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
+ (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
+ SET(bp->nb_flags, NB_ASYNC);
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ } else {
+ /* If the block wasn't already delayed: charge for the write */
+ if (!ISSET(bp->nb_flags, NB_DELWRI)) {
+ proc_t p = vfs_context_proc(ctx);
+ if (p && p->p_stats)
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
+ }
+ nfs_buf_write_delayed(bp);
+ }
+ if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS)
+ nfs_flushcommits(np, 1);
+
+ } while (uio_resid(uio) > 0 && n > 0);
+
+out:
+ nfs_node_lock_force(np);
+ np->n_wrbusy--;
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return (error);
+}
+
+
+/*
+ * NFS write call
+ */
+int
+nfs_write_rpc(
+ nfsnode_t np,
+ uio_t uio,
+ vfs_context_t ctx,
+ int *iomodep,
+ uint64_t *wverfp)
+{
+ return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
+}
+
+int
+nfs_write_rpc2(
+ nfsnode_t np,
+ uio_t uio,
+ thread_t thd,
+ kauth_cred_t cred,
+ int *iomodep,
+ uint64_t *wverfp)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ int wverfset, commit, committed;
+ uint64_t wverf = 0, wverf2;
+ size_t nmwsize, totalsize, tsiz, len, rlen;
+ struct nfsreq rq, *req = &rq;
+ uint32_t stategenid = 0, vrestart = 0, restart = 0;
+ uio_t uio_save = NULL;
+
+#if DIAGNOSTIC
+ /* XXX limitation based on need to back up uio on short write */
+ if (uio_iovcnt(uio) != 1)
+ panic("nfs3_write_rpc: iovcnt > 1");
+#endif
+ FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+ nmwsize = nmp->nm_wsize;
+
+ wverfset = 0;
+ committed = NFS_WRITE_FILESYNC;
+
+ totalsize = tsiz = uio_resid(uio);
+ if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
+ FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
+ return (EFBIG);
+ }
+
+ uio_save = uio_duplicate(uio);
+ if (uio_save == NULL) {
+ return (EIO);
+ }
+
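+ /* send the data as a series of WRITE RPCs of at most nmwsize bytes each */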
+ while (tsiz > 0) {
+ len = (tsiz > nmwsize) ? nmwsize : tsiz;
+ FSDBG(537, np, uio_offset(uio), len, 0);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ break;
+ }
+ if (nmp->nm_vers >= NFS_VER4)
+ stategenid = nmp->nm_stategenid;
+ error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
+ if (!error)
+ error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ error = ENXIO;
+ if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+ (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+ lck_mtx_lock(&nmp->nm_lock);
+ if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid)) {
+ NP(np, "nfs_write_rpc: error %d, initiating recovery", error);
+ nfs_need_recover(nmp, error);
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ if (np->n_flag & NREVOKE) {
+ error = EIO;
+ } else {
+ if (error == NFSERR_GRACE)
+ tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+ if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+ continue;
+ }
+ }
+ if (error)
+ break;
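+ /* NFSv2 has no commit levels or write verifiers, so there's nothing more to check */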
+ if (nfsvers == NFS_VER2) {
+ tsiz -= len;
+ continue;
+ }
+
+ /* check for a short write */
+ if (rlen < len) {
+ /* Reset the uio to reflect the actual transfer */
+ *uio = *uio_save;
+ uio_update(uio, totalsize - (tsiz - rlen));
+ len = rlen;
+ }
+
+ /* return lowest commit level returned */
+ if (commit < committed)
+ committed = commit;
+
+ tsiz -= len;
+
+ /* check write verifier */
+ if (!wverfset) {
+ wverf = wverf2;
+ wverfset = 1;
+ } else if (wverf != wverf2) {
+ /* verifier changed, so we need to restart all the writes */
+ if (++vrestart > 100) {
+ /* give up after too many restarts */
+ error = EIO;
+ break;
+ }
+ *uio = *uio_save; // Reset the uio back to the start
+ committed = NFS_WRITE_FILESYNC;
+ wverfset = 0;
+ tsiz = totalsize;
+ }
+ }
+ if (uio_save)
+ uio_free(uio_save);
+ if (wverfset && wverfp)
+ *wverfp = wverf;
+ *iomodep = committed;
+ if (error)
+ uio_setresid(uio, tsiz);
+ FSDBG_BOT(537, np, committed, uio_resid(uio), error);
+ return (error);
+}
+
+int
+nfs3_write_rpc_async(
+ nfsnode_t np,
+ uio_t uio,
+ size_t len,
+ thread_t thd,
+ kauth_cred_t cred,
+ int iomode,
+ struct nfsreq_cbinfo *cb,
+ struct nfsreq **reqp)
+{
+ struct nfsmount *nmp;
+ mount_t mp;
+ int error = 0, nfsvers;
+ struct nfsm_chain nmreq;
+
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+
+ /* for async mounts, don't bother sending sync write requests */
+ if ((iomode != NFS_WRITE_UNSTABLE) && nfs_allow_async &&
+ ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
+ iomode = NFS_WRITE_UNSTABLE;
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsm_chain_add_32(error, &nmreq, iomode);
+ } else {
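+ /* NFSv2 WRITE args: beginoffset, offset, totalcount; beginoffset and totalcount are unused */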
+ nfsm_chain_add_32(error, &nmreq, 0);
+ nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
+ nfsm_chain_add_32(error, &nmreq, 0);
+ }
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsmout_if(error);
+ error = nfsm_chain_add_uio(&nmreq, uio, len);
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, NULL, 0, cb, reqp);
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ return (error);
+}
+
+int
+nfs3_write_rpc_async_finish(
+ nfsnode_t np,
+ struct nfsreq *req,
+ int *iomodep,
+ size_t *rlenp,
+ uint64_t *wverfp)
+{
+ struct nfsmount *nmp;
+ int error = 0, lockerror = ENOENT, nfsvers, status;
+ int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
+ u_int64_t xid, wverf;
+ mount_t mp;
+ struct nfsm_chain nmrep;
+
+ nmp = NFSTONMP(np);
+ if (!nmp) {
+ nfs_request_async_cancel(req);
+ return (ENXIO);
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmrep);
+
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ if (error == EINPROGRESS) /* async request restarted */
+ return (error);
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ error = ENXIO;
+ if (!error && (lockerror = nfs_node_lock(np)))
+ error = lockerror;
+ if (nfsvers == NFS_VER3) {
+ struct timespec premtime = { 0, 0 };
+ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
+ if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
+ updatemtime = 1;
+ if (!error)
+ error = status;
+ nfsm_chain_get_32(error, &nmrep, rlen);
+ nfsmout_if(error);
+ *rlenp = rlen;
+ if (rlen <= 0)
+ error = NFSERR_IO;
+ nfsm_chain_get_32(error, &nmrep, committed);
+ nfsm_chain_get_64(error, &nmrep, wverf);
+ nfsmout_if(error);
+ if (wverfp)
+ *wverfp = wverf;
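+ /* remember the server's write verifier so a later change (e.g. server reboot) can be detected */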
+ lck_mtx_lock(&nmp->nm_lock);
+ if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
+ nmp->nm_verf = wverf;
+ nmp->nm_state |= NFSSTA_HASWRITEVERF;
+ } else if (nmp->nm_verf != wverf) {
+ nmp->nm_verf = wverf;
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ } else {
+ if (!error)
+ error = status;
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, &xid);
+ nfsmout_if(error);
+ }
+ if (updatemtime)
+ NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
+nfsmout:
+ if (!lockerror)
+ nfs_node_unlock(np);
+ nfsm_chain_cleanup(&nmrep);
+ if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
+ ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
+ committed = NFS_WRITE_FILESYNC;
+ *iomodep = committed;
+ return (error);
+}
+
+/*
+ * NFS mknod vnode op
+ *
+ * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
+ * mode set to specify the file type and the size field for rdev.
+ */
+int
+nfs3_vnop_mknod(
+ struct vnop_mknod_args /* {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_dvp;
+ vnode_t *a_vpp;
+ struct componentname *a_cnp;
+ struct vnode_attr *a_vap;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ vnode_t dvp = ap->a_dvp;
+ vnode_t *vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode_attr *vap = ap->a_vap;
+ vfs_context_t ctx = ap->a_context;
+ vnode_t newvp = NULL;
+ nfsnode_t np = NULL;
+ struct nfsmount *nmp;
+ nfsnode_t dnp = VTONFS(dvp);
+ struct nfs_vattr nvattr;
+ fhandle_t fh;
+ int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
+ struct timespec premtime = { 0, 0 };
+ u_int32_t rdev;
+ u_int64_t xid = 0, dxid;
+ int nfsvers, gotuid, gotgid;
+ struct nfsm_chain nmreq, nmrep;
+ struct nfsreq rq, *req = &rq;
+
+ nmp = VTONMP(dvp);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+
+ if (!VATTR_IS_ACTIVE(vap, va_type))
+ return (EINVAL);
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ if (!VATTR_IS_ACTIVE(vap, va_rdev))
+ return (EINVAL);
+ rdev = vap->va_rdev;
+ } else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
+ rdev = 0xffffffff;
+ else {
+ return (ENOTSUP);
+ }
+ if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
+ return (ENAMETOOLONG);
+
+ nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ gotuid = VATTR_IS_ACTIVE(vap, va_uid);
+ gotgid = VATTR_IS_ACTIVE(vap, va_gid);
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+ nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
+ nfsm_chain_add_v3sattr(error, &nmreq, vap);
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
+ nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
+ }
+ } else {
+ nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ if (!error)
+ error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+ nfsmout_if(error);
+
+ error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_MKNOD,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
+ if (!error)
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+
+ if ((lockerror = nfs_node_lock(dnp)))
+ error = lockerror;
+ /* XXX no EEXIST kludge here? */
+ dxid = xid;
+ if (!error && !status) {
+ if (dnp->n_flag & NNEGNCENTRIES) {
+ dnp->n_flag &= ~NNEGNCENTRIES;
+ cache_purge_negatives(dvp);
+ }
+ error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
+ }
+ if (nfsvers == NFS_VER3)
+ nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
+ if (!error)
+ error = status;
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+
+ if (!lockerror) {
+ dnp->n_flag |= NMODIFIED;
+ /* if directory hadn't changed, update namecache mtime */
+ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
+ NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
+ nfs_node_unlock(dnp);
+ /* nfs_getattr() will check changed and purge caches */
+ nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+ }
+
+ if (!error && fh.fh_len)
+ error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
+ if (!error && !np)
+ error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+ if (!error && np)
+ newvp = NFSTOV(np);
+ if (!busyerror)
+ nfs_node_clear_busy(dnp);
+
+ if (!error && (gotuid || gotgid) &&
+ (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
+ (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
+ (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
+ /* clear ID bits if server didn't use them (or we can't tell) */
+ VATTR_CLEAR_SUPPORTED(vap, va_uid);
+ VATTR_CLEAR_SUPPORTED(vap, va_gid);
+ }
+ if (error) {
+ if (newvp) {
+ nfs_node_unlock(np);
+ vnode_put(newvp);
+ }
+ } else {
+ *vpp = newvp;
+ nfs_node_unlock(np);
+ }
+ return (error);
+}
+
+static uint32_t create_verf;
+/*
+ * NFS file create call
+ */
+int
+nfs3_vnop_create(
+ struct vnop_create_args /* {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_dvp;
+ vnode_t *a_vpp;
+ struct componentname *a_cnp;
+ struct vnode_attr *a_vap;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t dvp = ap->a_dvp;
+ struct vnode_attr *vap = ap->a_vap;
+ struct componentname *cnp = ap->a_cnp;
+ struct nfs_vattr nvattr;
+ fhandle_t fh;
+ nfsnode_t np = NULL;
+ struct nfsmount *nmp;
+ nfsnode_t dnp = VTONFS(dvp);
+ vnode_t newvp = NULL;
+ int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0, fmode = 0;
+ struct timespec premtime = { 0, 0 };
+ int nfsvers, gotuid, gotgid;
+ u_int64_t xid, dxid;
+ uint32_t val;
+ struct nfsm_chain nmreq, nmrep;
+ struct nfsreq rq, *req = &rq;
+ struct nfs_dulookup dul;
+
+ nmp = VTONMP(dvp);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+
+ if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
+ return (ENAMETOOLONG);
+
+ nfs_avoid_needless_id_setting_on_create(dnp, vap, ctx);
+
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ gotuid = VATTR_IS_ACTIVE(vap, va_uid);
+ gotgid = VATTR_IS_ACTIVE(vap, va_gid);
+
+ if (vap->va_vaflags & VA_EXCLUSIVE) {
+ fmode |= O_EXCL;
+ if (!VATTR_IS_ACTIVE(vap, va_access_time) || !VATTR_IS_ACTIVE(vap, va_modify_time))
+ vap->va_vaflags |= VA_UTIMES_NULL;
+ }
+
+again:
+ error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+ nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 2 * NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+ nfsm_chain_add_name(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen, nmp);
+ if (nfsvers == NFS_VER3) {
+ if (fmode & O_EXCL) {
+ nfsm_chain_add_32(error, &nmreq, NFS_CREATE_EXCLUSIVE);
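+ /* build the 8-byte exclusive-create verifier from the host's primary IP address */
+ /* (falling back to a counter) plus the incrementing create_verf counter */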
+ lck_rw_lock_shared(in_ifaddr_rwlock);
+ if (!TAILQ_EMPTY(&in_ifaddrhead))
+ val = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
+ else
+ val = create_verf;
+ lck_rw_done(in_ifaddr_rwlock);
+ nfsm_chain_add_32(error, &nmreq, val);
+ ++create_verf;
+ nfsm_chain_add_32(error, &nmreq, create_verf);
+ } else {
+ nfsm_chain_add_32(error, &nmreq, NFS_CREATE_UNCHECKED);
+ nfsm_chain_add_v3sattr(error, &nmreq, vap);
+ }
+ } else {
+ nfsm_chain_add_v2sattr(error, &nmreq, vap, 0);
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+
+ error = nfs_request_async(dnp, NULL, &nmreq, NFSPROC_CREATE,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx), NULL, 0, NULL, &req);
+ if (!error) {
+ nfs_dulookup_start(&dul, dnp, ctx);
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ }
+
+ if ((lockerror = nfs_node_lock(dnp)))
+ error = lockerror;
+ dxid = xid;
+ if (!error && !status) {
+ if (dnp->n_flag & NNEGNCENTRIES) {
+ dnp->n_flag &= ~NNEGNCENTRIES;
+ cache_purge_negatives(dvp);
+ }
+ error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
+ }
+ if (nfsvers == NFS_VER3)
+ nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
+ if (!error)
+ error = status;
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+
+ if (!lockerror) {
+ dnp->n_flag |= NMODIFIED;
+ /* if directory hadn't changed, update namecache mtime */
+ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
+ NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
+ nfs_node_unlock(dnp);
+ /* nfs_getattr() will check changed and purge caches */
+ nfs_getattr(dnp, NULL, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+ }
+
+ if (!error && fh.fh_len)
+ error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, rq.r_auth, NG_MAKEENTRY, &np);
+ if (!error && !np)
+ error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+ if (!error && np)
+ newvp = NFSTOV(np);
+
+ nfs_dulookup_finish(&dul, dnp, ctx);
+ if (!busyerror)
+ nfs_node_clear_busy(dnp);
+
+ if (error) {
+ if ((nfsvers == NFS_VER3) && (fmode & O_EXCL) && (error == NFSERR_NOTSUPP)) {
+ fmode &= ~O_EXCL;
+ goto again;
+ }
+ if (newvp) {
+ nfs_node_unlock(np);
+ vnode_put(newvp);
+ }
+ } else if ((nfsvers == NFS_VER3) && (fmode & O_EXCL)) {
+ nfs_node_unlock(np);
+ error = nfs3_setattr_rpc(np, vap, ctx);
+ if (error && (gotuid || gotgid)) {
+ /* it's possible the server didn't like our attempt to set IDs. */
+ /* so, let's try it again without those */
+ VATTR_CLEAR_ACTIVE(vap, va_uid);
+ VATTR_CLEAR_ACTIVE(vap, va_gid);
+ error = nfs3_setattr_rpc(np, vap, ctx);
+ }
+ if (error)
+ vnode_put(newvp);
+ else
+ nfs_node_lock_force(np);
+ }
+ if (!error)
+ *ap->a_vpp = newvp;
+ if (!error && (gotuid || gotgid) &&
+ (!newvp || nfs_getattrcache(np, &nvattr, 0) ||
+ (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
+ (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
+ /* clear ID bits if server didn't use them (or we can't tell) */
+ VATTR_CLEAR_SUPPORTED(vap, va_uid);
+ VATTR_CLEAR_SUPPORTED(vap, va_gid);
+ }
+ if (!error)
+ nfs_node_unlock(np);
+ return (error);
+}
+
+/*
+ * NFS file remove call
+ * To try and make NFS semantics closer to UFS semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If vnode_isinuse()
+ * If a rename is not already in the works
+ * call nfs_sillyrename() to set it up
+ * else
+ * do the remove RPC
+ */
+int
+nfs_vnop_remove(
+ struct vnop_remove_args /* {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_dvp;
+ vnode_t a_vp;
+ struct componentname *a_cnp;
+ int a_flags;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ vfs_context_t ctx = ap->a_context;
+ vnode_t vp = ap->a_vp;
+ vnode_t dvp = ap->a_dvp;
+ struct componentname *cnp = ap->a_cnp;
+ nfsnode_t dnp = VTONFS(dvp);
+ nfsnode_t np = VTONFS(vp);
+ int error = 0, nfsvers, namedattrs, inuse, gotattr = 0, flushed = 0, setsize = 0;
+ struct nfs_vattr nvattr;
+ struct nfsmount *nmp;
+ struct nfs_dulookup dul;
+
+ /* XXX prevent removing a sillyrenamed file? */
+
+ nmp = NFSTONMP(dnp);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+ namedattrs = (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_NAMED_ATTR);
+
+again_relock:
+ error = nfs_node_set_busy2(dnp, np, vfs_context_thread(ctx));
+ if (error)
+ return (error);
+
+ /* lock the node while we remove the file */
+ lck_mtx_lock(nfs_node_hash_mutex);
+ while (np->n_hflag & NHLOCKED) {
+ np->n_hflag |= NHLOCKWANT;
+ msleep(np, nfs_node_hash_mutex, PINOD, "nfs_remove", NULL);
+ }
+ np->n_hflag |= NHLOCKED;
+ lck_mtx_unlock(nfs_node_hash_mutex);
+
+ if (!namedattrs)
+ nfs_dulookup_init(&dul, dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx);
+again:
+ inuse = vnode_isinuse(vp, 0);
+ if ((ap->a_flags & VNODE_REMOVE_NODELETEBUSY) && inuse) {
+ /* Caller requested Carbon delete semantics, but file is busy */
+ error = EBUSY;
+ goto out;
+ }
+ if (inuse && !gotattr) {
+ if (nfs_getattr(np, &nvattr, ctx, NGA_CACHED))
+ nvattr.nva_nlink = 1;
+ gotattr = 1;
+ goto again;
+ }
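+ /* remove now if the file isn't in use, or if it's already sillyrenamed and still has other links */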
+ if (!inuse || (np->n_sillyrename && (nvattr.nva_nlink > 1))) {
+
+ if (!inuse && !flushed) { /* flush all the buffers first */
+ /* unlock the node */
+ lck_mtx_lock(nfs_node_hash_mutex);
+ np->n_hflag &= ~NHLOCKED;
+ if (np->n_hflag & NHLOCKWANT) {
+ np->n_hflag &= ~NHLOCKWANT;
+ wakeup(np);
+ }
+ lck_mtx_unlock(nfs_node_hash_mutex);
+ nfs_node_clear_busy2(dnp, np);
+ error = nfs_vinvalbuf(vp, V_SAVE, ctx, 1);
+ FSDBG(260, np, np->n_size, np->n_vattr.nva_size, 0xf00d0011);
+ flushed = 1;
+ if (error == EINTR) {
+ nfs_node_lock_force(np);
+ NATTRINVALIDATE(np);
+ nfs_node_unlock(np);
+ return (error);
+ }
+ if (!namedattrs)
+ nfs_dulookup_finish(&dul, dnp, ctx);
+ goto again_relock;
+ }
+
+ if ((nmp->nm_vers >= NFS_VER4) && (np->n_openflags & N_DELEG_MASK))
+ nfs4_delegation_return(np, 0, vfs_context_thread(ctx), vfs_context_ucred(ctx));
+
+ /*
+ * Purge the name cache so that the chance of a lookup for
+ * the name succeeding while the remove is in progress is
+ * minimized.
+ */
+ nfs_name_cache_purge(dnp, np, cnp, ctx);
+
+ if (!namedattrs)
+ nfs_dulookup_start(&dul, dnp, ctx);
+
+ /* Do the rpc */
+ error = nmp->nm_funcs->nf_remove_rpc(dnp, cnp->cn_nameptr, cnp->cn_namelen,
+ vfs_context_thread(ctx), vfs_context_ucred(ctx));
+
+ /*
+ * Kludge City: If the first reply to the remove rpc is lost..
+ * the reply to the retransmitted request will be ENOENT