+ if (ioflag & IO_APPEND) {
+ nfs_data_unlock(np);
+ /* nfs_getattr() will check changed and purge caches */
+ error = nfs_getattr(np, &nvattr, ctx, NGA_UNCACHED);
+ /* we'll be extending the file, so take the data lock exclusive */
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ if (error) {
+ FSDBG(515, np, uio_offset(uio), 0x10bad02, error);
+ goto out;
+ }
+ uio_setoffset(uio, np->n_size);
+ }
+ }
+ if (uio_offset(uio) < 0) {
+ error = EINVAL;
+ FSDBG_BOT(515, np, uio_offset(uio), 0xbad0ff, error);
+ goto out;
+ }
+ if (uio_resid(uio) == 0)
+ goto out;
+
+ if (((uio_offset(uio) + uio_resid(uio)) > (off_t)np->n_size) && !(ioflag & IO_APPEND)) {
+ /* it looks like we'll be extending the file, so take the data lock exclusive */
+ nfs_data_unlock(np);
+ nfs_data_lock(np, NFS_DATA_LOCK_EXCLUSIVE);
+ }
+
+ do {
+ OSAddAtomic(1, &nfsstats.biocache_writes);
+ lbn = uio_offset(uio) / biosize;
+ on = uio_offset(uio) % biosize;
+ n = biosize - on;
+ if (uio_resid(uio) < n)
+ n = uio_resid(uio);
+again:
+ /*
+ * Get a cache block for writing. The range to be written is
+ * (off..off+n) within the block. We ensure that the block
+ * either has no dirty region or that the given range is
+ * contiguous with the existing dirty region.
+ */
+ error = nfs_buf_get(np, lbn, biosize, thd, NBLK_WRITE, &bp);
+ if (error)
+ goto out;
+ /* map the block because we know we're going to write to it */
+ NFS_BUF_MAP(bp);
+
+ if (ioflag & IO_NOCACHE)
+ SET(bp->nb_flags, NB_NOCACHE);
+
+ if (!IS_VALID_CRED(bp->nb_wcred)) {
+ kauth_cred_ref(cred);
+ bp->nb_wcred = cred;
+ }
+
+ /*
+ * If there's already a dirty range AND dirty pages in this block we
+ * need to send a commit AND write the dirty pages before continuing.
+ *
+ * If there's already a dirty range OR dirty pages in this block
+ * and the new write range is not contiguous with the existing range,
+ * then force the buffer to be written out now.
+ * (We used to just extend the dirty range to cover the valid,
+ * but unwritten, data in between also. But writing ranges
+ * of data that weren't actually written by an application
+ * risks overwriting some other client's data with stale data
+ * that's just masquerading as new written data.)
+ */
+ if (bp->nb_dirtyend > 0) {
+ if (on > bp->nb_dirtyend || (on + n) < bp->nb_dirtyoff || bp->nb_dirty) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c001);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ goto again;
+ }
+ } else if (bp->nb_dirty) {
+ int firstpg, lastpg;
+ u_int32_t pagemask;
+ /* calculate write range pagemask */
+ firstpg = on/PAGE_SIZE;
+ lastpg = (on+n-1)/PAGE_SIZE;
+ pagemask = ((1 << (lastpg+1)) - 1) & ~((1 << firstpg) - 1);
+ /* check if there are dirty pages outside the write range */
+ if (bp->nb_dirty & ~pagemask) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c002);
+ /* write/commit buffer "synchronously" */
+ /* (NB_STABLE indicates that data writes should be FILESYNC) */
+ CLR(bp->nb_flags, (NB_DONE | NB_ERROR | NB_INVAL));
+ SET(bp->nb_flags, (NB_ASYNC | NB_STABLE));
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ goto again;
+ }
+ /* if the first or last pages are already dirty */
+ /* make sure that the dirty range encompasses those pages */
+ if (NBPGDIRTY(bp,firstpg) || NBPGDIRTY(bp,lastpg)) {
+ FSDBG(515, np, uio_offset(uio), bp, 0xd15c003);
+ bp->nb_dirtyoff = min(on, firstpg * PAGE_SIZE);
+ if (NBPGDIRTY(bp,lastpg)) {
+ bp->nb_dirtyend = (lastpg+1) * PAGE_SIZE;
+ /* clip to EOF */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend)
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ } else
+ bp->nb_dirtyend = on+n;
+ }
+ }
+
+ /*
+ * Are we extending the size of the file with this write?
+ * If so, update file size now that we have the block.
+ * If there was a partial buf at the old eof, validate
+ * and zero the new bytes.
+ */
+ if ((uio_offset(uio) + n) > (off_t)np->n_size) {
+ struct nfsbuf *eofbp = NULL;
+ daddr64_t eofbn = np->n_size / biosize;
+ int eofoff = np->n_size % biosize;
+ int neweofoff = (uio_offset(uio) + n) % biosize;
+
+ FSDBG(515, 0xb1ffa000, uio_offset(uio) + n, eofoff, neweofoff);
+
+ if (eofoff && (eofbn < lbn) &&
+ ((error = nfs_buf_get(np, eofbn, biosize, thd, NBLK_WRITE|NBLK_ONLYVALID, &eofbp))))
+ goto out;
+
+ /* if we're extending within the same last block */
+ /* and the block is flagged as being cached... */
+ if ((lbn == eofbn) && ISSET(bp->nb_flags, NB_CACHE)) {
+ /* ...check that all pages in buffer are valid */
+ int endpg = ((neweofoff ? neweofoff : biosize) - 1)/PAGE_SIZE;
+ u_int32_t pagemask;
+ /* pagemask only has to extend to last page being written to */
+ pagemask = (1 << (endpg+1)) - 1;
+ FSDBG(515, 0xb1ffa001, bp->nb_valid, pagemask, 0);
+ if ((bp->nb_valid & pagemask) != pagemask) {
+ /* zerofill any hole */
+ if (on > bp->nb_validend) {
+ int i;
+ for (i=bp->nb_validend/PAGE_SIZE; i <= (on - 1)/PAGE_SIZE; i++)
+ NBPGVALID_SET(bp, i);
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, bp->nb_validend, on - bp->nb_validend, 0xf01e);
+ bzero((char *)bp->nb_data + bp->nb_validend,
+ on - bp->nb_validend);
+ }
+ /* zerofill any trailing data in the last page */
+ if (neweofoff) {
+ NFS_BUF_MAP(bp);
+ FSDBG(516, bp, neweofoff, PAGE_SIZE - (neweofoff & PAGE_MASK), 0xe0f);
+ bzero((char *)bp->nb_data + neweofoff,
+ PAGE_SIZE - (neweofoff & PAGE_MASK));
+ }
+ }
+ }
+ np->n_size = uio_offset(uio) + n;
+ nfs_node_lock_force(np);
+ CLR(np->n_flag, NUPDATESIZE);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+ FSDBG(516, np, np->n_size, np->n_vattr.nva_size, 0xf00d0001);
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ if (eofbp) {
+ /*
+ * We may need to zero any previously invalid data
+ * after the old EOF in the previous EOF buffer.
+ *
+ * For the old last page, don't zero bytes if there
+ * are invalid bytes in that page (i.e. the page isn't
+ * currently valid).
+ * For pages after the old last page, zero them and
+ * mark them as valid.
+ */
+ char *d;
+ int i;
+ if (ioflag & IO_NOCACHE)
+ SET(eofbp->nb_flags, NB_NOCACHE);
+ NFS_BUF_MAP(eofbp);
+ FSDBG(516, eofbp, eofoff, biosize - eofoff, 0xe0fff01e);
+ d = eofbp->nb_data;
+ i = eofoff/PAGE_SIZE;
+ while (eofoff < biosize) {
+ int poff = eofoff & PAGE_MASK;
+ if (!poff || NBPGVALID(eofbp,i)) {
+ bzero(d + eofoff, PAGE_SIZE - poff);
+ NBPGVALID_SET(eofbp, i);
+ }
+ if (bp->nb_validend == eofoff)
+ bp->nb_validend += PAGE_SIZE - poff;
+ eofoff += PAGE_SIZE - poff;
+ i++;
+ }
+ nfs_buf_release(eofbp, 1);
+ }
+ }
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend)
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ /*
+ * UBC doesn't handle partial pages, so we need to make sure
+ * that any pages left in the page cache are completely valid.
+ *
+ * Writes that are smaller than a block are delayed if they
+ * don't extend to the end of the block.
+ *
+ * If the block isn't (completely) cached, we may need to read
+ * in some parts of pages that aren't covered by the write.
+ * If the write offset (on) isn't page aligned, we'll need to
+ * read the start of the first page being written to. Likewise,
+ * if the offset of the end of the write (on+n) isn't page aligned,
+ * we'll need to read the end of the last page being written to.
+ *
+ * Notes:
+ * We don't want to read anything we're just going to write over.
+ * We don't want to issue multiple I/Os if we don't have to
+ * (because they're synchronous rpcs).
+ * We don't want to read anything we already have modified in the
+ * page cache.
+ */
+ if (!ISSET(bp->nb_flags, NB_NOCACHE) && !ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+ int firstpg, lastpg, dirtypg;
+ int firstpgoff, lastpgoff;
+ start = end = -1;
+ firstpg = on/PAGE_SIZE;
+ firstpgoff = on & PAGE_MASK;
+ lastpg = (on+n-1)/PAGE_SIZE;
+ lastpgoff = (on+n) & PAGE_MASK;
+ if (firstpgoff && !NBPGVALID(bp,firstpg)) {
+ /* need to read start of first page */
+ start = firstpg * PAGE_SIZE;
+ end = start + firstpgoff;
+ }
+ if (lastpgoff && !NBPGVALID(bp,lastpg)) {
+ /* need to read end of last page */
+ if (start < 0)
+ start = (lastpg * PAGE_SIZE) + lastpgoff;
+ end = (lastpg + 1) * PAGE_SIZE;
+ }
+ if (end > start) {
+ /* need to read the data in range: start...end-1 */
+
+ /* first, check for dirty pages in between */
+ /* if there are, we'll have to do two reads because */
+ /* we don't want to overwrite the dirty pages. */
+ for (dirtypg=start/PAGE_SIZE; dirtypg <= (end-1)/PAGE_SIZE; dirtypg++)
+ if (NBPGDIRTY(bp,dirtypg))
+ break;
+
+ /* if start is at beginning of page, try */
+ /* to get any preceding pages as well. */
+ if (!(start & PAGE_MASK)) {
+ /* stop at next dirty/valid page or start of block */
+ for (; start > 0; start-=PAGE_SIZE)
+ if (NBPGVALID(bp,((start-1)/PAGE_SIZE)))
+ break;
+ }
+
+ NFS_BUF_MAP(bp);
+ /* setup uio for read(s) */
+ boff = NBOFF(bp);
+ auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+ &auio_buf, sizeof(auio_buf));
+
+ if (dirtypg <= (end-1)/PAGE_SIZE) {
+ /* there's a dirty page in the way, so just do two reads */
+ /* we'll read the preceding data here */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) /* couldn't read the data, so treat buffer as NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < on))
+ bp->nb_validend = on;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset */
+ for (; start < on/PAGE_SIZE; start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ }
+ /* adjust start to read any trailing data */
+ start = on+n;
+ }
+
+ /* if end is at end of page, try to */
+ /* get any following pages as well. */
+ if (!(end & PAGE_MASK)) {
+ /* stop at next valid page or end of block */
+ for (; end < biosize; end+=PAGE_SIZE)
+ if (NBPGVALID(bp,end/PAGE_SIZE))
+ break;
+ }
+
+ if (((boff+start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ bzero(bp->nb_data + start, end - start);
+ error = 0;
+ } else if (!ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) /* couldn't read the data, so treat buffer as NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end))
+ bp->nb_validend = end;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE)
+ NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
+ }
+ /* Note: pages being written to will be validated when written */
+ }
+ }
+
+ if (ISSET(bp->nb_flags, NB_ERROR)) {
+ error = bp->nb_error;
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ nfs_node_lock_force(np);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+
+ NFS_BUF_MAP(bp);
+ error = uiomove((char *)bp->nb_data + on, n, uio);
+ if (error) {
+ SET(bp->nb_flags, NB_ERROR);
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ /* validate any pages written to */
+ start = on & ~PAGE_MASK;
+ for (; start < on+n; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /*
+ * This may seem a little weird, but we don't actually set the
+ * dirty bits for writes. This is because we keep the dirty range
+ * in the nb_dirtyoff/nb_dirtyend fields. Also, particularly for
+ * delayed writes, when we give the pages back to the VM we don't
+ * want to keep them marked dirty, because when we later write the
+ * buffer we won't be able to tell which pages were written dirty
+ * and which pages were mmapped and dirtied.
+ */
+ }
+ if (bp->nb_dirtyend > 0) {
+ bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
+ bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
+ } else {
+ bp->nb_dirtyoff = on;
+ bp->nb_dirtyend = on + n;
+ }
+ if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
+ bp->nb_validoff > bp->nb_dirtyend) {
+ bp->nb_validoff = bp->nb_dirtyoff;
+ bp->nb_validend = bp->nb_dirtyend;
+ } else {
+ bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
+ bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE))
+ nfs_buf_normalize_valid_range(np, bp);
+
+ /*
+ * Since this block is being modified, it must be written
+ * again and not just committed.
+ */
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ nfs_node_lock_force(np);
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ np->n_needcommitcnt--;
+ CHECK_NEEDCOMMITCNT(np);
+ }
+ CLR(bp->nb_flags, NB_NEEDCOMMIT);
+ nfs_node_unlock(np);
+ }
+
+ if (ioflag & IO_SYNC) {
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
+ (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
+ SET(bp->nb_flags, NB_ASYNC);
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ } else {
+ /* If the block wasn't already delayed: charge for the write */
+ if (!ISSET(bp->nb_flags, NB_DELWRI)) {
+ proc_t p = vfs_context_proc(ctx);
+ if (p && p->p_stats)
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
+ }
+ nfs_buf_write_delayed(bp);
+ }
+ if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS)
+ nfs_flushcommits(np, 1);
+
+ } while (uio_resid(uio) > 0 && n > 0);
+
+out:
+ nfs_node_lock_force(np);
+ np->n_wrbusy--;
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return (error);
+}
+
+
+/*
+ * NFS write call
+ *
+ * Convenience front end for nfs_write_rpc2(): pulls the thread and the
+ * credential out of the supplied VFS context and forwards everything else
+ * unchanged.  The return value is whatever nfs_write_rpc2() returns.
+ */
+int
+nfs_write_rpc(
+	nfsnode_t np,
+	uio_t uio,
+	vfs_context_t ctx,
+	int *iomodep,
+	uint64_t *wverfp)
+{
+	thread_t ctx_thread = vfs_context_thread(ctx);
+	kauth_cred_t ctx_cred = vfs_context_ucred(ctx);
+
+	return (nfs_write_rpc2(np, uio, ctx_thread, ctx_cred, iomodep, wverfp));
+}
+
+/*
+ * Write the data described by uio to the server, waiting for each request
+ * to complete before issuing the next one.
+ *
+ * The transfer is cut into requests of at most nm_wsize bytes.  Each one is
+ * sent with the mount's nf_write_rpc_async hook and completed with its
+ * nf_write_rpc_async_finish hook.
+ *
+ *   np        node being written to
+ *   uio       offset/data to write; only a single iovec is supported because
+ *             short writes are handled by pushing the uio back
+ *   thd/cred  thread and credential the requests are issued with
+ *   iomodep   in:  write mode handed to every request
+ *             out: lowest commit level returned by any request
+ *   wverfp    optional out: write verifier, stored only if one was received
+ *
+ * For NFSv2 the short-write, commit-level and verifier handling is skipped.
+ * If the verifier changes between requests, the whole transfer is restarted
+ * from the beginning (at most 100 times, then EIO).
+ *
+ * Returns 0 or an error.  On error the uio's resid is set to the number of
+ * bytes that were not (successfully) written.
+ */
+int
+nfs_write_rpc2(
+	nfsnode_t np,
+	uio_t uio,
+	thread_t thd,
+	kauth_cred_t cred,
+	int *iomodep,
+	uint64_t *wverfp)
+{
+	struct nfsmount *nmp;
+	int error = 0, nfsvers;
+	int backup, wverfset, commit, committed;
+	uint64_t wverf = 0, wverf2;
+	size_t nmwsize, totalsize, tsiz, len, rlen;
+	struct nfsreq rq, *req = &rq;
+	uint32_t stategenid = 0, vrestart = 0, restart = 0;
+
+#if DIAGNOSTIC
+	/* XXX limitation based on need to back up uio on short write */
+	if (uio_iovcnt(uio) != 1)
+		panic("nfs3_write_rpc: iovcnt > 1");
+#endif
+	FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
+	nmp = NFSTONMP(np);
+	if (!nmp) {
+		/* close the trace interval opened above, like the EFBIG path does */
+		FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), ENXIO);
+		return (ENXIO);
+	}
+	nfsvers = nmp->nm_vers;
+	nmwsize = nmp->nm_wsize;
+
+	wverfset = 0;
+	committed = NFS_WRITE_FILESYNC;
+
+	totalsize = tsiz = uio_resid(uio);
+	/* NFSv2 offsets are 32 bits wide; refuse writes that would go past that */
+	if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
+		FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
+		return (EFBIG);
+	}
+
+	while (tsiz > 0) {
+		len = (tsiz > nmwsize) ? nmwsize : tsiz;
+		FSDBG(537, np, uio_offset(uio), len, 0);
+		/* remember the state generation so we can tell if recovery already ran */
+		if (nmp->nm_vers >= NFS_VER4)
+			stategenid = nmp->nm_stategenid;
+		error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
+		if (!error)
+			error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
+		nmp = NFSTONMP(np);
+		if (!nmp) {
+			/*
+			 * The mount went away while the request was outstanding.
+			 * There is no mount state left to consult for a restart,
+			 * so fail now instead of dereferencing a NULL nmp below.
+			 */
+			error = ENXIO;
+			break;
+		}
+		if ((nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+		    (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+			lck_mtx_lock(&nmp->nm_lock);
+			if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
+				printf("nfs_write_rpc: error %d, initiating recovery\n", error);
+				nmp->nm_state |= NFSSTA_RECOVER;
+				nfs_mount_sock_thread_wake(nmp);
+			}
+			lck_mtx_unlock(&nmp->nm_lock);
+			if (error == NFSERR_GRACE)
+				tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+			if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+				continue;
+		}
+		if (error)
+			break;
+		if (nfsvers == NFS_VER2) {
+			/* v2: no count, commit level, or verifier to process */
+			tsiz -= len;
+			continue;
+		}
+
+		/* check for a short write */
+		if (rlen < len) {
+			backup = len - rlen;
+			uio_pushback(uio, backup);
+			len = rlen;
+		}
+
+		/* return lowest commit level returned */
+		if (commit < committed)
+			committed = commit;
+
+		tsiz -= len;
+
+		/* check write verifier */
+		if (!wverfset) {
+			wverf = wverf2;
+			wverfset = 1;
+		} else if (wverf != wverf2) {
+			/* verifier changed, so we need to restart all the writes */
+			if (++vrestart > 100) {
+				/* give up after too many restarts */
+				error = EIO;
+				break;
+			}
+			/* rewind the uio to where this call started */
+			backup = totalsize - tsiz;
+			uio_pushback(uio, backup);
+			committed = NFS_WRITE_FILESYNC;
+			wverfset = 0;
+			tsiz = totalsize;
+		}
+	}
+	if (wverfset && wverfp)
+		*wverfp = wverf;
+	*iomodep = committed;
+	if (error)
+		uio_setresid(uio, tsiz);
+	FSDBG_BOT(537, np, committed, uio_resid(uio), error);
+	return (error);
+}
+
+/*
+ * Build an NFSv2/NFSv3 WRITE request for `len` bytes taken from `uio` at
+ * the uio's current offset and hand it to nfs_request_async().
+ *
+ * Request layout after the file handle:
+ *   v3:     64-bit offset, 32-bit count, 32-bit iomode
+ *   others: three 32-bit words (0, offset, 0)
+ * followed in both cases by the 32-bit data length and the data itself.
+ *
+ * Returns ENXIO if the node has no mount, otherwise the first error hit
+ * while building or sending the request (0 on success).  The request
+ * chain is cleaned up on every path past the mount check.
+ */
+int
+nfs3_write_rpc_async(
+	nfsnode_t np,
+	uio_t uio,
+	size_t len,
+	thread_t thd,
+	kauth_cred_t cred,
+	int iomode,
+	struct nfsreq_cbinfo *cb,
+	struct nfsreq **reqp)
+{
+	struct nfsm_chain nmreq;
+	struct nfsmount *nmp = NFSTONMP(np);
+	size_t reqlen;
+	int nfsvers;
+	int error = 0;
+
+	if (nmp == NULL)
+		return (ENXIO);
+	nfsvers = nmp->nm_vers;
+
+	/* file handle + up to five 32-bit argument words + padded data */
+	reqlen = NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len);
+
+	nfsm_chain_null(&nmreq);
+	nfsm_chain_build_alloc_init(error, &nmreq, reqlen);
+	nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+	if (nfsvers != NFS_VER3) {
+		/* pre-v3 argument words: only the middle one (offset) carries a value */
+		nfsm_chain_add_32(error, &nmreq, 0);
+		nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
+		nfsm_chain_add_32(error, &nmreq, 0);
+	} else {
+		/* v3 argument words */
+		nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
+		nfsm_chain_add_32(error, &nmreq, len);
+		nfsm_chain_add_32(error, &nmreq, iomode);
+	}
+	/* length of the data that follows */
+	nfsm_chain_add_32(error, &nmreq, len);
+	nfsmout_if(error);
+
+	error = nfsm_chain_add_uio(&nmreq, uio, len);
+	nfsm_chain_build_done(error, &nmreq);
+	nfsmout_if(error);
+
+	error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, cb, reqp);
+nfsmout:
+	nfsm_chain_cleanup(&nmreq);
+	return (error);
+}
+
+/*
+ * Complete an NFSv2/NFSv3 WRITE request and parse its reply.
+ *
+ * np       node the write was issued against
+ * req      outstanding request to finish (cancelled if the mount is gone)
+ * iomodep  out: commit level for this write (see the override at the end)
+ * rlenp    out (v3 only): byte count taken from the reply
+ * wverfp   optional out (v3 only): write verifier taken from the reply
+ *
+ * Returns ENXIO (after cancelling the request) if the node has no mount on
+ * entry, EINPROGRESS unchanged if nfs_request_async_finish() reports it, or
+ * otherwise 0 / the first error encountered.  *iomodep is written on every
+ * path that reaches nfsmout; *rlenp and *wverfp are written only once the
+ * corresponding reply fields have been read.
+ */
+int
+nfs3_write_rpc_async_finish(
+ nfsnode_t np,
+ struct nfsreq *req,
+ int *iomodep,
+ size_t *rlenp,
+ uint64_t *wverfp)
+{
+ struct nfsmount *nmp;
+ /* lockerror starts non-zero so nfsmout only unlocks a node we actually locked */
+ int error = 0, lockerror = ENOENT, nfsvers, status;
+ int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
+ u_int64_t xid, wverf;
+ mount_t mp;
+ struct nfsm_chain nmrep;
+
+ nmp = NFSTONMP(np);
+ if (!nmp) {
+ nfs_request_async_cancel(req);
+ return (ENXIO);
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmrep);
+
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ if (error == EINPROGRESS) /* async request restarted */
+ return (error);
+ /* re-fetch the mount: it may have gone away while waiting for the reply */
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ error = ENXIO;
+ if (!error && (lockerror = nfs_node_lock(np)))
+ error = lockerror;
+ if (nfsvers == NFS_VER3) {
+ struct timespec premtime = { 0, 0 };
+ /* NOTE(review): presumably fills premtime with the pre-op mtime from the reply's wcc data -- confirm */
+ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
+ /* the node's cached mtime matches premtime: remember to refresh the change info below */
+ if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
+ updatemtime = 1;
+ if (!error)
+ error = status;
+ nfsm_chain_get_32(error, &nmrep, rlen);
+ nfsmout_if(error);
+ /* the count is stored for the caller before it is validated */
+ *rlenp = rlen;
+ if (rlen <= 0)
+ error = NFSERR_IO;
+ nfsm_chain_get_32(error, &nmrep, committed);
+ nfsm_chain_get_64(error, &nmrep, wverf);
+ /*
+ * NOTE(review): nmp may be NULL here only if error was set above; the
+ * nmp dereference below relies on nfsmout_if() bailing out in that case.
+ */
+ nfsmout_if(error);
+ if (wverfp)
+ *wverfp = wverf;
+ /* record the latest verifier on the mount; both branches store wverf, */
+ /* the first additionally marks the mount as having a verifier */
+ lck_mtx_lock(&nmp->nm_lock);
+ if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
+ nmp->nm_verf = wverf;
+ nmp->nm_state |= NFSSTA_HASWRITEVERF;
+ } else if (nmp->nm_verf != wverf) {
+ nmp->nm_verf = wverf;
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ } else {
+ /* non-v3 reply: only attributes to load, no count/commit/verifier */
+ if (!error)
+ error = status;
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+ nfsmout_if(error);
+ }
+ /* only reached when no error sent us to nfsmout early */
+ if (updatemtime)
+ NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
+nfsmout:
+ if (!lockerror)
+ nfs_node_unlock(np);
+ nfsm_chain_cleanup(&nmrep);
+ /*
+ * If the reply was less than FILESYNC but nfs_allow_async is set and the
+ * mount has MNT_ASYNC, report FILESYNC to the caller anyway.
+ * NOTE(review): presumably so the caller skips a later commit -- confirm.
+ */
+ if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
+ ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
+ committed = NFS_WRITE_FILESYNC;
+ *iomodep = committed;
+ return (error);
+}
+
+/*
+ * NFS mknod vnode op
+ *
+ * For NFS v2 this is a kludge. Use a create RPC but with the IFMT bits of the
+ * mode set to specify the file type and the size field for rdev.
+ */
+int
+nfs3_vnop_mknod(
+ struct vnop_mknod_args /* {
+ struct vnodeop_desc *a_desc;
+ vnode_t a_dvp;
+ vnode_t *a_vpp;
+ struct componentname *a_cnp;
+ struct vnode_attr *a_vap;
+ vfs_context_t a_context;
+ } */ *ap)
+{
+ vnode_t dvp = ap->a_dvp;
+ vnode_t *vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode_attr *vap = ap->a_vap;
+ vfs_context_t ctx = ap->a_context;
+ vnode_t newvp = NULL;
+ nfsnode_t np = NULL;
+ struct nfsmount *nmp;
+ nfsnode_t dnp = VTONFS(dvp);
+ struct nfs_vattr nvattr, dnvattr;
+ fhandle_t fh;
+ int error = 0, lockerror = ENOENT, busyerror = ENOENT, status, wccpostattr = 0;
+ struct timespec premtime = { 0, 0 };
+ u_int32_t rdev;
+ u_int64_t xid, dxid;
+ int nfsvers, gotuid, gotgid;
+ struct nfsm_chain nmreq, nmrep;
+
+ nmp = VTONMP(dvp);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+
+ if (!VATTR_IS_ACTIVE(vap, va_type))
+ return (EINVAL);
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ if (!VATTR_IS_ACTIVE(vap, va_rdev))
+ return (EINVAL);
+ rdev = vap->va_rdev;
+ } else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
+ rdev = 0xffffffff;
+ else {
+ return (ENOTSUP);
+ }
+ if ((nfsvers == NFS_VER2) && (cnp->cn_namelen > NFS_MAXNAMLEN))
+ return (ENAMETOOLONG);
+
+ VATTR_SET_SUPPORTED(vap, va_mode);
+ VATTR_SET_SUPPORTED(vap, va_uid);
+ VATTR_SET_SUPPORTED(vap, va_gid);
+ VATTR_SET_SUPPORTED(vap, va_data_size);
+ VATTR_SET_SUPPORTED(vap, va_access_time);
+ VATTR_SET_SUPPORTED(vap, va_modify_time);
+ gotuid = VATTR_IS_ACTIVE(vap, va_uid);
+ gotgid = VATTR_IS_ACTIVE(vap, va_gid);
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_null(&nmrep);
+
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 4 * NFSX_UNSIGNED +
+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(nfsvers));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, dnp->n_fhp, dnp->n_fhsize);
+ nfsm_chain_add_string(error, &nmreq, cnp->cn_nameptr, cnp->cn_namelen);
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_add_32(error, &nmreq, vtonfs_type(vap->va_type, nfsvers));
+ nfsm_chain_add_v3sattr(error, &nmreq, vap);
+ if (vap->va_type == VCHR || vap->va_type == VBLK) {
+ nfsm_chain_add_32(error, &nmreq, major(vap->va_rdev));
+ nfsm_chain_add_32(error, &nmreq, minor(vap->va_rdev));
+ }
+ } else {
+ nfsm_chain_add_v2sattr(error, &nmreq, vap, rdev);
+ }
+ nfsm_chain_build_done(error, &nmreq);
+ if (!error)
+ error = busyerror = nfs_node_set_busy(dnp, vfs_context_thread(ctx));
+ nfsmout_if(error);
+
+ error = nfs_request(dnp, NULL, &nmreq, NFSPROC_MKNOD, ctx, &nmrep, &xid, &status);
+
+ if ((lockerror = nfs_node_lock(dnp)))
+ error = lockerror;
+ /* XXX no EEXIST kludge here? */
+ dxid = xid;
+ if (!error && !status) {
+ if (dnp->n_flag & NNEGNCENTRIES) {
+ dnp->n_flag &= ~NNEGNCENTRIES;
+ cache_purge_negatives(dvp);
+ }
+ error = nfsm_chain_get_fh_attr(&nmrep, dnp, ctx, nfsvers, &xid, &fh, &nvattr);
+ }
+ if (nfsvers == NFS_VER3)
+ nfsm_chain_get_wcc_data(error, &nmrep, dnp, &premtime, &wccpostattr, &dxid);
+ if (!error)
+ error = status;
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ nfsm_chain_cleanup(&nmrep);
+
+ if (!lockerror) {
+ dnp->n_flag |= NMODIFIED;
+ /* if directory hadn't changed, update namecache mtime */
+ if (nfstimespeccmp(&dnp->n_ncmtime, &premtime, ==))
+ NFS_CHANGED_UPDATE_NC(nfsvers, dnp, &dnp->n_vattr);
+ nfs_node_unlock(dnp);
+ /* nfs_getattr() will check changed and purge caches */
+ nfs_getattr(dnp, &dnvattr, ctx, wccpostattr ? NGA_CACHED : NGA_UNCACHED);
+ }
+
+ if (!error && fh.fh_len)
+ error = nfs_nget(NFSTOMP(dnp), dnp, cnp, fh.fh_data, fh.fh_len, &nvattr, &xid, NG_MAKEENTRY, &np);
+ if (!error && !np)
+ error = nfs_lookitup(dnp, cnp->cn_nameptr, cnp->cn_namelen, ctx, &np);
+ if (!error && np)
+ newvp = NFSTOV(np);
+ if (!busyerror)
+ nfs_node_clear_busy(dnp);
+
+ if (!error && (gotuid || gotgid) &&
+ (!newvp || nfs_getattrcache(np, &nvattr) ||
+ (gotuid && (nvattr.nva_uid != vap->va_uid)) ||
+ (gotgid && (nvattr.nva_gid != vap->va_gid)))) {
+ /* clear ID bits if server didn't use them (or we can't tell) */
+ VATTR_CLEAR_SUPPORTED(vap, va_uid);
+ VATTR_CLEAR_SUPPORTED(vap, va_gid);
+ }
+ if (error) {
+ if (newvp) {
+ nfs_node_unlock(np);
+ vnode_put(newvp);