+ if (((boff+start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ bzero(bp->nb_data + start, end - start);
+ error = 0;
+ } else if (!ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) /* couldn't read the data, so treat buffer as NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end))
+ bp->nb_validend = end;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE)
+ NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
+ }
+ /* Note: pages being written to will be validated when written */
+ }
+ }
+
+ if (ISSET(bp->nb_flags, NB_ERROR)) {
+ error = bp->nb_error;
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
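+ /* mark the node as having modified data not yet written to the server */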
+ nfs_node_lock_force(np);
+ np->n_flag |= NMODIFIED;
+ nfs_node_unlock(np);
+
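+ /* map the buffer and copy the caller's data into it */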
+ NFS_BUF_MAP(bp);
+ error = uiomove((char *)bp->nb_data + on, n, uio);
+ if (error) {
+ SET(bp->nb_flags, NB_ERROR);
+ nfs_buf_release(bp, 1);
+ goto out;
+ }
+
+ /* validate any pages written to */
+ start = on & ~PAGE_MASK;
+ for (; start < on+n; start += PAGE_SIZE) {
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /*
+ * Note that we don't set the page dirty bits for writes. The dirty
+ * range is tracked in nb_dirtyoff/nb_dirtyend instead. In particular,
+ * for delayed writes we don't want the pages handed back to the VM
+ * marked dirty, because when the buffer is eventually written we
+ * couldn't tell which pages were dirtied by this write and which
+ * were mmapped and dirtied.
+ */
+ }
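+ /* extend (or set) the buffer's dirty range to cover this write */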
+ if (bp->nb_dirtyend > 0) {
+ bp->nb_dirtyoff = min(on, bp->nb_dirtyoff);
+ bp->nb_dirtyend = max((on + n), bp->nb_dirtyend);
+ } else {
+ bp->nb_dirtyoff = on;
+ bp->nb_dirtyend = on + n;
+ }
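+ /* make sure the buffer's valid range covers the new dirty range */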
+ if (bp->nb_validend <= 0 || bp->nb_validend < bp->nb_dirtyoff ||
+ bp->nb_validoff > bp->nb_dirtyend) {
+ bp->nb_validoff = bp->nb_dirtyoff;
+ bp->nb_validend = bp->nb_dirtyend;
+ } else {
+ bp->nb_validoff = min(bp->nb_validoff, bp->nb_dirtyoff);
+ bp->nb_validend = max(bp->nb_validend, bp->nb_dirtyend);
+ }
+ if (!ISSET(bp->nb_flags, NB_CACHE))
+ nfs_buf_normalize_valid_range(np, bp);
+
+ /*
+ * Since this block is being modified, it must be written
+ * again and not just committed.
+ */
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ nfs_node_lock_force(np);
+ if (ISSET(bp->nb_flags, NB_NEEDCOMMIT)) {
+ np->n_needcommitcnt--;
+ CHECK_NEEDCOMMITCNT(np);
+ }
+ CLR(bp->nb_flags, NB_NEEDCOMMIT);
+ nfs_node_unlock(np);
+ }
+
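+ /*
+ * Decide how to push the buffer out: synchronous writes go out now;
+ * full blocks, appends, and uncached writes are written asynchronously;
+ * everything else is delayed in hopes of coalescing more data.
+ */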
+ if (ioflag & IO_SYNC) {
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ } else if (((n + on) == biosize) || (ioflag & IO_APPEND) ||
+ (ioflag & IO_NOCACHE) || ISSET(bp->nb_flags, NB_NOCACHE)) {
+ SET(bp->nb_flags, NB_ASYNC);
+ error = nfs_buf_write(bp);
+ if (error)
+ goto out;
+ } else {
+ /* If the block wasn't already delayed: charge for the write */
+ if (!ISSET(bp->nb_flags, NB_DELWRI)) {
+ proc_t p = vfs_context_proc(ctx);
+ if (p && p->p_stats)
+ OSIncrementAtomicLong(&p->p_stats->p_ru.ru_oublock);
+ }
+ nfs_buf_write_delayed(bp);
+ }
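+ /* if a lot of buffers are waiting for a commit, flush commits now */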
+ if (np->n_needcommitcnt >= NFS_A_LOT_OF_NEEDCOMMITS)
+ nfs_flushcommits(np, 1);
+
+ } while (uio_resid(uio) > 0 && n > 0);
+
+out:
+ nfs_node_lock_force(np);
+ np->n_wrbusy--;
+ nfs_node_unlock(np);
+ nfs_data_unlock(np);
+ FSDBG_BOT(515, np, uio_offset(uio), uio_resid(uio), error);
+ return (error);
+}
+
+
+/*
+ * NFS write call
+ */
+int
+nfs_write_rpc(
+ nfsnode_t np,
+ uio_t uio,
+ vfs_context_t ctx,
+ int *iomodep,
+ uint64_t *wverfp)
+{
+ return nfs_write_rpc2(np, uio, vfs_context_thread(ctx), vfs_context_ucred(ctx), iomodep, wverfp);
+}
+
+int
+nfs_write_rpc2(
+ nfsnode_t np,
+ uio_t uio,
+ thread_t thd,
+ kauth_cred_t cred,
+ int *iomodep,
+ uint64_t *wverfp)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ int backup, wverfset, commit, committed;
+ uint64_t wverf = 0, wverf2;
+ size_t nmwsize, totalsize, tsiz, len, rlen;
+ struct nfsreq rq, *req = &rq;
+ uint32_t stategenid = 0, vrestart = 0, restart = 0;
+
+#if DIAGNOSTIC
+ /* XXX limitation based on need to back up uio on short write */
+ if (uio_iovcnt(uio) != 1)
+ panic("nfs3_write_rpc: iovcnt > 1");
+#endif
+ FSDBG_TOP(537, np, uio_offset(uio), uio_resid(uio), *iomodep);
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+ nmwsize = nmp->nm_wsize;
+
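+ /*
+ * No write verifier yet. Start at the strongest commitment level;
+ * it gets lowered to the weakest level the server returns.
+ */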
+ wverfset = 0;
+ committed = NFS_WRITE_FILESYNC;
+
+ totalsize = tsiz = uio_resid(uio);
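+ /* NFSv2 can't handle writes beyond a 32-bit file offset */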
+ if ((nfsvers == NFS_VER2) && ((uint64_t)(uio_offset(uio) + tsiz) > 0xffffffffULL)) {
+ FSDBG_BOT(537, np, uio_offset(uio), uio_resid(uio), EFBIG);
+ return (EFBIG);
+ }
+
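+ /* push out the data, at most nm_wsize bytes per RPC */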
+ while (tsiz > 0) {
+ len = (tsiz > nmwsize) ? nmwsize : tsiz;
+ FSDBG(537, np, uio_offset(uio), len, 0);
+ if (nmp->nm_vers >= NFS_VER4)
+ stategenid = nmp->nm_stategenid;
+ error = nmp->nm_funcs->nf_write_rpc_async(np, uio, len, thd, cred, *iomodep, NULL, &req);
+ if (!error)
+ error = nmp->nm_funcs->nf_write_rpc_async_finish(np, req, &commit, &rlen, &wverf2);
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ error = ENXIO;
+ if (nmp && (nmp->nm_vers >= NFS_VER4) && nfs_mount_state_error_should_restart(error) &&
+ (++restart <= nfs_mount_state_max_restarts(nmp))) { /* guard against no progress */
+ lck_mtx_lock(&nmp->nm_lock);
+ if ((error != NFSERR_GRACE) && (stategenid == nmp->nm_stategenid) && !(nmp->nm_state & NFSSTA_RECOVER)) {
+ printf("nfs_write_rpc: error %d, initiating recovery\n", error);
+ nmp->nm_state |= NFSSTA_RECOVER;
+ nfs_mount_sock_thread_wake(nmp);
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ if (error == NFSERR_GRACE)
+ tsleep(&nmp->nm_state, (PZERO-1), "nfsgrace", 2*hz);
+ if (!(error = nfs_mount_state_wait_for_recovery(nmp)))
+ continue;
+ }
+ if (error)
+ break;
+ if (nfsvers == NFS_VER2) {
+ tsiz -= len;
+ continue;
+ }
+
+ /* check for a short write */
+ if (rlen < len) {
+ backup = len - rlen;
+ uio_pushback(uio, backup);
+ len = rlen;
+ }
+
+ /* return lowest commit level returned */
+ if (commit < committed)
+ committed = commit;
+
+ tsiz -= len;
+
+ /* check write verifier */
+ if (!wverfset) {
+ wverf = wverf2;
+ wverfset = 1;
+ } else if (wverf != wverf2) {
+ /* verifier changed, so we need to restart all the writes */
+ if (++vrestart > 100) {
+ /* give up after too many restarts */
+ error = EIO;
+ break;
+ }
+ backup = totalsize - tsiz;
+ uio_pushback(uio, backup);
+ committed = NFS_WRITE_FILESYNC;
+ wverfset = 0;
+ tsiz = totalsize;
+ }
+ }
+ if (wverfset && wverfp)
+ *wverfp = wverf;
+ *iomodep = committed;
+ if (error)
+ uio_setresid(uio, tsiz);
+ FSDBG_BOT(537, np, committed, uio_resid(uio), error);
+ return (error);
+}
+
+int
+nfs3_write_rpc_async(
+ nfsnode_t np,
+ uio_t uio,
+ size_t len,
+ thread_t thd,
+ kauth_cred_t cred,
+ int iomode,
+ struct nfsreq_cbinfo *cb,
+ struct nfsreq **reqp)
+{
+ struct nfsmount *nmp;
+ int error = 0, nfsvers;
+ struct nfsm_chain nmreq;
+
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ return (ENXIO);
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmreq);
+ nfsm_chain_build_alloc_init(error, &nmreq,
+ NFSX_FH(nfsvers) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
+ nfsm_chain_add_fh(error, &nmreq, nfsvers, np->n_fhp, np->n_fhsize);
+ if (nfsvers == NFS_VER3) {
+ nfsm_chain_add_64(error, &nmreq, uio_offset(uio));
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsm_chain_add_32(error, &nmreq, iomode);
+ } else {
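+ /* NFSv2: beginoffset, offset, totalcount; beginoffset and totalcount are ignored */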
+ nfsm_chain_add_32(error, &nmreq, 0);
+ nfsm_chain_add_32(error, &nmreq, uio_offset(uio));
+ nfsm_chain_add_32(error, &nmreq, 0);
+ }
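+ /* both versions: length of the write data that follows */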
+ nfsm_chain_add_32(error, &nmreq, len);
+ nfsmout_if(error);
+ error = nfsm_chain_add_uio(&nmreq, uio, len);
+ nfsm_chain_build_done(error, &nmreq);
+ nfsmout_if(error);
+ error = nfs_request_async(np, NULL, &nmreq, NFSPROC_WRITE, thd, cred, cb, reqp);
+nfsmout:
+ nfsm_chain_cleanup(&nmreq);
+ return (error);
+}
+
+int
+nfs3_write_rpc_async_finish(
+ nfsnode_t np,
+ struct nfsreq *req,
+ int *iomodep,
+ size_t *rlenp,
+ uint64_t *wverfp)
+{
+ struct nfsmount *nmp;
+ int error = 0, lockerror = ENOENT, nfsvers, status;
+ int updatemtime = 0, wccpostattr = 0, rlen, committed = NFS_WRITE_FILESYNC;
+ u_int64_t xid, wverf;
+ mount_t mp;
+ struct nfsm_chain nmrep;
+
+ nmp = NFSTONMP(np);
+ if (!nmp) {
+ nfs_request_async_cancel(req);
+ return (ENXIO);
+ }
+ nfsvers = nmp->nm_vers;
+
+ nfsm_chain_null(&nmrep);
+
+ error = nfs_request_async_finish(req, &nmrep, &xid, &status);
+ if (error == EINPROGRESS) /* async request restarted */
+ return (error);
+ nmp = NFSTONMP(np);
+ if (!nmp)
+ error = ENXIO;
+ if (!error && (lockerror = nfs_node_lock(np)))
+ error = lockerror;
+ if (nfsvers == NFS_VER3) {
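+ /* NFSv3 reply: wcc data, count written, commitment level, write verifier */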
+ struct timespec premtime = { 0, 0 };
+ nfsm_chain_get_wcc_data(error, &nmrep, np, &premtime, &wccpostattr, &xid);
+ if (nfstimespeccmp(&np->n_mtime, &premtime, ==))
+ updatemtime = 1;
+ if (!error)
+ error = status;
+ nfsm_chain_get_32(error, &nmrep, rlen);
+ nfsmout_if(error);
+ *rlenp = rlen;
+ if (rlen <= 0)
+ error = NFSERR_IO;
+ nfsm_chain_get_32(error, &nmrep, committed);
+ nfsm_chain_get_64(error, &nmrep, wverf);
+ nfsmout_if(error);
+ if (wverfp)
+ *wverfp = wverf;
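+ /* save the write verifier in the mount so a later commit can detect a server reboot */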
+ lck_mtx_lock(&nmp->nm_lock);
+ if (!(nmp->nm_state & NFSSTA_HASWRITEVERF)) {
+ nmp->nm_verf = wverf;
+ nmp->nm_state |= NFSSTA_HASWRITEVERF;
+ } else if (nmp->nm_verf != wverf) {
+ nmp->nm_verf = wverf;
+ }
+ lck_mtx_unlock(&nmp->nm_lock);
+ } else {
+ if (!error)
+ error = status;
+ nfsm_chain_loadattr(error, &nmrep, np, nfsvers, NULL, &xid);
+ nfsmout_if(error);
+ }
+ if (updatemtime)
+ NFS_CHANGED_UPDATE(nfsvers, np, &np->n_vattr);
+nfsmout:
+ if (!lockerror)
+ nfs_node_unlock(np);
+ nfsm_chain_cleanup(&nmrep);
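+ /*
+ * On an async mount, report FILESYNC so the caller won't bother
+ * tracking the data for a later commit.
+ */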
+ if ((committed != NFS_WRITE_FILESYNC) && nfs_allow_async &&
+ ((mp = NFSTOMP(np))) && (vfs_flags(mp) & MNT_ASYNC))
+ committed = NFS_WRITE_FILESYNC;
+ *iomodep = committed;