+ /*
+ * Resize nfsnode *after* we busy the buffer to prevent
+ * readers from reading garbage.
+ * If there was a partial buf at the old eof, validate
+ * and zero the new bytes.
+ */
+ if (uio->uio_offset + n > np->n_size) {
+ struct buf *bp0 = NULL;
+ daddr_t bn = np->n_size / biosize;
+ int off = np->n_size & (biosize - 1);
+
+ /*
+ * Only fetch the old-EOF block if it is partial (off != 0),
+ * is not the block we are about to write (bn < lbn), and is
+ * already resident (incore) -- avoid a needless read RPC.
+ */
+ if (off && bn < lbn && incore(vp, bn))
+ bp0 = nfs_getcacheblk(vp, bn, biosize, p,
+ BLK_WRITE);
+ /* Grow the file size and tell UBC before releasing bp0. */
+ np->n_flag |= NMODIFIED;
+ np->n_size = uio->uio_offset + n;
+ ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */
+ if (bp0) {
+ /* Zero the tail beyond the old EOF and mark it valid. */
+ bzero((char *)bp0->b_data + off, biosize - off);
+ bp0->b_validend = biosize;
+ brelse(bp0);
+ }
+ }
+ /*
+ * NFS has embedded ucred so crhold() risks zone corruption;
+ * take a full copy of the credential instead.
+ */
+ if (bp->b_wcred == NOCRED)
+ bp->b_wcred = crdup(cred);
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend >
+ np->n_size)
+ bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno *
+ DEV_BSIZE;
+ /*
+ * UBC doesn't (yet) handle partial pages so nfs_biowrite was
+ * hacked to never bdwrite, to start every little write right
+ * away. Running IE Avie noticed the performance problem, thus
+ * this code, which permits those delayed writes by ensuring an
+ * initial read of the entire page. The read may hit eof
+ * ("short read") but that we will handle.
+ *
+ * We are quite dependant on the correctness of B_CACHE so check
+ * that first in case of problems.
+ */
+ if (!ISSET(bp->b_flags, B_CACHE) && n < PAGE_SIZE) {
+ /* Synchronously read the whole page backing this buffer. */
+ boff = (off_t)bp->b_blkno * DEV_BSIZE;
+ auio.uio_iov = &iov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = boff;
+ auio.uio_resid = PAGE_SIZE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_procp = p;
+ iov.iov_base = bp->b_data;
+ iov.iov_len = PAGE_SIZE;
+ error = nfs_readrpc(vp, &auio, cred);
+ if (error) {
+ /* Mark the buffer errored but fall through; the
+ * caller is expected to notice B_ERROR. */
+ bp->b_error = error;
+ SET(bp->b_flags, B_ERROR);
+ printf("nfs_write: readrpc %d", error);
+ }
+ /* Short read at EOF: zero the unread tail. nfs_readrpc
+ * advanced iov_base past the bytes it filled in. */
+ if (auio.uio_resid > 0)
+ bzero(iov.iov_base, auio.uio_resid);
+ bp->b_validoff = 0;
+ bp->b_validend = PAGE_SIZE - auio.uio_resid;
+ if (np->n_size > boff + bp->b_validend)
+ bp->b_validend = min(np->n_size - boff,
+ PAGE_SIZE);
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = 0;
+ }
+
+ /*
+ * If the buffer already has a dirty region and the new write
+ * does NOT touch it (it would leave two disjoint dirty areas),
+ * read in the gap so the valid region spans both, letting us
+ * keep a single contiguous dirty extent below.
+ */
+ if (bp->b_dirtyend > 0 &&
+ (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+ off_t start, end;
+
+ /* Byte range of the hole between the valid region and
+ * the new write, in file offsets. */
+ boff = (off_t)bp->b_blkno * DEV_BSIZE;
+ if (on > bp->b_dirtyend) {
+ start = boff + bp->b_validend;
+ end = boff + on;
+ } else {
+ start = boff + on + n;
+ end = boff + bp->b_validoff;
+ }
+
+ /*
+ * It may be that the valid region in the buffer
+ * covers the region we want, in which case just
+ * extend the dirty region. Otherwise we try to
+ * extend the valid region.
+ */
+ if (end > start) {
+ auio.uio_iov = &iov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = start;
+ auio.uio_resid = end - start;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_procp = p;
+ iov.iov_base = bp->b_data + (start - boff);
+ iov.iov_len = end - start;
+ error = nfs_readrpc(vp, &auio, cred);
+ /*
+ * If we couldn't read, do not do a VOP_BWRITE
+ * as originally coded. That could also error
+ * and looping back to "again" as it was doing
+ * could have us stuck trying to write same buf
+ * again. nfs_write, will get the entire region
+ * if nfs_readrpc succeeded. If unsuccessful
+ * we should just error out. Errors like ESTALE
+ * would keep us looping rather than transient
+ * errors justifying a retry. We can return here
+ * instead of altering dirty region later. We
+ * did not write old dirty region at this point.
+ */
+ if (error) {
+ bp->b_error = error;
+ SET(bp->b_flags, B_ERROR);
+ printf("nfs_write: readrpc2 %d", error);
+ brelse(bp);
+ return (error);
+ }
+ /*
+ * The read worked.
+ * If there was a short read, just zero fill.
+ */
+ if (auio.uio_resid > 0)
+ bzero(iov.iov_base, auio.uio_resid);
+ /* Extend the valid region over the gap we just read. */
+ if (on > bp->b_dirtyend)
+ bp->b_validend = on;
+ else
+ bp->b_validoff = on + n;
+ }
+ /*
+ * We now have a valid region which extends up to the
+ * dirty region which we want. Grow the dirty extent to
+ * meet the new write so it stays contiguous.
+ */
+ if (on > bp->b_dirtyend)
+ bp->b_dirtyend = on;
+ else
+ bp->b_dirtyoff = on + n;
+ }