+ }
+ /*
+ * If dirtyend exceeds file size, chop it down. This should
+ * not occur unless there is a race.
+ */
+ if (NBOFF(bp) + bp->nb_dirtyend > (off_t)np->n_size) {
+ bp->nb_dirtyend = np->n_size - NBOFF(bp);
+ if (bp->nb_dirtyoff >= bp->nb_dirtyend)
+ bp->nb_dirtyoff = bp->nb_dirtyend = 0;
+ }
+ /*
+ * UBC doesn't handle partial pages, so we need to make sure
+ * that any pages left in the page cache are completely valid.
+ *
+ * Writes that are smaller than a block are delayed if they
+ * don't extend to the end of the block.
+ *
+ * If the block isn't (completely) cached, we may need to read
+ * in some parts of pages that aren't covered by the write.
+ * If the write offset (on) isn't page aligned, we'll need to
+ * read the start of the first page being written to. Likewise,
+ * if the offset of the end of the write (on+n) isn't page aligned,
+ * we'll need to read the end of the last page being written to.
+ *
+ * Notes:
+ * We don't want to read anything we're just going to write over.
+ * We don't want to read anything we're just going drop when the
+ * I/O is complete (i.e. don't do reads for NOCACHE requests).
+ * We don't want to issue multiple I/Os if we don't have to
+ * (because they're synchronous rpcs).
+ * We don't want to read anything we already have modified in the
+ * page cache.
+ */
+ if (!ISSET(bp->nb_flags, NB_CACHE) && (n < biosize)) {
+ int firstpg, lastpg, dirtypg;
+ int firstpgoff, lastpgoff;
+ start = end = -1;
+ firstpg = on/PAGE_SIZE;
+ firstpgoff = on & PAGE_MASK;
+ lastpg = (on+n-1)/PAGE_SIZE;
+ lastpgoff = (on+n) & PAGE_MASK;
+ if (firstpgoff && !NBPGVALID(bp,firstpg)) {
+ /* need to read start of first page */
+ start = firstpg * PAGE_SIZE;
+ end = start + firstpgoff;
+ }
+ if (lastpgoff && !NBPGVALID(bp,lastpg)) {
+ /* need to read end of last page */
+ if (start < 0)
+ start = (lastpg * PAGE_SIZE) + lastpgoff;
+ end = (lastpg + 1) * PAGE_SIZE;
+ }
+ if (ISSET(bp->nb_flags, NB_NOCACHE)) {
+ /*
+ * For nocache writes, if there is any partial page at the
+ * start or end of the write range, then we do the write
+ * synchronously to make sure that we can drop the data
+ * from the cache as soon as the WRITE finishes. Normally,
+ * we would do an unstable write and not drop the data until
+ * it was committed. But doing that here would risk allowing
+ * invalid data to be read from the cache between the WRITE
+ * and the COMMIT.
+ * (NB_STABLE indicates that data writes should be FILESYNC)
+ */
+ if (end > start)
+ SET(bp->nb_flags, NB_STABLE);
+ goto skipread;
+ }
+ if (end > start) {
+ /* need to read the data in range: start...end-1 */
+
+ /* first, check for dirty pages in between */
+ /* if there are, we'll have to do two reads because */
+ /* we don't want to overwrite the dirty pages. */
+ for (dirtypg=start/PAGE_SIZE; dirtypg <= (end-1)/PAGE_SIZE; dirtypg++)
+ if (NBPGDIRTY(bp,dirtypg))
+ break;
+
+ /* if start is at beginning of page, try */
+ /* to get any preceeding pages as well. */
+ if (!(start & PAGE_MASK)) {
+ /* stop at next dirty/valid page or start of block */
+ for (; start > 0; start-=PAGE_SIZE)
+ if (NBPGVALID(bp,((start-1)/PAGE_SIZE)))
+ break;
+ }
+
+ NFS_BUF_MAP(bp);
+ /* setup uio for read(s) */
+ boff = NBOFF(bp);
+ auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
+ &auio_buf, sizeof(auio_buf));
+
+ if (dirtypg <= (end-1)/PAGE_SIZE) {
+ /* there's a dirty page in the way, so just do two reads */
+ /* we'll read the preceding data here */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), on - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee01);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < on))
+ bp->nb_validend = on;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset */
+ for (; start < on/PAGE_SIZE; start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ }
+ /* adjust start to read any trailing data */
+ start = on+n;
+ }
+
+ /* if end is at end of page, try to */
+ /* get any following pages as well. */
+ if (!(end & PAGE_MASK)) {
+ /* stop at next valid page or end of block */
+ for (; end < biosize; end+=PAGE_SIZE)
+ if (NBPGVALID(bp,end/PAGE_SIZE))
+ break;
+ }
+
+ if (((boff+start) >= (off_t)np->n_size) ||
+ ((start >= on) && ((boff + on + n) >= (off_t)np->n_size))) {
+ /*
+ * Either this entire read is beyond the current EOF
+ * or the range that we won't be modifying (on+n...end)
+ * is all beyond the current EOF.
+ * No need to make a trip across the network to
+ * read nothing. So, just zero the buffer instead.
+ */
+ FSDBG(516, bp, start, end - start, 0xd00dee00);
+ bzero(bp->nb_data + start, end - start);
+ error = 0;
+ } else {
+ /* now we'll read the (rest of the) data */
+ uio_reset(auio, boff + start, UIO_SYSSPACE, UIO_READ);
+ uio_addiov(auio, CAST_USER_ADDR_T(bp->nb_data + start), end - start);
+ error = nfs_read_rpc(np, auio, ctx);
+ if (error) {
+ /* couldn't read the data, so treat buffer as synchronous NOCACHE */
+ SET(bp->nb_flags, (NB_NOCACHE|NB_STABLE));
+ goto skipread;
+ }
+ if (uio_resid(auio) > 0) {
+ FSDBG(516, bp, (caddr_t)uio_curriovbase(auio) - bp->nb_data, uio_resid(auio), 0xd00dee02);
+ bzero(CAST_DOWN(caddr_t, uio_curriovbase(auio)), uio_resid(auio));
+ }
+ }
+ if (!error) {
+ /* update validoff/validend if necessary */
+ if ((bp->nb_validoff < 0) || (bp->nb_validoff > start))
+ bp->nb_validoff = start;
+ if ((bp->nb_validend < 0) || (bp->nb_validend < end))
+ bp->nb_validend = end;
+ if ((off_t)np->n_size > boff + bp->nb_validend)
+ bp->nb_validend = min(np->n_size - (boff + start), biosize);
+ /* validate any pages before the write offset's page */
+ for (; start < (off_t)trunc_page_32(on); start+=PAGE_SIZE)
+ NBPGVALID_SET(bp, start/PAGE_SIZE);
+ /* validate any pages after the range of pages being written to */
+ for (; (end - 1) > (off_t)round_page_32(on+n-1); end-=PAGE_SIZE)
+ NBPGVALID_SET(bp, (end-1)/PAGE_SIZE);
+ }
+ /* Note: pages being written to will be validated when written */