1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * This code is derived from software contributed to Berkeley by
28 * Rick Macklem at The University of Guelph.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
59 * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $
60 */
61
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/resourcevar.h>
65 #include <sys/signalvar.h>
66 #include <sys/proc.h>
67 #include <sys/buf.h>
68 #include <sys/vnode.h>
69 #include <sys/mount.h>
70 #include <sys/kernel.h>
71 #include <sys/sysctl.h>
72 #include <sys/ubc.h>
73
74 #include <sys/vm.h>
75 #include <sys/vmparam.h>
76
77 #include <sys/time.h>
78 #include <kern/clock.h>
79
80 #include <nfs/rpcv2.h>
81 #include <nfs/nfsproto.h>
82 #include <nfs/nfs.h>
83 #include <nfs/nfsmount.h>
84 #include <nfs/nqnfs.h>
85 #include <nfs/nfsnode.h>
86
87 #include <sys/kdebug.h>
88
89 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
90 struct proc *p, int operation));
91 static struct buf *nfs_getwriteblk __P((struct vnode *vp, daddr_t bn,
92 int size, struct proc *p,
93 struct ucred *cred, int off, int len));
94
95 extern int nfs_numasync;
96 extern struct nfsstats nfsstats;
97
98 /*
99 * Vnode op for read using bio
100 * Any similarity to readip() is purely coincidental
101 */
102 int
103 nfs_bioread(vp, uio, ioflag, cred, getpages)
104 register struct vnode *vp;
105 register struct uio *uio;
106 int ioflag;
107 struct ucred *cred;
108 int getpages;
109 {
110 register struct nfsnode *np = VTONFS(vp);
111 register int biosize, diff, i;
112 struct buf *bp = 0, *rabp;
113 struct vattr vattr;
114 struct proc *p;
115 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
116 daddr_t lbn, rabn;
117 int bufsize;
118 int nra, error = 0, n = 0, on = 0, not_readin;
119 int operation = (getpages? BLK_PAGEIN : BLK_READ);
120
121 #if DIAGNOSTIC
122 if (uio->uio_rw != UIO_READ)
123 panic("nfs_read mode");
124 #endif
125 if (uio->uio_resid == 0)
126 return (0);
127 if (uio->uio_offset < 0)
128 return (EINVAL);
129 p = uio->uio_procp;
130 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
131 (void)nfs_fsinfo(nmp, vp, cred, p);
132 /* due to getblk/vm interactions, use values no larger than the VM page size */
133 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
134 /*
135 * For nfs, cache consistency can only be maintained approximately.
136 * Although RFC1094 does not specify the criteria, the following is
137 * believed to be compatible with the reference port.
138 * For nqnfs, full cache consistency is maintained within the loop.
139 * For nfs:
140 * If the file's modify time on the server has changed since the
141 * last read rpc or you have written to the file,
142 * you may have lost data cache consistency with the
143 * server, so flush all of the file's data out of the cache.
144 * Then force a getattr rpc to ensure that you have up to date
145 * attributes.
146 * NB: This implies that cache data can be read when up to
147 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
148 * attributes this could be forced by setting n_attrstamp to 0 before
149 * the VOP_GETATTR() call.
150 */
151 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
152 if (np->n_flag & NMODIFIED) {
153 if (vp->v_type != VREG) {
154 if (vp->v_type != VDIR)
155 panic("nfs: bioread, not dir");
156 nfs_invaldir(vp);
157 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
158 if (error)
159 return (error);
160 }
161 np->n_attrstamp = 0;
162 error = VOP_GETATTR(vp, &vattr, cred, p);
163 if (error)
164 return (error);
165 np->n_mtime = vattr.va_mtime.tv_sec;
166 } else {
167 error = VOP_GETATTR(vp, &vattr, cred, p);
168 if (error)
169 return (error);
170 if (np->n_mtime != vattr.va_mtime.tv_sec) {
171 if (vp->v_type == VDIR)
172 nfs_invaldir(vp);
173 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
174 if (error)
175 return (error);
176 np->n_mtime = vattr.va_mtime.tv_sec;
177 }
178 }
179 }
180 do {
181
182 /*
183 * Get a valid lease. If cached data is stale, flush it.
184 */
185 if (nmp->nm_flag & NFSMNT_NQNFS) {
186 if (NQNFS_CKINVALID(vp, np, ND_READ)) {
187 do {
188 error = nqnfs_getlease(vp, ND_READ, cred, p);
189 } while (error == NQNFS_EXPIRED);
190 if (error)
191 return (error);
192 if (np->n_lrev != np->n_brev ||
193 (np->n_flag & NQNFSNONCACHE) ||
194 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
195 if (vp->v_type == VDIR)
196 nfs_invaldir(vp);
197 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
198 if (error)
199 return (error);
200 np->n_brev = np->n_lrev;
201 }
202 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
203 nfs_invaldir(vp);
204 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
205 if (error)
206 return (error);
207 }
208 }
209 if (np->n_flag & NQNFSNONCACHE) {
210 switch (vp->v_type) {
211 case VREG:
212 return (nfs_readrpc(vp, uio, cred));
213 case VLNK:
214 return (nfs_readlinkrpc(vp, uio, cred));
215 case VDIR:
216 break;
217 default:
218 printf(" NQNFSNONCACHE: type %x unexpected\n",
219 vp->v_type);
220 };
221 }
222 switch (vp->v_type) {
223 case VREG:
224 nfsstats.biocache_reads++;
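/*
 * Map the request onto the cache: lbn is the logical block that
 * contains uio_offset and "on" is the offset within that block.
 * not_readin stays set while the block comes from the cache and is
 * cleared below once we actually issue a read for it.
 */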
225 lbn = uio->uio_offset / biosize;
226 on = uio->uio_offset & (biosize - 1);
227 not_readin = 1;
228
229 /*
230 * Start the read ahead(s), as required.
231 */
232 if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
233 for (nra = 0; nra < nmp->nm_readahead &&
234 (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
235 rabn = lbn + 1 + nra;
236 if (!incore(vp, rabn)) {
237 rabp = nfs_getcacheblk(vp, rabn, biosize, p, operation);
238 if (!rabp)
239 return (EINTR);
240 if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
241 SET(rabp->b_flags, (B_READ | B_ASYNC));
242 if (nfs_asyncio(rabp, cred)) {
243 SET(rabp->b_flags, (B_INVAL|B_ERROR));
244 rabp->b_error = EIO;
245 brelse(rabp);
246 }
247 } else
248 brelse(rabp);
249 }
250 }
251 }
252
253 /*
254 * If the block is in the cache and has the required data
255 * in a valid region, just copy it out.
256 * Otherwise, get the block and write back/read in,
257 * as required.
258 */
259 again:
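/*
 * The last block of the file may be shorter than biosize; trim the
 * buffer to just the remaining bytes, rounded up to DEV_BSIZE.
 */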
260 bufsize = biosize;
261 if ((off_t)(lbn + 1) * biosize > np->n_size &&
262 (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
263 bufsize = np->n_size - lbn * biosize;
264 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
265 }
266 bp = nfs_getcacheblk(vp, lbn, bufsize, p, operation);
267 if (!bp)
268 return (EINTR);
269
270 if (!ISSET(bp->b_flags, B_CACHE)) {
271 SET(bp->b_flags, B_READ);
272 CLR(bp->b_flags, (B_DONE | B_ERROR | B_INVAL));
273 not_readin = 0;
274 error = nfs_doio(bp, cred, p);
275 if (error) {
276 brelse(bp);
277 return (error);
278 }
279 }
280 if (bufsize > on) {
281 n = min((unsigned)(bufsize - on), uio->uio_resid);
282 } else {
283 n = 0;
284 }
285 diff = np->n_size - uio->uio_offset;
286 if (diff < n)
287 n = diff;
288 if (not_readin && n > 0) {
289 if (on < bp->b_validoff || (on + n) > bp->b_validend) {
290 SET(bp->b_flags, (B_NOCACHE|B_INVAFTERWRITE));
291 if (bp->b_dirtyend > 0) {
292 if (!ISSET(bp->b_flags, B_DELWRI))
293 panic("nfsbioread");
294 if (VOP_BWRITE(bp) == EINTR)
295 return (EINTR);
296 } else
297 brelse(bp);
298 goto again;
299 }
300 }
301 vp->v_lastr = lbn;
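/*
 * Only hand back data the buffer actually holds: clamp the copy to
 * the valid region ending at b_validend.
 */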
302 diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
303 if (diff < n)
304 n = diff;
305 break;
306 case VLNK:
307 nfsstats.biocache_readlinks++;
308 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p, operation);
309 if (!bp)
310 return (EINTR);
311 if (!ISSET(bp->b_flags, B_CACHE)) {
312 SET(bp->b_flags, B_READ);
313 error = nfs_doio(bp, cred, p);
314 if (error) {
315 SET(bp->b_flags, B_ERROR);
316 brelse(bp);
317 return (error);
318 }
319 }
320 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
321 on = 0;
322 break;
323 case VDIR:
324 nfsstats.biocache_readdirs++;
325 if (np->n_direofoffset
326 && uio->uio_offset >= np->n_direofoffset) {
327 return (0);
328 }
329 lbn = uio->uio_offset / NFS_DIRBLKSIZ;
330 on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
331 bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p, operation);
332 if (!bp)
333 return (EINTR);
334 if (!ISSET(bp->b_flags, B_CACHE)) {
335 SET(bp->b_flags, B_READ);
336 error = nfs_doio(bp, cred, p);
337 if (error) {
338 brelse(bp);
339 while (error == NFSERR_BAD_COOKIE) {
340 nfs_invaldir(vp);
341 error = nfs_vinvalbuf(vp, 0, cred, p, 1);
342 /*
343 * Yuck! The directory has been modified on the
344 * server. The only way to get the block is by
345 * reading from the beginning to get all the
346 * offset cookies.
347 */
348 for (i = 0; i <= lbn && !error; i++) {
349 if (np->n_direofoffset
350 && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
351 return (0);
352 bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p, operation);
353 if (!bp)
354 return (EINTR);
355 if (!ISSET(bp->b_flags, B_DONE)) {
356 SET(bp->b_flags, B_READ);
357 error = nfs_doio(bp, cred, p);
358 if (error) {
359 brelse(bp);
360 } else if (i < lbn)
361 brelse(bp);
362 }
363 }
364 }
365 if (error)
366 return (error);
367 }
368 }
369
370 /*
371 * If not eof and read aheads are enabled, start one.
372 * (You need the current block first, so that you have the
373 * directory offset cookie of the next block.)
374 */
375 if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
376 (np->n_direofoffset == 0 ||
377 (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
378 !(np->n_flag & NQNFSNONCACHE) &&
379 !incore(vp, lbn + 1)) {
380 rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p, operation);
381 if (rabp) {
382 if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) {
383 SET(rabp->b_flags, (B_READ | B_ASYNC));
384 if (nfs_asyncio(rabp, cred)) {
385 SET(rabp->b_flags, (B_INVAL|B_ERROR));
386 rabp->b_error = EIO;
387 brelse(rabp);
388 }
389 } else {
390 brelse(rabp);
391 }
392 }
393 }
394 /*
395 * Make sure we use a signed variant of min() since
396 * the second term may be negative.
397 */
398 n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
399 break;
400 default:
401 printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
402 break;
403 };
404
405 if (n > 0) {
406 error = uiomove(bp->b_data + on, (int)n, uio);
407 }
408 switch (vp->v_type) {
409 case VREG:
410 break;
411 case VLNK:
412 n = 0;
413 break;
414 case VDIR:
415 if (np->n_flag & NQNFSNONCACHE)
416 SET(bp->b_flags, B_INVAL);
417 break;
418 default:
419 printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
420 }
421 brelse(bp);
422 } while (error == 0 && uio->uio_resid > 0 && n > 0);
423 return (error);
424 }
425
426 /*
427 * Vnode op for write using bio
428 */
429 int
430 nfs_write(ap)
431 struct vop_write_args /* {
432 struct vnode *a_vp;
433 struct uio *a_uio;
434 int a_ioflag;
435 struct ucred *a_cred;
436 } */ *ap;
437 {
438 register int biosize;
439 register struct uio *uio = ap->a_uio;
440 struct proc *p = uio->uio_procp;
441 register struct vnode *vp = ap->a_vp;
442 struct nfsnode *np = VTONFS(vp);
443 register struct ucred *cred = ap->a_cred;
444 int ioflag = ap->a_ioflag;
445 struct buf *bp;
446 struct vattr vattr;
447 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
448 daddr_t lbn;
449 int bufsize;
450 int n, on, error = 0, iomode, must_commit;
451
452 #if DIAGNOSTIC
453 if (uio->uio_rw != UIO_WRITE)
454 panic("nfs_write mode");
455 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != current_proc())
456 panic("nfs_write proc");
457 #endif
458 if (vp->v_type != VREG)
459 return (EIO);
460 if (np->n_flag & NWRITEERR) {
461 np->n_flag &= ~NWRITEERR;
462 return (np->n_error);
463 }
464 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
465 (void)nfs_fsinfo(nmp, vp, cred, p);
466 if (ioflag & (IO_APPEND | IO_SYNC)) {
467 if (np->n_flag & NMODIFIED) {
468 np->n_attrstamp = 0;
469 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
470 if (error)
471 return (error);
472 }
473 if (ioflag & IO_APPEND) {
474 np->n_attrstamp = 0;
475 error = VOP_GETATTR(vp, &vattr, cred, p);
476 if (error)
477 return (error);
478 uio->uio_offset = np->n_size;
479 }
480 }
481 if (uio->uio_offset < 0)
482 return (EINVAL);
483 if (uio->uio_resid == 0)
484 return (0);
485 /*
486 * Maybe this should be above the vnode op call, but so long as
487 * file servers have no limits, I don't think it matters.
488 */
489 if (p && uio->uio_offset + uio->uio_resid >
490 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
491 psignal(p, SIGXFSZ);
492 return (EFBIG);
493 }
494 /*
495 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
496 * will be the same size within a filesystem. nfs_writerpc will
497 * still use nm_wsize when sizing the rpc's.
498 */
499 /* due to getblk/vm interactions, use values no larger than the VM page size */
500 biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
501
502 do {
503 /*
504 * Check for a valid write lease.
505 */
506 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
507 NQNFS_CKINVALID(vp, np, ND_WRITE)) {
508 do {
509 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
510 } while (error == NQNFS_EXPIRED);
511 if (error)
512 return (error);
513 if (np->n_lrev != np->n_brev ||
514 (np->n_flag & NQNFSNONCACHE)) {
515 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
516 if (error)
517 return (error);
518 np->n_brev = np->n_lrev;
519 }
520 }
521 if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
522 iomode = NFSV3WRITE_FILESYNC;
523 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
524 if (must_commit)
525 nfs_clearcommit(vp->v_mount);
526 return (error);
527 }
528 nfsstats.biocache_writes++;
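/*
 * Split the write up by cache block: lbn is the logical block being
 * written, "on" the offset within it, and n the number of bytes of
 * this request that fit in the block.
 */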
529 lbn = uio->uio_offset / biosize;
530 on = uio->uio_offset & (biosize-1);
531 n = min((unsigned)(biosize - on), uio->uio_resid);
532 again:
533 if (uio->uio_offset + n > np->n_size) {
534 np->n_size = uio->uio_offset + n;
535 np->n_flag |= NMODIFIED;
536 if (UBCISVALID(vp))
537 ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */
538 }
539 bufsize = biosize;
540 #if 0
541 /* (removed for UBC) */
542 if ((lbn + 1) * biosize > np->n_size) {
543 bufsize = np->n_size - lbn * biosize;
544 bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
545 }
546 #endif
547 bp = nfs_getwriteblk(vp, lbn, bufsize, p, cred, on, n);
548 if (!bp)
549 return (EINTR);
550 if (ISSET(bp->b_flags, B_ERROR)) {
551 error = bp->b_error;
552 brelse(bp);
553 return (error);
554 }
555 if (bp->b_wcred == NOCRED) {
556 crhold(cred);
557 bp->b_wcred = cred;
558 }
559 np->n_flag |= NMODIFIED;
560
561 /*
562 * Check for a valid write lease and get one as required,
563 * in case getblk() and/or bwrite() delayed us.
564 */
565 if ((nmp->nm_flag & NFSMNT_NQNFS) &&
566 NQNFS_CKINVALID(vp, np, ND_WRITE)) {
567 do {
568 error = nqnfs_getlease(vp, ND_WRITE, cred, p);
569 } while (error == NQNFS_EXPIRED);
570 if (error) {
571 brelse(bp);
572 return (error);
573 }
574 if (np->n_lrev != np->n_brev ||
575 (np->n_flag & NQNFSNONCACHE)) {
576 brelse(bp);
577 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
578 if (error)
579 return (error);
580 np->n_brev = np->n_lrev;
581 goto again;
582 }
583 }
584 error = uiomove((char *)bp->b_data + on, n, uio);
585 if (error) {
586 SET(bp->b_flags, B_ERROR);
587 brelse(bp);
588 return (error);
589 }
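/*
 * Merge the bytes just copied in, [on, on + n), into the buffer's
 * dirty region, and keep the valid region consistent with it.
 */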
590 if (bp->b_dirtyend > 0) {
591 bp->b_dirtyoff = min(on, bp->b_dirtyoff);
592 bp->b_dirtyend = max((on + n), bp->b_dirtyend);
593 } else {
594 bp->b_dirtyoff = on;
595 bp->b_dirtyend = on + n;
596 }
597 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
598 bp->b_validoff > bp->b_dirtyend) {
599 bp->b_validoff = bp->b_dirtyoff;
600 bp->b_validend = bp->b_dirtyend;
601 } else {
602 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
603 bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
604 }
605
606 /*
607 * Since this block is being modified, it must be written
608 * again and not just committed.
609 */
610 CLR(bp->b_flags, B_NEEDCOMMIT);
611
612 /*
613 * If the lease is non-cacheable or IO_SYNC is set, do bwrite().
614 */
615 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
616 bp->b_proc = p;
617 error = VOP_BWRITE(bp);
618 if (error)
619 return (error);
620 if (np->n_flag & NQNFSNONCACHE) {
621 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
622 if (error)
623 return (error);
624 }
625 } else if ((n + on) == biosize &&
626 (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
627 bp->b_proc = (struct proc *)0;
628 SET(bp->b_flags, B_ASYNC);
629 (void)nfs_writebp(bp, 0);
630 } else
631 bdwrite(bp);
632 } while (uio->uio_resid > 0 && n > 0);
633 return (0);
634 }
635
636 /*
637 * Get a cache block for writing. The range to be written is
638 * (off..off+len) within the block. This routine ensures that the
639 * block either has no dirty region or that the given range is
640 * contiguous with the existing dirty region.
641 */
642 static struct buf *
643 nfs_getwriteblk(vp, bn, size, p, cred, off, len)
644 struct vnode *vp;
645 daddr_t bn;
646 int size;
647 struct proc *p;
648 struct ucred *cred;
649 int off, len;
650 {
651 struct nfsnode *np = VTONFS(vp);
652 struct buf *bp;
653 int error;
654 struct iovec iov;
655 struct uio uio;
656 off_t boff;
657
658 again:
659 bp = nfs_getcacheblk(vp, bn, size, p, BLK_WRITE);
660 if (!bp)
661 return (NULL);
662 if (bp->b_wcred == NOCRED) {
663 crhold(cred);
664 bp->b_wcred = cred;
665 }
666
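/*
 * Never let the existing dirty region extend past the current end
 * of file.
 */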
667 if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
668 bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
669 }
670
671 /*
672 * UBC doesn't (yet) handle partial pages so nfs_biowrite was
673 * hacked to never bdwrite, to start every little write right away.
674 * Avie noticed the performance problem while running IE; hence this
675 * code, which permits those delayed writes by ensuring an initial read
676 * of the entire page. The read may hit EOF ("short read"), but
677 * we handle that case.
678 *
679 * We are quite dependent on the correctness of B_CACHE, so check
680 * that first in case of problems.
681 */
682 if (!ISSET(bp->b_flags, B_CACHE) && len < PAGE_SIZE) {
683 struct nfsnode *np = VTONFS(vp);
684
685 boff = (off_t)bp->b_blkno * DEV_BSIZE;
686 uio.uio_iov = &iov;
687 uio.uio_iovcnt = 1;
688 uio.uio_offset = boff;
689 uio.uio_resid = PAGE_SIZE;
690 uio.uio_segflg = UIO_SYSSPACE;
691 uio.uio_rw = UIO_READ;
692 uio.uio_procp = p;
693 iov.iov_base = bp->b_data;
694 iov.iov_len = PAGE_SIZE;
695 error = nfs_readrpc(vp, &uio, cred);
696 if (error) {
697 bp->b_error = error;
698 SET(bp->b_flags, B_ERROR);
699 printf("nfs_getwriteblk: readrpc returned %d", error);
700 }
701 if (uio.uio_resid > 0)
702 bzero(iov.iov_base, uio.uio_resid);
703 bp->b_validoff = 0;
704 bp->b_validend = PAGE_SIZE - uio.uio_resid;
705 if (np->n_size > boff + bp->b_validend)
706 bp->b_validend = min(np->n_size - boff, PAGE_SIZE);
707 bp->b_dirtyoff = 0;
708 bp->b_dirtyend = 0;
709 }
710
711 /*
712 * If the new write will leave a contiguous dirty
713 * area, just update b_dirtyoff and b_dirtyend. Otherwise, read in
714 * the gap so the old dirty region and the new write become contiguous.
715 */
716 if (bp->b_dirtyend > 0 &&
717 (off > bp->b_dirtyend || (off + len) < bp->b_dirtyoff)) {
718 off_t start, end;
719
720 boff = (off_t)bp->b_blkno * DEV_BSIZE;
721 if (off > bp->b_dirtyend) {
722 start = boff + bp->b_validend;
723 end = boff + off;
724 } else {
725 start = boff + off + len;
726 end = boff + bp->b_validoff;
727 }
728
729 /*
730 * It may be that the valid region in the buffer
731 * covers the region we want, in which case just
732 * extend the dirty region. Otherwise we try to
733 * extend the valid region.
734 */
735 if (end > start) {
736 uio.uio_iov = &iov;
737 uio.uio_iovcnt = 1;
738 uio.uio_offset = start;
739 uio.uio_resid = end - start;
740 uio.uio_segflg = UIO_SYSSPACE;
741 uio.uio_rw = UIO_READ;
742 uio.uio_procp = p;
743 iov.iov_base = bp->b_data + (start - boff);
744 iov.iov_len = end - start;
745 error = nfs_readrpc(vp, &uio, cred);
746 if (error) {
747 /*
748 * If we couldn't read, fall back to writing
749 * out the old dirty region.
750 */
751 bp->b_proc = p;
752 if (VOP_BWRITE(bp) == EINTR)
753 return (NULL);
754 goto again;
755 } else {
756 /*
757 * The read worked.
758 */
759 if (uio.uio_resid > 0) {
760 /*
761 * If there was a short read,
762 * just zero fill.
763 */
764 bzero(iov.iov_base,
765 uio.uio_resid);
766 }
767 if (off > bp->b_dirtyend)
768 bp->b_validend = off;
769 else
770 bp->b_validoff = off + len;
771 }
772 }
773
774 /*
775 * We now have a valid region that extends up to the
776 * region we are about to dirty.
777 */
778 if (off > bp->b_dirtyend)
779 bp->b_dirtyend = off;
780 else
781 bp->b_dirtyoff = off + len;
782 }
783
784 return bp;
785 }
786
787 /*
788 * Get an nfs cache block.
789 * Allocate a new one if the block isn't currently in the cache
790 * and return the block marked busy. If the calling process is
791 * interrupted by a signal for an interruptible mount point, return
792 * NULL.
793 */
794 static struct buf *
795 nfs_getcacheblk(vp, bn, size, p, operation)
796 struct vnode *vp;
797 daddr_t bn;
798 int size;
799 struct proc *p;
800 int operation; /* defined in sys/buf.h */
801 {
802 register struct buf *bp;
803 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
804 /* due to getblk/vm interactions, use values no larger than the VM page size */
805 int biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE);
806
807 if (nmp->nm_flag & NFSMNT_INT) {
808 bp = getblk(vp, bn, size, PCATCH, 0, operation);
809 while (bp == (struct buf *)0) {
810 if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
811 return ((struct buf *)0);
812 bp = getblk(vp, bn, size, 0, 2 * hz, operation);
813 }
814 } else
815 bp = getblk(vp, bn, size, 0, 0, operation);
816
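/*
 * For regular files the block number passed in is in biosize units;
 * express b_blkno in DEV_BSIZE units, since the rest of this file
 * computes byte offsets as b_blkno * DEV_BSIZE.
 */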
817 if (vp->v_type == VREG)
818 bp->b_blkno = (bn * biosize) / DEV_BSIZE;
819
820 return (bp);
821 }
822
823 /*
824 * Flush and invalidate all dirty buffers. If another process is already
825 * doing the flush, just wait for completion.
826 */
827 int
828 nfs_vinvalbuf(vp, flags, cred, p, intrflg)
829 struct vnode *vp;
830 int flags;
831 struct ucred *cred;
832 struct proc *p;
833 int intrflg;
834 {
835 register struct nfsnode *np = VTONFS(vp);
836 struct nfsmount *nmp = VFSTONFS(vp->v_mount);
837 int error = 0, slpflag, slptimeo;
838
839 if ((nmp->nm_flag & NFSMNT_INT) == 0)
840 intrflg = 0;
841 if (intrflg) {
842 slpflag = PCATCH;
843 slptimeo = 2 * hz;
844 } else {
845 slpflag = 0;
846 slptimeo = 0;
847 }
848 /*
849 * First wait for any other process doing a flush to complete.
850 */
851 while (np->n_flag & NFLUSHINPROG) {
852 np->n_flag |= NFLUSHWANT;
853 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
854 slptimeo);
855 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
856 return (EINTR);
857 }
858
859 /*
860 * Now, flush as required.
861 */
862 np->n_flag |= NFLUSHINPROG;
863 error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
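/*
 * vinvalbuf() can fail while waiting on busy buffers; keep retrying
 * until it succeeds, giving up with EINTR only if a signal arrives
 * on an interruptible mount.
 */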
864 while (error) {
865 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
866 np->n_flag &= ~NFLUSHINPROG;
867 if (np->n_flag & NFLUSHWANT) {
868 np->n_flag &= ~NFLUSHWANT;
869 wakeup((caddr_t)&np->n_flag);
870 }
871 return (EINTR);
872 }
873 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
874 }
875 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
876 if (np->n_flag & NFLUSHWANT) {
877 np->n_flag &= ~NFLUSHWANT;
878 wakeup((caddr_t)&np->n_flag);
879 }
880 (void) ubc_clean(vp, 1); /* get the pages out of vm also */
881 return (0);
882 }
883
884 /*
885 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
886 * This is mainly to avoid queueing async I/O requests when the nfsiods
887 * are all hung on a dead server.
888 */
889 int
890 nfs_asyncio(bp, cred)
891 register struct buf *bp;
892 struct ucred *cred;
893 {
894 struct nfsmount *nmp;
895 int i;
896 int gotiod;
897 int slpflag = 0;
898 int slptimeo = 0;
899 int error;
900
901 if (nfs_numasync == 0)
902 return (EIO);
903
904 nmp = VFSTONFS(bp->b_vp->v_mount);
905 again:
906 if (nmp->nm_flag & NFSMNT_INT)
907 slpflag = PCATCH;
908 gotiod = FALSE;
909
910 /*
911 * Find a free iod to process this request.
912 */
913 for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
914 if (nfs_iodwant[i]) {
915 /*
916 * Found one, so wake it up and tell it which
917 * mount to process.
918 */
919 NFS_DPF(ASYNCIO,
920 ("nfs_asyncio: waking iod %d for mount %p\n",
921 i, nmp));
922 nfs_iodwant[i] = (struct proc *)0;
923 nfs_iodmount[i] = nmp;
924 nmp->nm_bufqiods++;
925 wakeup((caddr_t)&nfs_iodwant[i]);
926 gotiod = TRUE;
927 break;
928 }
929
930 /*
931 * If none are free, we may already have an iod working on this mount
932 * point. If so, it will process our request.
933 */
934 if (!gotiod) {
935 if (nmp->nm_bufqiods > 0) {
936 NFS_DPF(ASYNCIO,
937 ("nfs_asyncio: %d iods are already processing mount %p\n",
938 nmp->nm_bufqiods, nmp));
939 gotiod = TRUE;
940 }
941 }
942
943 /*
944 * If we have an iod which can process the request, then queue
945 * the buffer.
946 */
947 if (gotiod) {
948 /*
949 * Ensure that the queue never grows too large.
950 */
951 while (nmp->nm_bufqlen >= 2*nfs_numasync) {
952 NFS_DPF(ASYNCIO,
953 ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
954 nmp->nm_bufqwant = TRUE;
955 error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
956 "nfsaio", slptimeo);
957 if (error) {
958 if (nfs_sigintr(nmp, NULL, bp->b_proc))
959 return (EINTR);
960 if (slpflag == PCATCH) {
961 slpflag = 0;
962 slptimeo = 2 * hz;
963 }
964 }
965 /*
966 * We might have lost our iod while sleeping,
967 * so check and loop if necessary.
968 */
969 if (nmp->nm_bufqiods == 0) {
970 NFS_DPF(ASYNCIO,
971 ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
972 goto again;
973 }
974 }
975
976 if (ISSET(bp->b_flags, B_READ)) {
977 if (bp->b_rcred == NOCRED && cred != NOCRED) {
978 crhold(cred);
979 bp->b_rcred = cred;
980 }
981 } else {
982 SET(bp->b_flags, B_WRITEINPROG);
983 if (bp->b_wcred == NOCRED && cred != NOCRED) {
984 crhold(cred);
985 bp->b_wcred = cred;
986 }
987 }
988
989 TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
990 nmp->nm_bufqlen++;
991 return (0);
992 }
993
994 /*
995 * All the iods are busy on other mounts, so return EIO to
996 * force the caller to process the i/o synchronously.
997 */
998 NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
999 return (EIO);
1000 }
1001
1002 /*
1003 * Do an I/O operation to/from a cache block. This may be called
1004 * synchronously or from an nfsiod.
1005 */
1006 int
1007 nfs_doio(bp, cr, p)
1008 register struct buf *bp;
1009 struct ucred *cr;
1010 struct proc *p;
1011 {
1012 register struct uio *uiop;
1013 register struct vnode *vp;
1014 struct nfsnode *np;
1015 struct nfsmount *nmp;
1016 int error = 0, diff, len, iomode, must_commit = 0;
1017 struct uio uio;
1018 struct iovec io;
1019
1020 vp = bp->b_vp;
1021 NFSTRACE(NFSTRC_DIO, vp);
1022 np = VTONFS(vp);
1023 nmp = VFSTONFS(vp->v_mount);
1024 uiop = &uio;
1025 uiop->uio_iov = &io;
1026 uiop->uio_iovcnt = 1;
1027 uiop->uio_segflg = UIO_SYSSPACE;
1028 uiop->uio_procp = p;
1029
1030 /*
1031 * With UBC, getblk() can return a buf with B_DONE set.
1032 * This indicates that the VM has valid data for that page.
1033 * NFS being stateless, this case poses a problem.
1034 * By definition, the NFS server should always be consulted
1035 * for the data in that page.
1036 * So we choose to clear B_DONE and do the I/O.
1037 *
1038 * XXX revisit this if there is a performance issue.
1039 * XXX In that case, we could play the attribute cache games ...
1040 */
1041 if (ISSET(bp->b_flags, B_DONE)) {
1042 if (!ISSET(bp->b_flags, B_ASYNC))
1043 panic("nfs_doio: done and not async");
1044 CLR(bp->b_flags, B_DONE);
1045 }
1046
1047 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_START,
1048 (int)np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount, bp->b_flags, 0);
1049
1050 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 257)) | DBG_FUNC_NONE,
1051 bp->b_validoff, bp->b_validend, bp->b_dirtyoff, bp->b_dirtyend, 0);
1052
1053 /*
1054 * Historically, paging was done with physio, but no more.
1055 */
1056 if (ISSET(bp->b_flags, B_PHYS)) {
1057 /*
1058 * ...though reading /dev/drum still gets us here.
1059 */
1060 io.iov_len = uiop->uio_resid = bp->b_bcount;
1061 /* mapping was done by vmapbuf() */
1062 io.iov_base = bp->b_data;
1063 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1064 if (ISSET(bp->b_flags, B_READ)) {
1065 uiop->uio_rw = UIO_READ;
1066 nfsstats.read_physios++;
1067 error = nfs_readrpc(vp, uiop, cr);
1068 } else {
1069 int com;
1070
1071 iomode = NFSV3WRITE_DATASYNC;
1072 uiop->uio_rw = UIO_WRITE;
1073 nfsstats.write_physios++;
1074 error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
1075 }
1076 if (error) {
1077 SET(bp->b_flags, B_ERROR);
1078 bp->b_error = error;
1079 }
1080 } else if (ISSET(bp->b_flags, B_READ)) {
1081 io.iov_len = uiop->uio_resid = bp->b_bcount;
1082 io.iov_base = bp->b_data;
1083 uiop->uio_rw = UIO_READ;
1084 switch (vp->v_type) {
1085 case VREG:
1086 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
1087 nfsstats.read_bios++;
1088 error = nfs_readrpc(vp, uiop, cr);
1089
1090 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 262)) | DBG_FUNC_NONE,
1091 (int)np->n_size, bp->b_blkno * DEV_BSIZE, uiop->uio_resid, error, 0);
1092
1093
1094 if (!error) {
1095 bp->b_validoff = 0;
1096 if (uiop->uio_resid) {
1097 /*
1098 * If len > 0, there is a hole in the file and
1099 * no writes after the hole have been pushed to
1100 * the server yet.
1101 * Just zero fill the rest of the valid area.
1102 */
1103 diff = bp->b_bcount - uiop->uio_resid;
1104 len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
1105 + diff);
1106 if (len > 0) {
1107 len = min(len, uiop->uio_resid);
1108 bzero((char *)bp->b_data + diff, len);
1109 bp->b_validend = diff + len;
1110
1111 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE,
1112 diff, len, 0, 1, 0);
1113
1114 } else
1115 bp->b_validend = diff;
1116 } else
1117 bp->b_validend = bp->b_bcount;
1118 #if 1 /* USV + JOE [ */
1119 if (bp->b_validend < bp->b_bufsize) {
1120 /*
1121 * we're about to release a partial buffer after a read... the only
1122 * way we should get here is if this buffer contains the EOF
1123 * before releasing it, we'll zero out to the end of the buffer
1124 * so that if a mmap of this page occurs, we'll see zero's even
1125 * if a ftruncate extends the file in the meantime
1126 */
1127 bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend));
1128
1129 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE,
1130 bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 2, 0);
1131 }
1132 #endif /* ] USV + JOE */
1133 }
1134 if (p && (vp->v_flag & VTEXT) &&
1135 (((nmp->nm_flag & NFSMNT_NQNFS) &&
1136 NQNFS_CKINVALID(vp, np, ND_READ) &&
1137 np->n_lrev != np->n_brev) ||
1138 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
1139 np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
1140 uprintf("Process killed due to text file modification\n");
1141 psignal(p, SIGKILL);
1142 p->p_flag |= P_NOSWAP;
1143 }
1144 break;
1145 case VLNK:
1146 uiop->uio_offset = (off_t)0;
1147 nfsstats.readlink_bios++;
1148 error = nfs_readlinkrpc(vp, uiop, cr);
1149 break;
1150 case VDIR:
1151 nfsstats.readdir_bios++;
1152 uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
1153 if (!(nmp->nm_flag & NFSMNT_NFSV3))
1154 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; /* dk@farm.org */
1155 if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
1156 error = nfs_readdirplusrpc(vp, uiop, cr);
1157 if (error == NFSERR_NOTSUPP)
1158 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
1159 }
1160 if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
1161 error = nfs_readdirrpc(vp, uiop, cr);
1162 break;
1163 default:
1164 printf("nfs_doio: type %x unexpected\n",vp->v_type);
1165 break;
1166 };
1167 if (error) {
1168 SET(bp->b_flags, B_ERROR);
1169 bp->b_error = error;
1170 }
1171 } else {
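/*
 * This is a write: first make sure the dirty region does not extend
 * past the current end of file.
 */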
1172 if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
1173 bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
1174
1175 if (bp->b_dirtyend > bp->b_dirtyoff) {
1176
1177 io.iov_len = uiop->uio_resid = bp->b_dirtyend
1178 - bp->b_dirtyoff;
1179 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
1180 + bp->b_dirtyoff;
1181 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
1182 uiop->uio_rw = UIO_WRITE;
1183
1184 nfsstats.write_bios++;
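/*
 * A plain async write (not already marked B_NEEDCOMMIT or B_NOCACHE)
 * may go out UNSTABLE and be committed later; everything else is
 * written FILESYNC.
 */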
1185 if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
1186 iomode = NFSV3WRITE_UNSTABLE;
1187 else
1188 iomode = NFSV3WRITE_FILESYNC;
1189 SET(bp->b_flags, B_WRITEINPROG);
1190 error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
1191 if (!error && iomode == NFSV3WRITE_UNSTABLE)
1192 SET(bp->b_flags, B_NEEDCOMMIT);
1193 else
1194 CLR(bp->b_flags, B_NEEDCOMMIT);
1195 CLR(bp->b_flags, B_WRITEINPROG);
1196
1197 /*
1198 * For an interrupted write, the buffer is still valid
1199 * and the write hasn't been pushed to the server yet,
1200 * so we can't set B_ERROR and report the interruption
1201 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
1202 * is not relevant, so the rpc attempt is essentially
1203 * a noop. For the case of a V3 write rpc not being
1204 * committed to stable storage, the block is still
1205 * dirty and requires either a commit rpc or another
1206 * write rpc with iomode == NFSV3WRITE_FILESYNC before
1207 * the block is reused. This is indicated by setting
1208 * the B_DELWRI and B_NEEDCOMMIT flags.
1209 */
1210 if (error == EINTR
1211 || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
1212 int s;
1213
1214 CLR(bp->b_flags, (B_INVAL|B_NOCACHE));
1215 SET(bp->b_flags, B_DELWRI);
1216
1217 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 261)) | DBG_FUNC_NONE,
1218 bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
1219
1220 /*
1221 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
1222 * buffer to the clean list, we have to reassign it back to the
1223 * dirty one. Ugh.
1224 */
1225 if (ISSET(bp->b_flags, B_ASYNC)) {
1226 s = splbio();
1227 reassignbuf(bp, vp);
1228 splx(s);
1229 } else {
1230 SET(bp->b_flags, B_EINTR);
1231 }
1232 } else {
1233 if (error) {
1234 SET(bp->b_flags, B_ERROR);
1235 bp->b_error = np->n_error = error;
1236 np->n_flag |= NWRITEERR;
1237 }
1238 bp->b_dirtyoff = bp->b_dirtyend = 0;
1239
1240 #if 1 /* JOE */
1241 /*
1242 * validoff and validend represent the real data present in this buffer.
1243 * If validoff is non-zero, then we have to invalidate the buffer and kill
1244 * the page when biodone is called. The same is also true when validend
1245 * doesn't extend all the way to the end of the buffer and validend doesn't
1246 * equate to the current EOF. Eventually we need to deal with this in a
1247 * more humane way (like keeping the partial buffer without making it
1248 * immediately available to the VM page cache).
1249 */
1250 if (bp->b_validoff)
1251 SET(bp->b_flags, B_INVAL);
1252 else
1253 if (bp->b_validend < bp->b_bufsize) {
1254 if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) == np->n_size) {
1255 bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend));
1256
1257 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 259)) | DBG_FUNC_NONE,
1258 bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 0, 0);
1259 }
1260 else
1261 SET(bp->b_flags, B_INVAL);
1262 }
1263 #endif
1264 }
1265
1266 } else {
1267
1268 #if 1 /* JOE */
1269 if (bp->b_validoff)
1270 SET(bp->b_flags, B_INVAL);
1271 else if (bp->b_validend < bp->b_bufsize) {
1272 if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) != np->n_size)
1273 SET(bp->b_flags, B_INVAL);
1274 }
1275 if (bp->b_flags & B_INVAL) {
1276 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE,
1277 bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
1278 }
1279 #endif
1280 bp->b_resid = 0;
1281 biodone(bp);
1282 NFSTRACE(NFSTRC_DIO_DONE, vp);
1283 return (0);
1284 }
1285 }
1286 bp->b_resid = uiop->uio_resid;
1287 if (must_commit)
1288 nfs_clearcommit(vp->v_mount);
1289
1290 if (bp->b_flags & B_INVAL) {
1291 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE,
1292 bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0);
1293 }
1294 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_END,
1295 bp->b_validoff, bp->b_validend, bp->b_bcount, error, 0);
1296
1297 biodone(bp);
1298 NFSTRACE(NFSTRC_DIO_DONE, vp);
1299 return (error);
1300 }